Resource: 

In [1]:
import pandas as pd
import numpy as np
print(pd.__version__)
print(np.__version__)

0.22.0
1.14.0


In [2]:
df = pd.read_csv('weather_data_missing.csv', index_col = ['day'], parse_dates = [0])
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32,6,Rain
2017-01-02,-99999,7,Sunny
2017-01-03,28,-99999,Snow
2017-01-04,-99999,7,0
2017-01-05,32,-99999,Rain
2017-01-06,31,2,Sunny
2017-01-06,34,5,0


In [3]:
# Replacing the negative values with NaN
new_df = df.replace(-99999, value = np.NaN)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6.0,Rain
2017-01-02,,7.0,Sunny
2017-01-03,28.0,,Snow
2017-01-04,,7.0,0
2017-01-05,32.0,,Rain
2017-01-06,31.0,2.0,Sunny
2017-01-06,34.0,5.0,0


In [12]:
# Replacing list with multiple values using a dictionary with the pd.replace()
new_df = df.replace({
    'temperature': -99999,
    'windspeed': -99999,
    'event':'0'
}, value = np.NaN)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6.0,Rain
2017-01-02,,7.0,Sunny
2017-01-03,28.0,,Snow
2017-01-04,,7.0,
2017-01-05,32.0,,Rain
2017-01-06,31.0,2.0,Sunny
2017-01-06,34.0,5.0,


In [5]:
# Replacing values with dict mapping:
new_df = df.replace({ -99999: 'NaN',
                     '0' :  'Sunny'})
new_df

# We can see that the -99999 value got replaced by NaN and the 0 in replaced by Sunny

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6.0,Rain
2017-01-02,,7.0,Sunny
2017-01-03,28.0,,Snow
2017-01-04,,7.0,Sunny
2017-01-05,32.0,,Rain
2017-01-06,31.0,2.0,Sunny
2017-01-06,34.0,5.0,Sunny


In [6]:
# To handle the dataset given below we can use Regex
df_manipulated = pd.read_csv('weather_data_mani.csv')
df_manipulated

Unnamed: 0,day,temperature,windspeed,event
0,1/1/17,32 F,6 mph,Rain
1,1/4/17,,9,Sunny
2,1/5/17,28,,Snow
3,1/6/17,,7,
4,1/7/17,32 C,,Rain
5,1/8/17,,,Sunny
6,1/9/17,,,
7,1/10/17,34,8,Cloudy
8,1/11/17,40 C,12 mph,Sunny


In [7]:
new_df = df.replace('[A-Za-z]','', regex = True)
new_df
# Note if we use the above method, the entire event column values gets deleted,
# because it contains values that are string or non-numeric

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32,6,
2017-01-02,-99999,7,
2017-01-03,28,-99999,
2017-01-04,-99999,7,0.0
2017-01-05,32,-99999,
2017-01-06,31,2,
2017-01-06,34,5,0.0


In [8]:
# To handle the above issue we can use dictionary with Regex(for catching any 
# non-numeric values)
new_df = df.replace({
    'temperature':'[A-Za-z]',
    'windspeed':'[A-Za-z]'},'', regex = True
)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32,6,Rain
2017-01-02,-99999,7,Sunny
2017-01-03,28,-99999,Snow
2017-01-04,-99999,7,0
2017-01-05,32,-99999,Rain
2017-01-06,31,2,Sunny
2017-01-06,34,5,0


In [9]:
df_dummy = pd.DataFrame({
    'score':['exceptional', 'average','good', 'poor','average','exceptional'],
    'student':['rob','maya','parthiv','tom','julian','erica']
})
df_dummy

Unnamed: 0,score,student
0,exceptional,rob
1,average,maya
2,good,parthiv
3,poor,tom
4,average,julian
5,exceptional,erica


In [10]:
 # consider that we want to convert the score from chacter values to numeric grades
    # like 10-exceptional 8-good 5-average 3-poor
    
new_df = df_dummy.replace(['exceptional','good','average','poor'],[10,8,5,3])
new_df

Unnamed: 0,score,student
0,10,rob
1,5,maya
2,8,parthiv
3,3,tom
4,5,julian
5,10,erica


In [13]:
# Adding a new column with dummy values:
new_values = np.random.randint(-10, 0, len(new_df))
new_values
df["dummy_colm"] =  new_values

In [14]:
df.head()

Unnamed: 0_level_0,temperature,windspeed,event,dummy_colm
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-01,32,6,Rain,-10
2017-01-02,-99999,7,Sunny,-10
2017-01-03,28,-99999,Snow,-8
2017-01-04,-99999,7,0,-1
2017-01-05,32,-99999,Rain,-3


In [20]:
# replacing the negative values with NAN

new_df = df
new_df[new_df < 0]= np.NAN

In [21]:
new_df

Unnamed: 0_level_0,temperature,windspeed,event,dummy_colm
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-01,32.0,6.0,,
2017-01-02,,7.0,,
2017-01-03,28.0,,,
2017-01-04,,7.0,,
2017-01-05,32.0,,,
2017-01-06,31.0,2.0,,
2017-01-06,34.0,5.0,,
