# Handling Missing Data with the `replace()` Function

In [1]:
import numpy as np
import pandas as pd

# Load the raw weather readings; this frame is the input to the replace() examples.
df = pd.read_csv(filepath_or_buffer='6_weather_data.csv')
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


### replace()

In [2]:
# replace(value_to_be_replaced, replacement_value)
# To replace several different values at once, pass them to replace() as a list.
# np.nan is used rather than the np.NaN alias, which was removed in NumPy 2.0.
new_df = df.replace(-99999, np.nan)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,,7.0,Sunny
2,1/3/2017,28.0,,Snow
3,1/4/2017,,7.0,0
4,1/5/2017,32.0,,Rain
5,1/6/2017,31.0,2.0,Sunny
6,1/6/2017,34.0,5.0,0


In [5]:
# Replace different values in different columns by passing a dictionary that
# maps each column name to the value to look for in that column; every match
# is replaced with the second argument.
# np.nan is used rather than the np.NaN alias, which was removed in NumPy 2.0.
new_df = df.replace({
    'temperature': -99999,
    'windspeed': -99999,
    'event': "0"
}, np.nan)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,,7.0,Sunny
2,1/3/2017,28.0,,Snow
3,1/4/2017,,7.0,
4,1/5/2017,32.0,,Rain
5,1/6/2017,31.0,2.0,Sunny
6,1/6/2017,34.0,5.0,


In [7]:
# Map each value to be replaced directly to its replacement, so different
# values can receive different replacements in a single call.
# np.nan is used rather than the np.NaN alias, which was removed in NumPy 2.0.
new_df = df.replace({
    -99999: np.nan,
    '0': 'Sunny'
})
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,,7.0,Sunny
2,1/3/2017,28.0,,Snow
3,1/4/2017,,7.0,Sunny
4,1/5/2017,32.0,,Rain
5,1/6/2017,31.0,2.0,Sunny
6,1/6/2017,34.0,5.0,Sunny


### regex

In [11]:
# Load the variant of the weather data whose values are inconsistent
# (some readings carry unit suffixes, others do not).
df_letter = pd.read_csv(filepath_or_buffer='6_weather_data_with_letter.csv')
df_letter

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32 F,6 mph,Rain
1,1/2/2017,-99999,7 mph,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,No Event
4,1/5/2017,32 C,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,No Event


In [12]:
# To remove the unit letters, use a regex and replace every letter with ''.
# The dictionary maps each column name to the pattern applied in that column.
# The class must be [a-zA-Z]: the range A-z would also match the punctuation
# characters that sit between 'Z' and 'a' in ASCII ([ \ ] ^ _ `).
# NOTE: only letters are stripped; the space before a unit is left in place
# (e.g. '32 F' becomes '32 ').
new_df_letter = df_letter.replace({
    'temperature': '[a-zA-Z]',
    'windspeed': '[a-zA-Z]'
}, '', regex=True)
new_df_letter

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,No Event
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,No Event


### replace list with list

In [13]:
# Small hand-built frame pairing a qualitative score label with each student,
# written one row per student.
df = pd.DataFrame(
    [
        ('exceptional', 'rob'),
        ('average', 'maya'),
        ('good', 'parthiv'),
        ('poor', 'tom'),
        ('average', 'julian'),
        ('exceptional', 'erica'),
    ],
    columns=['score', 'student'],
)
df

Unnamed: 0,score,student
0,exceptional,rob
1,average,maya
2,good,parthiv
3,poor,tom
4,average,julian
5,exceptional,erica


In [14]:
# Swap each qualitative label for its numeric rank. A flat mapping of
# label -> rank is equivalent to passing two parallel lists to replace().
new_df = df.replace({
    'poor': 1,
    'average': 2,
    'good': 3,
    'exceptional': 4,
})
new_df

Unnamed: 0,score,student
0,4,rob
1,2,maya
2,3,parthiv
3,1,tom
4,2,julian
5,4,erica
