## <font color="maroon"><h4 align="center">Handling Missing Data - replace method</font>

In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv("weather_data.csv", parse_dates=['day'])
print(type(df.day[0]))
print(df)
df.set_index('day', inplace=True)
df

<class 'pandas._libs.tslibs.timestamps.Timestamp'>
         day  temperature  windspeed  event
0 2017-01-01           32          6   Rain
1 2017-01-02       -99999          7  Sunny
2 2017-01-03           28     -99999   Snow
3 2017-01-04       -99999          7      0
4 2017-01-05           32     -99999   Rain
5 2017-01-06           31          2  Sunny
6 2017-01-06           34          5      0


Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32,6,Rain
2017-01-02,-99999,7,Sunny
2017-01-03,28,-99999,Snow
2017-01-04,-99999,7,0
2017-01-05,32,-99999,Rain
2017-01-06,31,2,Sunny
2017-01-06,34,5,0


**Replacing single value**

In [2]:
new_df = df.replace(-99999, value=np.NaN)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6.0,Rain
2017-01-02,,7.0,Sunny
2017-01-03,28.0,,Snow
2017-01-04,,7.0,0
2017-01-05,32.0,,Rain
2017-01-06,31.0,2.0,Sunny
2017-01-06,34.0,5.0,0


**Replacing list with single value**

In [3]:
new_df = df.replace(to_replace=[-99999,-88888], value=0)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32,6,Rain
2017-01-02,0,7,Sunny
2017-01-03,28,0,Snow
2017-01-04,0,7,0
2017-01-05,32,0,Rain
2017-01-06,31,2,Sunny
2017-01-06,34,5,0


**Replacing per column**

In [4]:
new_df = df.replace({
        'temperature': -99999,
        'windspeed': -99999,
        'event': '0'
    }, np.nan)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6.0,Rain
2017-01-02,,7.0,Sunny
2017-01-03,28.0,,Snow
2017-01-04,,7.0,
2017-01-05,32.0,,Rain
2017-01-06,31.0,2.0,Sunny
2017-01-06,34.0,5.0,


**Replacing by using mapping**

In [5]:
new_df = df.replace({
        -99999: np.nan,
        'no event': 'Sunny',
    })
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6.0,Rain
2017-01-02,,7.0,Sunny
2017-01-03,28.0,,Snow
2017-01-04,,7.0,0
2017-01-05,32.0,,Rain
2017-01-06,31.0,2.0,Sunny
2017-01-06,34.0,5.0,0


**Regex**

In [6]:
# when windspeed is 6 mph, 7 mph etc. & temperature is 32 F, 28 F etc.
new_df = df.replace({'temperature': '[A-Za-z]', 'windspeed': '[a-z]'},'', regex=True) 
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32,6,Rain
2017-01-02,-99999,7,Sunny
2017-01-03,28,-99999,Snow
2017-01-04,-99999,7,0
2017-01-05,32,-99999,Rain
2017-01-06,31,2,Sunny
2017-01-06,34,5,0


**Replacing list with another list**

In [7]:
df = pd.DataFrame({
    'score': ['exceptional','average', 'good', 'poor', 'average', 'exceptional'],
    'student': ['rob', 'maya', 'parthiv', 'tom', 'julian', 'erica']
})
df

Unnamed: 0,score,student
0,exceptional,rob
1,average,maya
2,good,parthiv
3,poor,tom
4,average,julian
5,exceptional,erica


In [8]:
df.replace(['poor', 'average', 'good', 'exceptional'], [1,2,3,4])
print(df)

         score  student
0  exceptional      rob
1      average     maya
2         good  parthiv
3         poor      tom
4      average   julian
5  exceptional    erica
