# Missing Data 

In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn
# Fix NumPy's global seed so randn() produces the same numbers on every run.
np.random.seed(seed=42)

In [3]:
# 10 rows x 3 columns of standard-normal samples, labelled X, Y and Z.
df = pd.DataFrame(data=randn(10, 3), columns=list('XYZ'))

In [4]:
# Show the complete frame; it has no missing values yet.
df

Unnamed: 0,X,Y,Z
0,0.496714,-0.138264,0.647689
1,1.52303,-0.234153,-0.234137
2,1.579213,0.767435,-0.469474
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
6,-0.908024,-1.412304,1.465649
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694


In [5]:
# Element-wise comparison: True wherever a value is below 0.6.
df.lt(0.6)

Unnamed: 0,X,Y,Z
0,True,True,False
1,False,True,True
2,False,False,True
3,True,True,True
4,True,True,True
5,True,True,True
6,True,True,False
7,True,True,True
8,True,True,True
9,True,True,True


In [6]:
# Keep the values below 0.6 and turn every other cell into NaN,
# which gives us a frame with missing data to work on.
df1 = df.where(df < 0.6)

In [7]:
# Cells that failed the `< 0.6` test now show up as missing (NaN).
df1

Unnamed: 0,X,Y,Z
0,0.496714,-0.138264,
1,,-0.234153,-0.234137
2,,,-0.469474
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
6,-0.908024,-1.412304,
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694


In [8]:
# Masking replaces values with NaN but removes no rows or columns,
# so the shape is the same as that of df.
df1.shape

(10, 3)

In [9]:
# The "Non-Null Count" column shows how many values in each column are not missing.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X       8 non-null      float64
 1   Y       9 non-null      float64
 2   Z       8 non-null      float64
dtypes: float64(3)
memory usage: 368.0 bytes


### Drop Missing Values using `DataFrame.dropna()`

In [10]:
# Drop every row that contains at least one NaN. This returns a new frame;
# df1 itself is only modified if inplace=True is passed.
df1.dropna(axis=0, how="any")

Unnamed: 0,X,Y,Z
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694


In [11]:
# df1 still has all of its rows: the dropna() call above returned a new
# frame and its result was not assigned back.
df1

Unnamed: 0,X,Y,Z
0,0.496714,-0.138264,
1,,-0.234153,-0.234137
2,,,-0.469474
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
6,-0.908024,-1.412304,
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694


In [12]:
# Only row 2 is dropped here: it has a single non-null value, below the threshold of 2.
df1.dropna(thresh = 2) # thresh: minimum number of NON-null values a row needs in order to be kept

Unnamed: 0,X,Y,Z
0,0.496714,-0.138264,
1,,-0.234153,-0.234137
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
6,-0.908024,-1.412304,
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694


In [13]:
# axis=1 drops columns (rather than rows) that contain any NaN.
# Note: every column of df1 has at least one NaN, so this would return a frame with no columns.
# df1.dropna(axis=1)

### Fill Missing Values (NaN) using `DataFrame.fillna()`

In [14]:
# Starting point for the fillna() examples: df1 with its NaNs still in place.
df1

Unnamed: 0,X,Y,Z
0,0.496714,-0.138264,
1,,-0.234153,-0.234137
2,,,-0.469474
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
6,-0.908024,-1.412304,
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694


In [15]:
# Replace every NaN in the frame with the constant 1; the result is a new frame.
df1.fillna(1)

Unnamed: 0,X,Y,Z
0,0.496714,-0.138264,1.0
1,1.0,-0.234153,-0.234137
2,1.0,1.0,-0.469474
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
6,-0.908024,-1.412304,1.0
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694


In [16]:
# df1 is unchanged: fillna() above returned a filled copy that was not assigned back.
df1

Unnamed: 0,X,Y,Z
0,0.496714,-0.138264,
1,,-0.234153,-0.234137
2,,,-0.469474
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
6,-0.908024,-1.412304,
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694


In [17]:
# Fill each column with its own replacement value and print the three resulting
# Series one after another. Filling the float column Z with a string makes the
# resulting Series dtype `object`.
print(
    df1['X'].fillna(1),
    df1['Y'].fillna(2),
    df1['Z'].fillna("Asem"),
    sep="\n",
)

0    0.496714
1    1.000000
2    1.000000
3    0.542560
4    0.241962
5   -0.562288
6   -0.908024
7   -0.225776
8   -0.544383
9    0.375698
Name: X, dtype: float64
0   -0.138264
1   -0.234153
2    2.000000
3   -0.463418
4   -1.913280
5   -1.012831
6   -1.412304
7    0.067528
8    0.110923
9   -0.600639
Name: Y, dtype: float64
0        Asem
1   -0.234137
2   -0.469474
3    -0.46573
4   -1.724918
5    0.314247
6        Asem
7   -1.424748
8   -1.150994
9   -0.291694
Name: Z, dtype: object


In [18]:
# The per-column fillna() calls above only printed new Series; df1 still holds its NaNs.
df1

Unnamed: 0,X,Y,Z
0,0.496714,-0.138264,
1,,-0.234153,-0.234137
2,,,-0.469474
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
6,-0.908024,-1.412304,
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694


In [19]:
# Mean of the non-missing X values (NaNs are skipped).
df1['X'].mean(skipna=True)

-0.0729420179081248

In [20]:
# Replace the NaNs in X with the mean of the column's non-missing values.
# The filled column is assigned back instead of calling fillna(inplace=True)
# on df1['X']: that form is chained assignment on a temporary selection, which
# raises a FutureWarning in pandas 2.x and no longer updates df1 under
# Copy-on-Write (the default from pandas 3.0).
df1['X'] = df1['X'].fillna(value=df1['X'].mean())

In [21]:
# X no longer has missing values (they now hold the column mean); Y and Z still contain NaNs.
df1

Unnamed: 0,X,Y,Z
0,0.496714,-0.138264,
1,-0.072942,-0.234153,-0.234137
2,-0.072942,,-0.469474
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
6,-0.908024,-1.412304,
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694
