In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample data for the frame built below. Several employee names carry stray
# leading/trailing whitespace and None marks a missing position or salary;
# later cells clean these up.
employees = [
    'employee1',
    '  employee2',
    'employee3  ',
    'employee4  ',
    'employee5',
    'employee6',
    'employee7',
    'employee8',
]
position = [
    'analyst',
    'manager',
    'analyst',
    'analyst',
    'manager',
    'senior manager',
    'manager',
    None,
]
salary = [30000, 56000, 28000, 33000, 60000, 75000, None, None]
columns = ['employee', 'position', 'salary']

In [3]:
# Zip the three parallel lists into one (employee, position, salary) tuple per
# row, then label the resulting columns with the names held in `columns`.
rows = list(zip(employees, position, salary))
df = pd.DataFrame(data=rows, columns=columns)

In [4]:
# Display the frame just built from the three lists.
df

Unnamed: 0,employee,position,salary
0,employee1,analyst,30000.0
1,employee2,manager,56000.0
2,employee3,analyst,28000.0
3,employee4,analyst,33000.0
4,employee5,manager,60000.0
5,employee6,senior manager,75000.0
6,employee7,manager,
7,employee8,,


In [5]:
# The 'employee' column still holds the raw strings, including the leading or
# trailing whitespace on rows 1-3.
df['employee']

0      employee1
1      employee2
2    employee3  
3    employee4  
4      employee5
5      employee6
6      employee7
7      employee8
Name: employee, dtype: object

In [6]:
# ... you can remove this with the vectorized string accessor.
# Unlike apply(lambda x: x.strip()), .str.strip() does not raise on missing
# values; it leaves them as NaN.
df['employee'] = df['employee'].str.strip()

In [7]:
# Display the frame again after stripping the employee names.
df

Unnamed: 0,employee,position,salary
0,employee1,analyst,30000.0
1,employee2,manager,56000.0
2,employee3,analyst,28000.0
3,employee4,analyst,33000.0
4,employee5,manager,60000.0
5,employee6,senior manager,75000.0
6,employee7,manager,
7,employee8,,


In [8]:
# isna() returns a boolean frame of the same shape: True where a value is
# missing (None/NaN), False otherwise. isnull() does the same.
df.isna()

Unnamed: 0,employee,position,salary
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
5,False,False,False
6,False,False,True
7,False,True,True


In [9]:
# Display the frame again for reference; it still contains the missing
# position and salary values.
df

Unnamed: 0,employee,position,salary
0,employee1,analyst,30000.0
1,employee2,manager,56000.0
2,employee3,analyst,28000.0
3,employee4,analyst,33000.0
4,employee5,manager,60000.0
5,employee6,senior manager,75000.0
6,employee7,manager,
7,employee8,,


![dropna.png](attachment:dropna.png)

In [10]:
# Drop every row that has at least one NA/null value (rows 6 and 7 here).
# The result is a new frame; df itself is left untouched.
df.dropna(axis='index', how='any')

Unnamed: 0,employee,position,salary
0,employee1,analyst,30000.0
1,employee2,manager,56000.0
2,employee3,analyst,28000.0
3,employee4,analyst,33000.0
4,employee5,manager,60000.0
5,employee6,senior manager,75000.0


In [11]:
# Drop every column that has at least one NA/null value: only 'employee' is
# complete, so 'position' and 'salary' are removed from the result.
df.dropna(axis='columns')

Unnamed: 0,employee
0,employee1
1,employee2
2,employee3
3,employee4
4,employee5
5,employee6
6,employee7
7,employee8


In [19]:
# With axis='columns' and thresh=7, a column is kept only if it holds at least
# 7 non-NA values.
# NOTE(review): this cell's execution count (19) is out of sequence and the
# saved output shows salary already filled with 47000.0, i.e. it was run after
# the mean-fill cell further down. Run in notebook order, 'salary' has only 6
# non-NA values and would be dropped, while 'position' (7) would be kept.
df.dropna(axis='columns', thresh=7)

Unnamed: 0,employee,position,salary
0,employee1,analyst,30000.0
1,employee2,manager,56000.0
2,employee3,analyst,28000.0
3,employee4,analyst,33000.0
4,employee5,manager,60000.0
5,employee6,senior manager,75000.0
6,employee7,manager,47000.0
7,employee8,,47000.0


In [13]:
# df still contains its missing values: the dropna() calls above returned new
# frames rather than modifying df.
df

Unnamed: 0,employee,position,salary
0,employee1,analyst,30000.0
1,employee2,manager,56000.0
2,employee3,analyst,28000.0
3,employee4,analyst,33000.0
4,employee5,manager,60000.0
5,employee6,senior manager,75000.0
6,employee7,manager,
7,employee8,,


![fillna2.png](attachment:fillna2.png)

In [14]:
# Forward-fill: each missing value is replaced by the last valid value above it
# in the same column (row 7's position becomes 'manager', both missing
# salaries become 75000.0). Returns a new frame; df is not modified.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` keyword is
# deprecated in recent pandas releases.
df.ffill()

Unnamed: 0,employee,position,salary
0,employee1,analyst,30000.0
1,employee2,manager,56000.0
2,employee3,analyst,28000.0
3,employee4,analyst,33000.0
4,employee5,manager,60000.0
5,employee6,senior manager,75000.0
6,employee7,manager,75000.0
7,employee8,manager,75000.0


In [15]:
# Replace the missing salaries with the average of the known ones.
# mean() skips NaN by default, so only the six recorded salaries count.
mean_salary = df['salary'].mean()
df['salary'] = df['salary'].fillna(mean_salary)

In [16]:
# Display the frame again: the two missing salaries now hold the mean
# (47000.0); the missing position on row 7 is still unfilled.
df

Unnamed: 0,employee,position,salary
0,employee1,analyst,30000.0
1,employee2,manager,56000.0
2,employee3,analyst,28000.0
3,employee4,analyst,33000.0
4,employee5,manager,60000.0
5,employee6,senior manager,75000.0
6,employee7,manager,47000.0
7,employee8,,47000.0


## Links and resources:
* isnull: https://pandas.pydata.org/docs/reference/api/pandas.isnull.html
* isna: https://pandas.pydata.org/docs/reference/api/pandas.isna.html
* dropna: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dropna.html
* fillna: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.fillna.html