# `.fillna()` to fill NULL values

### Import Pandas

In [1]:
import pandas as pd

### U.S. Major League Soccer Salaries

In [2]:
# read the MLS salaries CSV (path is relative to the working directory) into a DataFrame
salaries = pd.read_csv("mls_salaries.csv")

### NaN values in the tail

In [3]:
# call the tail method to see the last rows of what we're working with
salaries.tail(3)

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
612,VAN,Waston,Kendall,D,350000.0,368125.0
613,,,,,,
614,VAN,Williams,Sheanon,D,175000.0,184000.0


### Replace NaN values with 0 using `fillna()`

In [4]:
# substitute 0 for every NaN in the DataFrame;
# fillna returns a new object here, so `salaries` itself is left untouched
salaries.fillna(value=0)

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
0,ATL,Almiron,Miguel,M,1912500.0,2297000.00
1,ATL,Ambrose,Mikey,D,65625.0,65625.00
2,ATL,Asad,Yamil,M,150000.0,150000.00
3,ATL,Bloom,Mark,D,99225.0,106573.89
4,ATL,Carleton,Andrew,F,65000.0,77400.00
...,...,...,...,...,...,...
610,VAN,Teibert,Russell,M,126500.0,194000.00
611,VAN,Tornaghi,Paolo,GK,80000.0,80000.00
612,VAN,Waston,Kendall,D,350000.0,368125.00
613,0,0,0,0,0.0,0.00


### Choosing a fill value that makes sense for categorical columns
When working with a DataFrame of mixed data types, you might want to fill certain columns rather than the whole DataFrame. For example, 0 works for the salary and compensation columns, but not really for position or club. For those, try something more semantic like "none".

In [5]:
# examine the position column before filling; the output below shows NaN in row 613
salaries["position"]

0        M
1        D
2        M
3        D
4        F
      ... 
610      M
611     GK
612      D
613    NaN
614      D
Name: position, Length: 615, dtype: object

In [6]:
# you can also specify which column(s) to target.
# Assign the filled column back to the DataFrame to make the change permanent.
# Avoid salaries["position"].fillna(..., inplace=True): that is chained assignment on an
# intermediate Series, which raises a FutureWarning in pandas 2.2+ and leaves `salaries`
# unchanged once Copy-on-Write is enabled (the default in pandas 3.0).
salaries["position"] = salaries["position"].fillna("none") # permanent

In [8]:
# display the column again to check the result of the fill
salaries["position"]

0         M
1         D
2         M
3         D
4         F
       ... 
610       M
611      GK
612       D
613    none
614       D
Name: position, Length: 615, dtype: object

### Try `base_salary` next

In [11]:
# examine the base_salary column before filling; the output below shows NaN in row 613
salaries["base_salary"]

0      1912500.0
1        65625.0
2       150000.0
3        99225.0
4        65000.0
         ...    
610     126500.0
611      80000.0
612     350000.0
613          NaN
614     175000.0
Name: base_salary, Length: 615, dtype: float64

In [12]:
# Assign the filled column back to the DataFrame to make the change permanent.
# Avoid salaries["base_salary"].fillna(0, inplace=True): that is chained assignment on an
# intermediate Series, which raises a FutureWarning in pandas 2.2+ and leaves `salaries`
# unchanged once Copy-on-Write is enabled (the default in pandas 3.0).
salaries["base_salary"] = salaries["base_salary"].fillna(0) # permanent

In [13]:
# display the column again to check the result of the fill
salaries["base_salary"]

0      1912500.0
1        65625.0
2       150000.0
3        99225.0
4        65000.0
         ...    
610     126500.0
611      80000.0
612     350000.0
613          0.0
614     175000.0
Name: base_salary, Length: 615, dtype: float64