In [21]:
# Dependencies and Setup
import pandas as pd
from datetime import datetime 
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np
from scipy.stats import linregress

In [22]:
# Read fertilizer_data.csv (path relative to the working directory) into a
# pandas DataFrame and preview the first rows
file_to_load = 'fertilizer_data.csv'
fertilizer_data = pd.read_csv(file_to_load)
fertilizer_data.head()

Unnamed: 0,Domain Code,Domain,Area Code (FAO),Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,RFN,Fertilizers by Nutrient,10,Australia,5510,Production,3102,Nutrient nitrogen N (total),1990,1990,tonnes,224500.0,X,International reliable sources
1,RFN,Fertilizers by Nutrient,10,Australia,5510,Production,3102,Nutrient nitrogen N (total),1991,1991,tonnes,229500.0,X,International reliable sources
2,RFN,Fertilizers by Nutrient,10,Australia,5510,Production,3102,Nutrient nitrogen N (total),1992,1992,tonnes,224100.0,X,International reliable sources
3,RFN,Fertilizers by Nutrient,10,Australia,5510,Production,3102,Nutrient nitrogen N (total),1993,1993,tonnes,232300.0,X,International reliable sources
4,RFN,Fertilizers by Nutrient,10,Australia,5510,Production,3102,Nutrient nitrogen N (total),1994,1994,tonnes,228900.0,X,International reliable sources


In [23]:
# Print each column label of the raw frame on its own line to see which
# fields are available before deciding what to keep
for col in fertilizer_data.columns:
    print(col)

Domain Code
Domain
Area Code (FAO)
Area
Element Code
Element
Item Code
Item
Year Code
Year
Unit
Value
Flag
Flag Description


In [24]:
# Count missing (NaN/None) entries per column.
# Note: isnull() does not flag empty or whitespace-only strings.
fertilizer_data.isnull().sum()

Domain Code         0
Domain              0
Area Code (FAO)     0
Area                0
Element Code        0
Element             0
Item Code           0
Item                0
Year Code           0
Year                0
Unit                0
Value               0
Flag                0
Flag Description    0
dtype: int64

In [25]:
# Drop the identifier, unit and flag metadata so that only the Element, Item,
# Year and Value columns remain. Each label is listed exactly once (the
# previous list repeated "Domain Code").
clean_fertdata = fertilizer_data.drop(
    columns=[
        "Domain Code",
        "Domain",
        "Area Code (FAO)",
        "Area",
        "Element Code",
        "Item Code",
        "Year Code",
        "Unit",
        "Flag",
        "Flag Description",
    ]
)

In [26]:
# Preview the first rows to confirm only the columns of interest are left
clean_fertdata.head()

Unnamed: 0,Element,Item,Year,Value
0,Production,Nutrient nitrogen N (total),1990,224500.0
1,Production,Nutrient nitrogen N (total),1991,229500.0
2,Production,Nutrient nitrogen N (total),1992,224100.0
3,Production,Nutrient nitrogen N (total),1993,232300.0
4,Production,Nutrient nitrogen N (total),1994,228900.0


In [27]:
# Relabel the measurement column so its name carries the unit; the frame is
# modified in place.
clean_fertdata.rename(
    {"Value": "Value(tonnes)"},
    axis="columns",
    inplace=True,
)

In [28]:
# Preview the first rows to confirm the column is now labelled "Value(tonnes)"
clean_fertdata.head()

Unnamed: 0,Element,Item,Year,Value(tonnes)
0,Production,Nutrient nitrogen N (total),1990,224500.0
1,Production,Nutrient nitrogen N (total),1991,229500.0
2,Production,Nutrient nitrogen N (total),1992,224100.0
3,Production,Nutrient nitrogen N (total),1993,232300.0
4,Production,Nutrient nitrogen N (total),1994,228900.0


In [29]:
# Print the frame as plain text (pandas elides the middle rows) to see its
# first/last rows and overall shape
print(clean_fertdata)

              Element                         Item  Year  Value(tonnes)
0          Production  Nutrient nitrogen N (total)  1990      224500.00
1          Production  Nutrient nitrogen N (total)  1991      229500.00
2          Production  Nutrient nitrogen N (total)  1992      224100.00
3          Production  Nutrient nitrogen N (total)  1993      232300.00
4          Production  Nutrient nitrogen N (total)  1994      228900.00
..                ...                          ...   ...            ...
337  Agricultural Use  Nutrient potash K2O (total)  2015      239894.89
338  Agricultural Use  Nutrient potash K2O (total)  2016      255367.97
339  Agricultural Use  Nutrient potash K2O (total)  2017      273126.19
340  Agricultural Use  Nutrient potash K2O (total)  2018      293693.53
341  Agricultural Use  Nutrient potash K2O (total)  2019      288016.25

[342 rows x 4 columns]


In [31]:
# The analysis focuses on fertilizer applied to crops (agricultural use), so
# discard the production, import-quantity and export-quantity records.
# A single boolean mask replaces three separate in-place, label-based drops:
# it filters row by row, so it does not depend on the index labels being
# unique, and .copy() gives later cells an independent frame to modify.
excluded_elements = ["Export Quantity", "Production", "Import Quantity"]
clean_fertdata = clean_fertdata[
    ~clean_fertdata["Element"].isin(excluded_elements)
].copy()

In [32]:
# Print the frame to check which Element rows remain after the filter and how
# many rows are left
print(clean_fertdata)

              Element                         Item  Year  Value(tonnes)
90   Agricultural Use  Nutrient nitrogen N (total)  1990      439400.00
91   Agricultural Use  Nutrient nitrogen N (total)  1991      462300.00
92   Agricultural Use  Nutrient nitrogen N (total)  1992      488200.00
93   Agricultural Use  Nutrient nitrogen N (total)  1993      565400.00
94   Agricultural Use  Nutrient nitrogen N (total)  1994      583200.00
..                ...                          ...   ...            ...
337  Agricultural Use  Nutrient potash K2O (total)  2015      239894.89
338  Agricultural Use  Nutrient potash K2O (total)  2016      255367.97
339  Agricultural Use  Nutrient potash K2O (total)  2017      273126.19
340  Agricultural Use  Nutrient potash K2O (total)  2018      293693.53
341  Agricultural Use  Nutrient potash K2O (total)  2019      288016.25

[90 rows x 4 columns]


In [35]:
# After the element filter the Element column no longer distinguishes rows,
# so remove it. errors="ignore" keeps this cell safe to re-run: without it a
# second execution raises KeyError because the column is already gone.
clean_fertdata = clean_fertdata.drop(columns=["Element"], errors="ignore")

In [36]:
# Print the frame to confirm the Element column has been removed
print(clean_fertdata)

                            Item  Year  Value(tonnes)
90   Nutrient nitrogen N (total)  1990      439400.00
91   Nutrient nitrogen N (total)  1991      462300.00
92   Nutrient nitrogen N (total)  1992      488200.00
93   Nutrient nitrogen N (total)  1993      565400.00
94   Nutrient nitrogen N (total)  1994      583200.00
..                           ...   ...            ...
337  Nutrient potash K2O (total)  2015      239894.89
338  Nutrient potash K2O (total)  2016      255367.97
339  Nutrient potash K2O (total)  2017      273126.19
340  Nutrient potash K2O (total)  2018      293693.53
341  Nutrient potash K2O (total)  2019      288016.25

[90 rows x 3 columns]


In [37]:
# Persist the cleaned frame to CSV in the working directory. index=True is the
# pandas default, spelled out here because it means the frame's row labels are
# written as the first, unnamed column of the file.
clean_fertdata.to_csv(
    'fertilizer_data_clean.csv',
    index=True,
)