# Analysing Time Series Data

Contents
- 01 Importing libraries and data
- 02 Subsetting, wrangling and cleaning time series
- 03 Time series analysis: decomposition
- 04 Testing for stationarity 
- 05 Standardising time series data 


## 01 Importing libraries and data 

In [1]:
import quandl
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import statsmodels.api as sm # Using .api imports the public access version of statsmodels, which is a library that handles 
# statistical models.
import os
import warnings # This is a library that handles warnings.

# Silence every Python warning so library notices do not clutter cell output.
warnings.filterwarnings(action="ignore")

# Draw all subsequent matplotlib figures with the FiveThirtyEight style sheet.
plt.style.use("fivethirtyeight")

In [2]:
# Render matplotlib figures directly below the cell that creates them.
%matplotlib inline

In [3]:
# Define path
#
# Root folder of the project data. Set the FINAL_PROJECT_DATA_DIR environment
# variable to point the notebook at another location without editing this cell;
# when the variable is unset, the original local folder is used.
# The default used to begin with a doubled slash ('//Users/...'), whose meaning
# POSIX leaves implementation-defined, so it now starts with a single slash.

path = os.environ.get(
    'FINAL_PROJECT_DATA_DIR',
    r'/Users/amypalomino/Documents/June 22 Final Project/02 Data',
)

In [4]:
# Import dataframe

# Build the CSV location under the project data folder, then load it.
# index_col=False tells pandas not to use the first column as the index.
source_csv = os.path.join(path, 'Original Data', 'FAOSTAT_data_6-6-2022.csv')
df = pd.read_csv(source_csv, index_col=False)

## 02 Subsetting, wrangling and cleaning time series


In [5]:
# Preview the first five rows (the default for head) of the loaded data.
df.head()

Unnamed: 0,Domain Code,Domain,Area Code (FAO),Area,Element Code,Element,Months Code,Months,Year Code,Year,Unit,Value,Flag,Flag Description
0,ET,Temperature change,2,Afghanistan,7271,Temperature change,7001,January,1970,1970,°C,0.824,Fc,Calculated data
1,ET,Temperature change,2,Afghanistan,7271,Temperature change,7001,January,1971,1971,°C,-1.465,Fc,Calculated data
2,ET,Temperature change,2,Afghanistan,7271,Temperature change,7001,January,1972,1972,°C,-1.237,Fc,Calculated data
3,ET,Temperature change,2,Afghanistan,7271,Temperature change,7001,January,1973,1973,°C,-2.871,Fc,Calculated data
4,ET,Temperature change,2,Afghanistan,7271,Temperature change,7001,January,1974,1974,°C,-1.181,Fc,Calculated data


In [6]:
# List the dtype pandas inferred for each column.
df.dtypes

Domain Code          object
Domain               object
Area Code (FAO)       int64
Area                 object
Element Code          int64
Element              object
Months Code           int64
Months               object
Year Code             int64
Year                  int64
Unit                 object
Value               float64
Flag                 object
Flag Description     object
dtype: object

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Area Code (FAO),Element Code,Months Code,Year Code,Year,Value
count,200362.0,200362.0,200362.0,200362.0,200362.0,192655.0
mean,129.99474,7271.0,7009.882353,1996.188698,1996.188698,0.596271
std,75.533525,0.0,6.037957,14.952114,14.952114,1.037985
min,1.0,7271.0,7001.0,1970.0,1970.0,-9.186
25%,64.0,7271.0,7005.0,1983.0,1983.0,0.036
50%,130.0,7271.0,7009.0,1997.0,1997.0,0.531
75%,193.0,7271.0,7016.0,2009.0,2009.0,1.096
max,299.0,7271.0,7020.0,2021.0,2021.0,11.752


In [8]:
# Show the last five rows (the default for tail) to inspect the end of the data.
df.tail()

Unnamed: 0,Domain Code,Domain,Area Code (FAO),Area,Element Code,Element,Months Code,Months,Year Code,Year,Unit,Value,Flag,Flag Description
200357,ET,Temperature change,181,Zimbabwe,7271,Temperature change,7020,Meteorological year,2017,2017,°C,0.116,Fc,Calculated data
200358,ET,Temperature change,181,Zimbabwe,7271,Temperature change,7020,Meteorological year,2018,2018,°C,0.405,Fc,Calculated data
200359,ET,Temperature change,181,Zimbabwe,7271,Temperature change,7020,Meteorological year,2019,2019,°C,0.939,Fc,Calculated data
200360,ET,Temperature change,181,Zimbabwe,7271,Temperature change,7020,Meteorological year,2020,2020,°C,0.415,Fc,Calculated data
200361,ET,Temperature change,181,Zimbabwe,7271,Temperature change,7020,Meteorological year,2021,2021,°C,-0.101,Fc,Calculated data


In [9]:
# Print the name of every column whose values are not all of one Python type.
# Series.map(type) replaces DataFrame.applymap, which pandas deprecated in 2.1.
# Counting the distinct types also works on an empty frame, where the previous
# comparison against .iloc[0] raised IndexError.
for col in df.columns:
    value_types = df[col].map(type)
    if value_types.nunique() > 1:
        print(col)

In [10]:
# Count the missing values in each column.
df.isnull().sum()

Domain Code            0
Domain                 0
Area Code (FAO)        0
Area                   0
Element Code           0
Element                0
Months Code            0
Months                 0
Year Code              0
Year                   0
Unit                   0
Value               7707
Flag                   0
Flag Description       0
dtype: int64

In [11]:
# Collect the rows that exactly repeat an earlier row; an empty result means
# there are no full-row duplicates.
is_repeat = df.duplicated()
df_dups = df[is_repeat]
df_dups

Unnamed: 0,Domain Code,Domain,Area Code (FAO),Area,Element Code,Element,Months Code,Months,Year Code,Year,Unit,Value,Flag,Flag Description


In [12]:
# Frequency of each 'Domain Code' value; dropna=False also counts missing entries.
df['Domain Code'].value_counts(dropna = False)

ET    200362
Name: Domain Code, dtype: int64

In [13]:
# Frequency of each 'Element' value; dropna=False also counts missing entries.
df['Element'].value_counts(dropna = False)

Temperature change    200362
Name: Element, dtype: int64

In [14]:
# Frequency of each 'Months' value; dropna=False also counts missing entries.
df['Months'].value_counts(dropna = False)

January                11786
October                11786
Sep–Oct–Nov            11786
Jun–Jul–Aug            11786
Mar–Apr–May            11786
Dec–Jan–Feb            11786
December               11786
November               11786
September              11786
February               11786
August                 11786
July                   11786
June                   11786
May                    11786
April                  11786
March                  11786
Meteorological year    11786
Name: Months, dtype: int64

In [15]:
# Frequency of each 'Flag' value; dropna=False also counts missing entries.
df['Flag'].value_counts(dropna = False)

Fc    192655
NV      7707
Name: Flag, dtype: int64

In [16]:
# Frequency of each 'Flag Description' value; dropna=False also counts missing entries.
df['Flag Description'].value_counts(dropna = False)

Calculated data       192655
Data not available      7707
Name: Flag Description, dtype: int64

In [17]:
# List the current column names.
df.columns

Index(['Domain Code', 'Domain', 'Area Code (FAO)', 'Area', 'Element Code',
       'Element', 'Months Code', 'Months', 'Year Code', 'Year', 'Unit',
       'Value', 'Flag', 'Flag Description'],
      dtype='object')

In [18]:
# Remove the columns that are not carried forward in the analysis.
# errors='ignore' keeps this cell safe to re-run: df is reassigned here, so a
# second run would otherwise raise KeyError because the columns are already gone.
df = df.drop(
    columns=['Domain Code', 'Domain', 'Element Code', 'Year Code', 'Flag'],
    errors='ignore',
)

In [19]:
# Dimensions of the frame as (rows, columns).
df.shape

(200362, 9)

In [20]:
# Preview the first five rows.
df.head(5)

Unnamed: 0,Area Code (FAO),Area,Element,Months Code,Months,Year,Unit,Value,Flag Description
0,2,Afghanistan,Temperature change,7001,January,1970,°C,0.824,Calculated data
1,2,Afghanistan,Temperature change,7001,January,1971,°C,-1.465,Calculated data
2,2,Afghanistan,Temperature change,7001,January,1972,°C,-1.237,Calculated data
3,2,Afghanistan,Temperature change,7001,January,1973,°C,-2.871,Calculated data
4,2,Afghanistan,Temperature change,7001,January,1974,°C,-1.181,Calculated data


In [25]:
# Count the non-null 'Months' entries recorded for each year.
rows_per_year = df.groupby('Year').agg({'Months': ['count']})
rows_per_year

Unnamed: 0_level_0,Months
Unnamed: 0_level_1,count
Year,Unnamed: 1_level_2
1970,3621
1971,3621
1972,3621
1973,3621
1974,3621
1975,3621
1976,3621
1977,3621
1978,3621
1979,3621
