In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
TC= pd.read_csv("GlobalLandTemperaturesByCountry.csv", encoding='ISO-8859-1')
TC=TC.dropna()
TC['dt'] = pd.to_datetime(TC['dt'])
TC=TC.drop(TC[TC['dt'] < "1960-01-01" ].index)
TC.set_index("dt", inplace=True)
print(TC)

            AverageTemperature  AverageTemperatureUncertainty   Country
dt                                                                     
1960-01-01              -4.380                          0.430    Ãland
1960-02-01              -5.233                          0.382    Ãland
1960-03-01              -2.362                          0.638    Ãland
1960-04-01               1.922                          0.450    Ãland
1960-05-01               8.495                          0.287    Ãland
...                        ...                            ...       ...
2013-04-01              21.142                          0.495  Zimbabwe
2013-05-01              19.059                          1.022  Zimbabwe
2013-06-01              17.613                          0.473  Zimbabwe
2013-07-01              17.000                          0.453  Zimbabwe
2013-08-01              19.759                          0.717  Zimbabwe

[155869 rows x 3 columns]


In [2]:
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
countries = TC.index.get_level_values('dt').unique()
unique_countries = TC['Country'].unique()
i=0
def ad_test(dataset):
     dftest = adfuller(dataset, autolag = 'AIC')
     print("1. ADF : ",dftest[0])
     print("2. P-Value : ", dftest[1])
     print("3. Num Of Lags : ", dftest[2])
     print("4. Num Of Observations Used For ADF Regression:",      dftest[3])
     print("5. Critical Values :")
     i=dftest[2]
     for key, val in dftest[4].items():
         print("\t",key, ": ", val)

    

In [None]:
from pmdarima import auto_arima
import warnings
from sklearn.metrics import mean_squared_error
from math import sqrt
for country in unique_countries:
    df=TC[TC["Country"]==country]
    if df.empty:
      continue
    ad_test(df['AverageTemperature'])
    warnings.filterwarnings("ignore")
    stepwise_fit = auto_arima(df['AverageTemperature'], trace=True, suppress_warnings=True)
    stepwise_fit.summary()
    best_model_params = stepwise_fit.get_params()
    order = best_model_params['order']
    train=df.iloc[:-30]
    test=df.iloc[-30:]
    model=ARIMA(train['AverageTemperature'],order=order)
    model=model.fit()
    model.summary()
    start=len(train)
    end=len(train)+len(test)-1
    pred=model.predict(start=start,end=end,typ='levels').rename('ARIMA Predictions')
    pred.index=df.index[start:end+1]
    pred.plot(legend=True)
    plt.clf()
    test['AverageTemperature'].plot(legend=True)
    plt.clf()
    test['AverageTemperature'].mean()
    rmse=sqrt(mean_squared_error(pred,test['AverageTemperature']))
    model2=ARIMA(df['AverageTemperature'],order=order)
    model2=model2.fit()
    index_future_dates=pd.date_range(start='2013-09-01',end='2030-12-01',freq=pd.DateOffset(months=1))
    pred2=model2.predict(start=len(df),end=len(df)+len(index_future_dates)- 1,typ='levels').rename('AverageTemperature')
    pred2.index=index_future_dates
    pred2 = pred2.to_frame()
    pred2["AverageTemperatureUncertainty"]=[rmse for _ in range(len(pred2))]
    pred2["Country"]=[country for _ in range(len(pred2))]
    TC = pd.concat([TC, pred2])

1. ADF :  -4.425845030121276
2. P-Value :  0.00026708357979543995
3. Num Of Lags :  14
4. Num Of Observations Used For ADF Regression: 629
5. Critical Values :
	 1% :  -3.4407890899441127
	 5% :  -2.8661459340578905
	 10% :  -2.5692228868342766
Performing stepwise search to minimize aic
 ARIMA(2,0,2)(0,0,0)[0] intercept   : AIC=2842.903, Time=0.71 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=4327.377, Time=0.05 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=3601.039, Time=0.09 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=3768.933, Time=0.17 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=4674.067, Time=0.04 sec
 ARIMA(1,0,2)(0,0,0)[0] intercept   : AIC=3358.350, Time=0.25 sec
 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=2970.350, Time=0.34 sec
 ARIMA(3,0,2)(0,0,0)[0] intercept   : AIC=2906.335, Time=0.83 sec
 ARIMA(2,0,3)(0,0,0)[0] intercept   : AIC=2741.887, Time=0.76 sec
 ARIMA(1,0,3)(0,0,0)[0] intercept   : AIC=3300.990, Time=0.30 sec
 ARIMA(3,0,3)(0,0,0)[0] intercept   : AIC=inf, Time=

 ARIMA(4,1,0)(0,0,0)[0] intercept   : AIC=621.110, Time=0.22 sec
 ARIMA(5,1,1)(0,0,0)[0]             : AIC=416.374, Time=0.44 sec

Best model:  ARIMA(5,1,1)(0,0,0)[0] intercept
Total fit time: 7.691 seconds
1. ADF :  -4.514800924160512
2. P-Value :  0.00018487900762009985
3. Num Of Lags :  19
4. Num Of Observations Used For ADF Regression: 624
5. Critical Values :
	 1% :  -3.440873084361836
	 5% :  -2.866182929627575
	 10% :  -2.5692425987220577
Performing stepwise search to minimize aic
 ARIMA(2,0,2)(0,0,0)[0] intercept   : AIC=2468.756, Time=0.83 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=4116.225, Time=0.08 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=3373.615, Time=0.11 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=3541.946, Time=0.15 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=5147.342, Time=0.05 sec
 ARIMA(1,0,2)(0,0,0)[0] intercept   : AIC=3083.373, Time=0.23 sec
 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=2625.438, Time=0.65 sec
 ARIMA(3,0,2)(0,0,0)[0] intercept   : AIC=262

In [None]:
pred2.to_csv('Temperature_1960_2030.csv', index=True)