In [14]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt

#import regex and math library
import re
import math



In [15]:
df = pd.read_csv('../data/Annual_Temperature.csv')
df

Unnamed: 0,ObjectId,Country,ISO2,ISO3,F2000,F2001,F2002,F2003,F2004,F2005,F2006,F2007,F2008,F2009,F2010,F2011,F2012,F2013
0,1,Afghanistan,AF,AFG,15.497833,15.778083,15.537667,14.916000,15.770917,14.980000,15.595583,15.106167,15.216500,15.257750,15.828667,15.518000,14.481583,16.533625
1,2,Albania,AL,ALB,13.746583,13.690417,13.559917,13.626583,13.258167,12.747083,12.976917,13.890500,13.955167,13.844250,13.775417,13.443250,13.768250,14.993875
2,3,Algeria,DZ,DZA,24.032250,24.724417,24.263000,24.393667,23.916000,24.222583,24.200833,24.065333,23.950250,24.154333,25.215667,24.144167,23.954833,25.121500
3,4,American Samoa,AS,ASM,27.219833,27.227583,27.537917,27.264833,27.281167,27.373500,27.081250,27.452417,26.995083,27.034250,27.453417,27.009500,27.201417,27.517250
4,5,Andorra,AD,AND,12.311917,12.216083,12.319583,12.900667,12.043000,11.643417,12.834333,12.155250,11.978583,12.566667,11.480833,12.994417,12.339917,12.307875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,223,Western Sahara,EH,ESH,23.020083,23.704917,23.457833,23.408917,23.420667,23.507667,23.441833,23.145417,23.323083,23.381083,24.114250,23.401250,23.303417,23.744250
223,224,World,,WLD,,,,,,,,,,,,,,
224,225,Yemen,YE,YEM,27.436167,27.068917,27.125167,27.357750,27.334083,27.294667,27.175000,27.310750,26.807500,27.342417,27.302750,27.288250,27.445000,28.129750
225,226,Zambia,ZM,ZMB,21.558750,21.802417,22.292417,22.156750,21.768083,22.813500,21.784250,21.838500,21.535833,21.670250,22.267500,21.771583,21.697750,21.196000


In [16]:

def getCountryModelTemp(country_code):
    #regex expression used to get all columns containing yearly data
    # column headers include F2000, F2001, F2002, F1987 so on
    #\d indicates integer in the regular expression
    regex_expr = "F\d\d\d\d"
    
    #storing all column names of the data frame in a list
    col_names = []
    for col in df.columns:
        col_names.append(col)

    #storing the column names in a string which will be used for the regular expression    
    string = ""
    for col in col_names:
        string += col

    #using the regular expression to get a list containing only the yearly values headers (e.g. F2000, F2001)   
    listReg = re.findall(regex_expr, string)
    
    # storing only the yearly values, object id, and Country headers in a new list
    dropped_cols = []
    for item in col_names: 
        if listReg.count(item) == 0 and item != 'ObjectId' and item != 'Country':
            dropped_cols.append(item)

    # dropping unused columns
    modified_df = df
    for item in dropped_cols:
        modified_df = modified_df.drop(item, axis = 1)


    # new, separate data frame without the object id and country headers
    row = modified_df[modified_df['ObjectId'] == country_code]
    row = row.drop('ObjectId', axis = 1)
    row = row.drop('Country', axis = 1)


    # getting the values in a 1D array
    temp_changes = row.values.tolist()
    temp_changes = temp_changes[0]


    #1961 is the start year, and thus our start index
    start = 1961
    x_values = []
    y_values = []

    for item in temp_changes:
        x_values.append(start)
        # append if not nan/null
        if not math.isnan(item):
            y_values.append(item)
        start += 1

    # getting the y = mx + b model 
    outer = []
    for i in range(len(y_values)):
        for x, y in zip(x_values, y_values):
            outer.append([x, y])

    dt = np.array(outer)
    x = dt[:, 0]
    y = dt[:, 1]

    theta = np.polyfit(x, y, 1)
    name = df['Country'].values[country_code - 1]
    iso = df['ISO3'].values[country_code-1]
    model_value = f'{name},{iso},{theta[0]},{theta[1]}'


    return model_value

In [17]:
print(getCountryModelTemp(117))

Lithuania,LTU,-0.015352380973628873,37.53128575597195


In [29]:
results = []
start = 1
end = 226

while start <= end:
    try:
        results.append(getCountryModelTemp(start))
    except:
        pass
    start += 1
    
for item in results:
    print(item)

Afghanistan,AFG,0.006746611780216838,2.155925250994802
Albania,ALB,0.053914377340655226,-92.41393920273914
Algeria,DZA,0.02333736252747094,-21.60491553494192
American Samoa,ASM,-0.004078571186813232,35.28526142934075
Andorra,AND,0.0008550367252754036,10.610039648020642
Angola,AGO,-0.034852106329668134,91.05115908362203
Anguilla,AIA,0.014677655560441302,-1.373989696596831
Antigua And Barbuda,ATG,0.01397902956044141,-0.1677555408827597
Argentina,ARG,-0.0029999084175799844,21.126763263731473
Armenia,ARM,0.06132417572966464,-110.63225622404374
Aruba,ABW,0.009005311010991238,10.947842252303312
Australia,AUS,-0.0008149267032934506,23.836764122301297
Austria,AUT,0.012145695945052448,-16.39257641481926
Azerbaijan,AZE,0.059253205274720014,-103.81716649658301
Bahamas,BHS,0.005364652131869962,15.263457646260134
Bahrain,BHR,0.03640998138461374,-44.3022663506561
Bangladesh,BGD,0.038616483406592864,-50.469573960328596
Barbados,BRB,0.01807481716483675,-8.221973605387722
Belarus,BLR,0.0194534798703262