In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt

#import regex and math library
import re
import math

In [2]:
df = pd.read_csv('../data/Annual_Surface_Temperature_Change.csv')
df

Unnamed: 0,ObjectId,Country,ISO2,ISO3,Indicator,Unit,Source,CTS_Code,CTS_Name,CTS_Full_Descriptor,...,F2012,F2013,F2014,F2015,F2016,F2017,F2018,F2019,F2020,F2021
0,1,"Afghanistan, Islamic Rep. of",AF,AFG,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,0.234,1.308,0.457,1.101,1.607,1.568,1.580,0.960,0.544,1.421
1,2,Albania,AL,ALB,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.568,1.444,1.322,1.665,1.601,1.269,2.146,1.823,1.623,1.682
2,3,Algeria,DZ,DZA,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.128,1.173,1.676,1.101,1.736,1.498,1.211,1.094,1.913,2.317
3,4,American Samoa,AS,ASM,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,0.646,0.883,0.554,0.394,0.924,0.820,0.574,0.924,0.815,0.653
4,5,"Andorra, Principality of",AD,AND,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.196,0.757,1.857,1.546,1.830,1.771,1.761,1.813,2.401,1.367
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,223,Western Sahara,EH,ESH,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.234,1.282,1.244,1.277,1.636,2.078,0.851,1.430,2.026,1.557
223,224,World,,WLD,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,1.058,1.007,1.042,1.406,1.658,1.424,1.284,1.449,1.713,1.442
224,225,"Yemen, Rep. of",YE,YEM,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,,,,,,,,,,
225,226,Zambia,ZM,ZMB,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",...,0.926,0.746,0.638,1.486,1.474,0.721,0.727,1.295,1.252,1.002


In [5]:




def getCountryModel(country_code):
    #regex expression used to get all columns containing yearly data
    # column headers include F2000, F2001, F2002, F1987 so on
    #\d indicates integer in the regular expression
    regex_expr = "F\d\d\d\d"
    
    #storing all column names of the data frame in a list
    col_names = []
    for col in df.columns:
        col_names.append(col)

    #storing the column names in a string which will be used for the regular expression    
    string = ""
    for col in col_names:
        string += col

    #using the regular expression to get a list containing only the yearly values headers (e.g. F2000, F2001)   
    listReg = re.findall(regex_expr, string)
    
    # storing only the yearly values, object id, and Country headers in a new list
    dropped_cols = []
    for item in col_names: 
        if listReg.count(item) == 0 and item != 'ObjectId' and item != 'Country':
            dropped_cols.append(item)

    # dropping unused columns
    modified_df = df
    for item in dropped_cols:
        modified_df = modified_df.drop(item, axis = 1)


    # new, separate data frame without the object id and country headers
    row = modified_df[modified_df['ObjectId'] == country_code]
    row = row.drop('ObjectId', axis = 1)
    row = row.drop('Country', axis = 1)


    # getting the values in a 1D array
    temp_changes = row.values.tolist()
    temp_changes = temp_changes[0]


    #1961 is the start year, and thus our start index
    start = 1961
    x_values = []
    y_values = []

    for item in temp_changes:
        x_values.append(start)
        # append if not nan/null
        if not math.isnan(item):
            y_values.append(item)
        start += 1

    # getting the y = mx + b model 
    outer = []
    for i in range(len(y_values)):
        for x, y in zip(x_values, y_values):
            outer.append([x, y])

    dt = np.array(outer)
    x = dt[:, 0]
    y = dt[:, 1]

    theta = np.polyfit(x, y, 1)
    name = df['Country'].values[country_code - 1]
    model_value = f'The model of {name}: {theta}'


    return model_value


In [15]:
results = []
start = 1
end = 226

while start <= end:
    results.append(getCountryModel(start))
    start += 1

In [16]:
# unit of temeprature = degree celsius 
# x = time (year)
# y = temperature change 
for item in results: 
    print(item)

The model of Afghanistan, Islamic Rep. of: [ 2.68462189e-02 -5.29391989e+01]
The model of Albania: [ 3.17259122e-02 -6.26657338e+01]
The model of Algeria: [ 3.27581174e-02 -6.45183626e+01]
The model of American Samoa: [ 1.91069858e-02 -3.76447792e+01]
The model of Andorra, Principality of: [ 3.36609730e-02 -6.63066694e+01]
The model of Angola: [ 2.63468006e-02 -5.19046440e+01]
The model of Anguilla: [ 1.97925965e-02 -3.90078465e+01]
The model of Antigua and Barbuda: [ 1.97777438e-02 -3.89818491e+01]
The model of Argentina: [ 1.34446325e-02 -2.64556567e+01]
The model of Armenia, Rep. of: [ 6.77328142e-02 -1.32721075e+02]
The model of Aruba, Kingdom of the Netherlands: [ 2.10088283e-02 -4.14006157e+01]
The model of Australia: [ 1.97647805e-02 -3.88883994e+01]
The model of Austria: [ 3.99552089e-02 -7.87641815e+01]
The model of Azerbaijan, Rep. of: [ 6.76173526e-02 -1.32473613e+02]
The model of Bahamas, The: [ 2.12482813e-02 -4.18409183e+01]
The model of Bahrain, Kingdom of: [ 3.83793231e