In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
import numpy as np

df_temp = pd.read_csv("GlobalLandTemperaturesByCountry.csv")
df_emmisions = pd.read_csv("co-emissions-per-capita.csv")
locations = pd.read_csv("CountryLocations.csv", delimiter = "\t")

In [2]:
locations = locations.rename(columns={"name":"Country"})
del locations["country"]

# Editing temperature and emissions data

In [3]:
# Temperature dataframe converting the date 
def date_converter(date):
    if isinstance(date,str): # checks if the date is of string type
        year = float(date[:4])
    return year


#Renaming dt column to Year
df_temp = df_temp.rename(columns={"dt":"Year"})

df_temp['Year'] = df_temp['Year'].apply(date_converter).astype(int).astype(str)

# Gets rid of rows in the dataframe with a NaN element
no_NaN_df = df_temp.dropna()

df_temp = no_NaN_df.groupby(['Country', 'Year'], as_index=False).mean()

# Dropping Code column from the emissions dataframe
df_emmisions = df_emmisions.drop(['Code'], axis=1)

# Changing df_emmisions dataframe column Entity to Country
df_emmisions = df_emmisions.rename(columns={"Entity":"Country"})

df_emmisions['Year'] = df_emmisions['Year'].values.astype(str)

# Combining the two dataframes into one

In [4]:
emission_years = df_emmisions['Year'].to_list()
emission_distinct_years = list(set(emission_years))

df_combined = pd.merge(df_temp,df_emmisions, on=["Year", "Country"])
df_combined = pd.merge(df_combined, locations, on="Country")
del df_combined["AverageTemperatureUncertainty"]
df_combined

Unnamed: 0,Country,Year,AverageTemperature,Per capita CO2 emissions,latitude,longitude
0,Afghanistan,1949,13.350083,0.001912,33.939110,67.709953
1,Afghanistan,1950,13.043500,0.010871,33.939110,67.709953
2,Afghanistan,1951,13.967750,0.011684,33.939110,67.709953
3,Afghanistan,1952,14.175417,0.011542,33.939110,67.709953
4,Afghanistan,1953,14.650750,0.013216,33.939110,67.709953
...,...,...,...,...,...,...
18631,Zimbabwe,2009,21.377250,0.437129,-19.015438,29.154857
18632,Zimbabwe,2010,21.986250,0.619328,-19.015438,29.154857
18633,Zimbabwe,2011,21.602417,0.735250,-19.015438,29.154857
18634,Zimbabwe,2012,21.521333,0.583977,-19.015438,29.154857


# Classifier
Creating a classifier representative of a Linear Regression Model
This model will use the countries Per Capita CO2 emissions, the Year, and the Latitude of the country to predict the Average temperature of that country. 
This model will evaluate whether there is a link between the CO2 emissions of a country and their temperature. It has been suggested throughout the globe that the increasing in carbon emissions has increased the global temperature of the Earth resulting in the beginning stages of global warming. 

In [9]:
model = Pipeline([
    ("poly", PolynomialFeatures(degree=2, include_bias=False)),
    ("model", LinearRegression()),
])
xcols = ["Per capita CO2 emissions", "Year", "latitude"]
ycols = ["AverageTemperature"]

### Calculating the scores of each country and placing the mean of the scores into a dictionary consisting of the Country and their respective score correlating to the predicted vs actual temperature

In [29]:
all_countries = list(set(df_combined["Country"]))
def country_cal(country):
    country_df = df_combined[df_combined["Country"] == country]
    train, test = train_test_split(country_df)
    scores = cross_val_score(model, train[xcols], train[ycols])
    return np.abs(scores).mean()

vals = {}
for c in all_countries:
    v = country_cal(c)
    if v > 0 and v <= 1:
        vals[c] = v
vals = sorted(vals.items(), key=lambda x:x[1], reverse=True)
sort_vals = dict(vals)
i = 0
for key, value in sort_vals.items():
    i+=1
    print(key + ": " + str(value))
    if value<.50:
        break

Botswana: 0.987081204390903
Malta: 0.9759112351494721
Andorra: 0.9694900451288417
Cambodia: 0.9506239849189058
Fiji: 0.8553292936197525
Niger: 0.7938208567265848
Nepal: 0.7897504724421498
Senegal: 0.7576539932888273
Aruba: 0.7303270923240929
Malaysia: 0.7274958697251928
Uganda: 0.7266775157973877
Indonesia: 0.7135751792335677
Djibouti: 0.6818728598699556
Philippines: 0.6677156198584119
Sri Lanka: 0.6344995329066186
Liberia: 0.6213637261327447
Vietnam: 0.6105392872834223
Montserrat: 0.6020243220268479
United Arab Emirates: 0.5988475703018135
Ethiopia: 0.5974925570807808
Togo: 0.5832359395455539
Eritrea: 0.5823734884022199
Qatar: 0.5815507194489329
Burundi: 0.5807402559101472
Peru: 0.5631293391329921
South Africa: 0.5629499730235598
Cameroon: 0.561311615600879
Rwanda: 0.5588240965602057
Brazil: 0.5564076345652819
Bangladesh: 0.5555469016450939
Ghana: 0.5476620871357596
Italy: 0.5436140273387008
Morocco: 0.5421955748689912
Seychelles: 0.539105369675868
Samoa: 0.5358583614200357
Australia:

In [33]:
print(str(i) + " countries have an average score above 50% percent.")

48 countries have an average score above 50% percent.


### Analysis
By using the Year, CO2 emmisions per Capita, and the latitude of the country, the model is able to predict the Average Temperature of the 48 countries with more than 50% accuracy 

In conclusion, there does seem to be a correlation between a countries Co2 emmissions per Capita and their average temperature. Therefore, with the world's CO2 emissions increasing due to increasing of production, the average global temperature is also increasing since their is a direct relationship between these two values. This means that the polar ice caps will begin to melt because of the rise in temperature which will cause a rise in sea levels. So humans are in danger of cities going underwater if we do not tackle climate change because it is an impending danger that can cause a lot of damage.  