### Countries with CO2 data in 1850
HUNGARY, 
CANADA,
USSR,
NORWAY,
SWEDEN,
DENMARK,
IRELAND,
SPAIN,
AUSTRIA,
NETHERLANDS,
POLAND,
BELGIUM,
GERMANY,
FRANCE,
USA,
UK


In [None]:
import pandas as pd
import numpy as np
import scipy as sp

In [None]:
# Load the co2 data set from CSV, asking pandas to parse the "year"
# column as dates (the alternative noted earlier was dtype=object).
myCO2 = "./source/myCO2.csv"
co2_df = pd.read_csv(filepath_or_buffer=myCO2, parse_dates=["year"])
# Summarise columns and dtypes, then preview the first five rows.
co2_df.info()
co2_df.head()

In [None]:
# # convert co2 to a float
# co2_df["co2"] = co2_df["co2"].astype(float)
# co2_df.head(5)

In [None]:
# Load the yearly temperature data set from CSV, asking pandas to parse
# the "year" column as dates (the alternative noted earlier was dtype=object).
myTmps = "./source/myYearTempsF.csv"
tmps_df = pd.read_csv(filepath_or_buffer=myTmps, parse_dates=["year"])
# Summarise columns and dtypes.
tmps_df.info()

In [None]:
# Upper-case the country names ahead of the merge on "country"
# (presumably the co2 data uses upper-case names -- verify against the CSV).
# The explicit float casts below are disabled; the temperature columns keep
# whatever dtype read_csv inferred.
# tmps_df["avg_yly_tmp_f"]= tmps_df["avg_yly_tmp_f"].astype(float)
# tmps_df["avg_yly_tmp_c"]= tmps_df["avg_yly_tmp_c"].astype(float)
# tmps_df["avg_yly_tmp_uncertainty"]= tmps_df["avg_yly_tmp_uncertainty"].\
#                                       astype(float)
# The vectorized .str accessor leaves missing values as NaN instead of
# raising AttributeError the way a per-row x.upper() would.
tmps_df["country"] = tmps_df["country"].str.upper()
tmps_df.info()
tmps_df.head(5)

In [None]:
# Inner-join the co2 and temperature frames on the ("year", "country") pair,
# so only rows present in both data sets survive.
co2_tmp_df = co2_df.merge(
    tmps_df,
    how="inner",
    on=["year", "country"],
    suffixes=("_x", "_y"),
)
# Row count of the merged frame, followed by a preview of the first five rows.
print(co2_tmp_df.shape[0])
co2_tmp_df.head()

In [None]:
# Write the merged frame to CSV: header row included, index column omitted.
co2_tmp_df.to_csv(
    "../output/co2_tmp_1.csv",
    header=True,
    index=False,
)

In [None]:
# Count the non-null "avg_yly_tmp_f" values per country and expose the
# result as a "count" column next to "country".
check_counts = (
    co2_tmp_df
    .groupby(["country"], as_index=False)["avg_yly_tmp_f"]
    .count()
    .rename(columns={"avg_yly_tmp_f": "count"})
)
check_counts.head()

In [None]:
# Write the per-country counts to an Excel workbook: header row included,
# index column omitted.
check_counts.to_excel(
    "../output/check_counts.xlsx",
    header=True,
    index=False,
)

In [None]:
# Keep only the measurement columns (co2 and the three temperature fields),
# dropping "year" and "country".
co2_tmp = co2_tmp_df.loc[
    :,
    ["co2", "avg_yly_tmp_f", "avg_yly_tmp_c", "avg_yly_tmp_uncertainty"],
]
co2_tmp.head()

In [None]:
# Pairwise Pearson correlation (the pandas default) across all countries.
co2_tmp.corr(method="pearson")

In [None]:
# Canada rows from 1852 onwards, selected in a single step.
# Both conditions are evaluated on co2_tmp_df itself and combined into one
# mask, so the selection no longer relies on pandas aligning a mask built
# from the full frame against an already-filtered subset.
co2tmp1 = co2_tmp_df.loc[
    (co2_tmp_df["country"] == 'CANADA')
    & (co2_tmp_df["year"] >= '1852-01-01')
].reset_index(drop=True)
# Row count of the subset, followed by a preview of the first five rows.
print(len(co2tmp1.index))
co2tmp1.head(5)

In [None]:
# Earliest and latest "year" present in the Canada subset.
co2tmp1_min_date, co2tmp1_max_date = (
    co2tmp1["year"].min(),
    co2tmp1["year"].max(),
)
# print() joins its arguments with single spaces, giving the same text as
# manual string concatenation.
print("min year", co2tmp1_min_date, "max year", co2tmp1_max_date)

In [None]:
# Correlation matrix for the Canada subset.
# The frame still carries the text "country" column and the date-parsed
# "year" column; DataFrame.corr() raises on those in pandas >= 2.0
# (numeric_only defaults to False), so select the numeric columns explicitly.
co2tmp1.select_dtypes(include="number").corr()

In [None]:
from pandas.plotting import lag_plot
from matplotlib.figure import Figure
import matplotlib.pyplot as plt

# Lag plot of Canada's co2 series: y(t) against y(t + 1).
data = co2tmp1["co2"]
plt.figure()
lag_plot(data, lag=1)

# Tighten the layout, save the figure to disk, then display it.
plt.tight_layout()
plt.savefig("canada_lag-plot_co2.png")
plt.show()

In [None]:
# Spain rows from 1852 onwards, selected in a single step.
# Both conditions are evaluated on co2_tmp_df itself and combined into one
# mask, so the selection no longer relies on pandas aligning a mask built
# from the full frame against an already-filtered subset.
co2tmp2 = co2_tmp_df.loc[
    (co2_tmp_df["country"] == 'SPAIN')
    & (co2_tmp_df["year"] >= '1852-01-01')
].reset_index(drop=True)
# Row count of the subset, followed by a preview of the first five rows.
print(len(co2tmp2.index))
co2tmp2.head(5)

In [None]:
# Earliest and latest "year" present in the Spain subset.
co2tmp2_min_date, co2tmp2_max_date = (
    co2tmp2["year"].min(),
    co2tmp2["year"].max(),
)
# print() joins its arguments with single spaces, giving the same text as
# manual string concatenation.
print("min year", co2tmp2_min_date, "max year", co2tmp2_max_date)

In [None]:
# Correlation matrix for the Spain subset.
# The frame still carries the text "country" column and the date-parsed
# "year" column; DataFrame.corr() raises on those in pandas >= 2.0
# (numeric_only defaults to False), so select the numeric columns explicitly.
co2tmp2.select_dtypes(include="number").corr()

In [None]:
from pandas.plotting import lag_plot
from matplotlib.figure import Figure
import matplotlib.pyplot as plt

# Lag plot of Spain's co2 series: y(t) against y(t + 1).
data = co2tmp2["co2"]
plt.figure()
lag_plot(data, lag=1)

# Tighten the layout, save the figure to disk, then display it.
plt.tight_layout()
plt.savefig("spain_lag-plot_co2.png")
plt.show()

In [None]:
# All Australia rows from the merged frame, re-indexed from zero.
# No year cut-off is applied to this subset.
co2tmp3 = co2_tmp_df[co2_tmp_df["country"] == "AUSTRALIA"].reset_index(
    drop=True
)
# Row count of the subset, followed by a preview of the first five rows.
print(co2tmp3.shape[0])
co2tmp3.head()

In [None]:
# Earliest and latest "year" present in the Australia subset.
co2tmp3_min_date, co2tmp3_max_date = (
    co2tmp3["year"].min(),
    co2tmp3["year"].max(),
)
# print() joins its arguments with single spaces, giving the same text as
# manual string concatenation.
print("min year", co2tmp3_min_date, "max year", co2tmp3_max_date)

In [None]:
# Correlation matrix for the Australia subset.
# The frame still carries the text "country" column and the date-parsed
# "year" column; DataFrame.corr() raises on those in pandas >= 2.0
# (numeric_only defaults to False), so select the numeric columns explicitly.
co2tmp3.select_dtypes(include="number").corr()

In [None]:
from pandas.plotting import lag_plot
from matplotlib.figure import Figure
import matplotlib.pyplot as plt

# Lag plot of Australia's co2 series: y(t) against y(t + 1).
data = co2tmp3["co2"]
plt.figure()
lag_plot(data, lag=1)

# Tighten the layout, save the figure to disk, then display it.
plt.tight_layout()
plt.savefig("australia_lag-plot_co2.png")
plt.show()

In [None]:
# Dependencies
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import datetime as dt
from datetime import datetime, date, time
import matplotlib.dates as mdates

# Line chart comparing yearly co2 for Canada, Spain and Australia on one axis.
fig = plt.figure(figsize=(12,10))
ax = fig.add_subplot(111)

x_axis = co2tmp1["year"]
y1_axis = co2tmp1["co2"]
y2_axis = co2tmp2["co2"]
y3_axis = co2tmp3["co2"]


ax.set_title("Canada/Spain/Australia co2 by Year",\
             weight='bold').set_fontsize('18')
ax.set_xlabel("Year", weight='bold').set_fontsize('14')
ax.set_ylabel("co2", weight='bold').set_fontsize('14')


ax.grid(True)
ax.set_facecolor("whitesmoke")
fig.autofmt_xdate(rotation=45, ha='right')

# Draw every series against the "year" column of its own frame. The three
# subsets are filtered independently (Australia has no year cut-off), so they
# are not guaranteed to share a length or a year range; reusing Canada's years
# for all of them would either raise on a length mismatch or place points on
# the wrong dates.
ax.plot(x_axis, y1_axis, color="red", linewidth= 1)
ax.plot(co2tmp2["year"], y2_axis, color="green", linewidth= 1)
ax.plot(co2tmp3["year"], y3_axis, color="blue", linewidth= 1)

# Legend labels are listed in the same order as the plot calls above.
ax.legend(["Canada co2","Spain co2","Australia co2"],
          loc="best",bbox_to_anchor=(1, 0.5))

# Tighten the layout, save the figure to disk, then display it.
plt.tight_layout()
plt.savefig("canada_spain_australia_yearly_co2_chart.png")
plt.show()