In [2]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from statsmodels.tsa.stattools import adfuller
import statsmodels.formula.api as smf
from itertools import product
from tqdm import tqdm_notebook
import warnings
warnings.filterwarnings("ignore")
sns.set_theme()

### CO2 emissions in the future

Up until now, we have looked at the current levels of CO2 emissions. But if the factors that contribute to the rise in CO2 emissions stay as they are (status quo), what does the future look like?

We investigate this by treating the CO2 emissions data as a time series. As the plot of CO2 emissions below shows, the data appear to have an exponentially increasing trend and a variance that increases over time.

In [4]:
# Load the regional CO2 emissions table and keep only the 'World' series, ordered by
# year. The index is reset to 0..n-1 so that the positional assignments of the model
# output further down line up with the rows.
# NOTE(review): other cells in this notebook read from '../data/...'; confirm which
# relative path is correct for the directory this notebook is run from.
df3 = pd.read_csv('data/annual-co-emissions-by-region.csv')
df3 = df3.rename(columns={'Annual CO2 emissions (zero filled)': "Annual CO2 emissions"})
df3 = df3.loc[df3['Entity'] == 'World']
df3 = df3.sort_values('Year').reset_index(drop=True)

# Log-transform the series and store its first difference. The difference array is one
# element shorter than the series, so it is padded with a single trailing zero.
diff1 = np.diff(np.log(df3['Annual CO2 emissions']))
df3['Log transformed first order differenced CO2 emissions'] = np.pad(diff1, (0, 1), 'constant')
df3['Log transformed CO2 emissions'] = np.log(df3['Annual CO2 emissions'])

# Fit ARIMA(1,1,2) on the log-transformed history only (before placeholder rows exist).
model_arima = ARIMA(df3['Log transformed CO2 emissions'], order=(1,1,2))
model_fit = model_arima.fit()

# Add empty placeholder rows for every year after the last observed one up to 2030.
# DataFrame.append was removed in pandas 2.0, so the rows are built once and
# concatenated instead of being appended one by one.
forecast_end_year = 2030
last_observed_year = int(df3['Year'].max())
future_rows = pd.DataFrame({
    'Entity': 'World',
    'Code': 'OWID_WRL',
    'Year': list(range(last_observed_year + 1, forecast_end_year + 1)),
    'Annual CO2 emissions': np.nan,
    'Log transformed first order differenced CO2 emissions': np.nan,
    'Log transformed CO2 emissions': np.nan,
})
df3 = pd.concat([df3, future_rows], ignore_index=True)

# One prediction per row of df3: in-sample for the observed years, out-of-sample for the
# placeholder years. The end point is derived from the frame length instead of a
# hard-coded step count, so it stays correct if the input file gains or loses years.
preds = model_fit.get_prediction(start=0, end=len(df3) - 1)
preds_ci = preds.conf_int(alpha=0.05)  # 95% conf
preds_mu = preds.predicted_mean

# Assign mean and interval bounds row-for-row. Previously the interval columns were
# taken from preds_ci[1:], which placed the interval of year t+1 next to the prediction
# of year t (the band was shifted one year early).
df3['Predictions'] = np.asarray(preds_mu)
df3['CI_lower'] = np.asarray(preds_ci['lower Log transformed CO2 emissions'])
df3['CI_upper'] = np.asarray(preds_ci['upper Log transformed CO2 emissions'])

# Blank the interval of the very first observation; the original code also excluded it
# (presumably because the first step of a differenced model has an uninformatively wide
# interval - TODO confirm).
df3.loc[0, ['CI_lower', 'CI_upper']] = np.nan

In [5]:
# Line chart of the annual CO2 emissions column of df3 against year.
fig = px.line(
    data_frame=df3,
    x='Year',
    y='Annual CO2 emissions',
    title='Annual CO2 emissions',
)
fig.show()

To account for the increasing trend and variance, we chose to model the log-transformed CO2 emissions data with an autoregressive integrated moving average (ARIMA) model: the log transform stabilises the variance, and the differencing step of the ARIMA removes the trend, so that the modelled series is approximately stationary. To determine the order of the parameters in the ARIMA model, we performed a grid search over all parameter combinations in the range 0 to 7 and chose the combination with the lowest AIC. The final model ended up being an ARIMA(1,1,2).

Below you can investigate the predictions of the ARIMA model until 2030. The model predicts a small decrease in CO2 emissions in 2021, followed by an approximately linear increase over the next 9 years.

In [14]:
# Plot: observed emissions, back-transformed ARIMA predictions and the 95% band.
fig = px.line(
    df3,
    x='Year',
    y='Annual CO2 emissions',
    range_x=[1920, 2030],
    title="ARIMA predictions of annual CO2 emissions",
)

# Model output is on the log scale, so every model series is passed through np.exp.
observed_trace = go.Scatter(
    x=df3['Year'], y=df3['Annual CO2 emissions'],
    mode='lines', name='Original data',
)
predicted_trace = go.Scatter(
    x=df3['Year'], y=np.exp(df3['Predictions']),
    mode='lines', name='Predictions',
)

# The band is drawn as two invisible lines; the lower one fills up to the trace added
# immediately before it ('tonexty'), so the upper bound must be added first.
band_upper_trace = go.Scatter(
    x=df3['Year'], y=np.exp(df3['CI_upper']),
    mode='lines', line_color='rgba(0,0,0,0)',
    showlegend=False,
)
band_lower_trace = go.Scatter(
    x=df3['Year'], y=np.exp(df3['CI_lower']),
    mode='lines', line_color='rgba(0,0,0,0)',
    name='95% confidence interval',
    fill='tonexty', fillcolor='rgba(0,100,80,0.2)',
)

fig.add_traces([observed_trace, predicted_trace, band_upper_trace, band_lower_trace])
fig.show()

### How about the global temperature?

Well, we have just seen that, assuming status quo and that CO2 emissions can be modelled by an ARIMA model, CO2 emissions will increase further. We have also seen that the average global temperature and cumulative CO2 emissions have an approximately linear relationship.

Now, how much would the global temperature increase by 2030 if we assume the future CO2 emissions predicted by the ARIMA model to be true?

This is investigated by inputting the ARIMA predictions into a least squares fit of the global temperature and cumulative CO2 emissions.

In [5]:
# Loading the data & pre-processing
df = pd.read_csv('../data/GlobalLandTemperaturesByCity.csv')

# Parse the date column once and derive the calendar year from the parsed values.
parsed_dates = pd.DatetimeIndex(df['dt'])
df['dt'] = parsed_dates
df['Year'] = parsed_dates.year

# Rows from the years 1743 and 2013 are excluded.
df = df[~df['Year'].isin([1743, 2013])]

# Mean of AverageTemperature per country (columns: AverageTemperature, Country).
df_avg = (
    df.groupby('Country')['AverageTemperature']
    .mean()
    .reset_index()[['AverageTemperature', 'Country']]
)

# Mean of AverageTemperature per city (columns: AverageTemperature, City).
df_avg_city = (
    df.groupby('City')['AverageTemperature']
    .mean()
    .reset_index()[['AverageTemperature', 'City']]
)

# Mean temperature per (Year, Country), then a plain unweighted average of those
# country means per year - a rough global figure.
D1 = df.groupby(['Year', 'Country'], as_index=False)['AverageTemperature'].mean()
meantemp = D1.groupby('Year', as_index=False)['AverageTemperature'].mean()

# The series used from here on is not sliced out of `meantemp`; it is read from a
# previously saved CSV, and the saved index column is dropped.
meantemp1900 = pd.read_csv('../data/meantemprelative1900df.csv')
meantemp1900 = meantemp1900.drop(columns=['Unnamed: 0'])

# World annual CO2 emissions, ordered by year.
df4 = (
    pd.read_csv('../data/annual-co-emissions-by-region.csv')
    .rename(columns={'Annual CO2 emissions (zero filled)': "Annual CO2 emissions (tons)"})
    .loc[lambda emissions: emissions['Entity'] == 'World']
    .sort_values('Year')
)

# Keep only the years present in both tables and give the temperature column its
# display name.
meantemp1900co2 = meantemp1900.merge(df4, how='inner', left_on="Year", right_on="Year")
meantemp1900co2 = meantemp1900co2.rename(columns={'AverageTemperature': "Average Temperature (C)"})

In [6]:
# Extend the World series with the ARIMA predictions for 2021-2030, converted back from
# the log scale with np.exp. DataFrame.append was removed in pandas 2.0 (and copied the
# whole frame on every call), so all rows are built first and concatenated once.
forecast_rows = pd.DataFrame(
    [
        {
            'Entity': 'World',
            'Code': 'OWID_WRL',
            'Year': forecast_year,
            'Annual CO2 emissions (tons)': np.exp(
                df3.loc[df3['Year'] == forecast_year, 'Predictions'].iloc[0]
            ),
        }
        for forecast_year in range(2021, 2031)
    ]
)
df4 = pd.concat([df4, forecast_rows], ignore_index=True)

# Turn annual emissions into a running total and rename the column accordingly.
# NOTE: this cell is not idempotent - after the rename the annual column no longer
# exists, so re-run the cell that loads df4 before re-running this one.
df4['Annual CO2 emissions (tons)'] = np.cumsum(df4['Annual CO2 emissions (tons)'])
df4 = df4.rename(columns={"Annual CO2 emissions (tons)": "Cumulative CO2 emissions (tons)"})

In [7]:
# Predicting with the linear model
# Collect the cumulative CO2 value of each year 2013-2030 (18 values, chronological)
# as a float array; the first matching row per year is used.
co2_predict = np.array(
    [
        df4.loc[df4['Year'] == target_year, 'Cumulative CO2 emissions (tons)'].to_numpy()[0]
        for target_year in range(2013, 2031)
    ],
    dtype=float,
)

In [8]:
def linear_fit(x, slope=9.40088e-13, intercept=15.1500):
    """Evaluate the straight line ``slope * x + intercept``.

    Parameters
    ----------
    x : float or array-like
        Input value(s); in this notebook, cumulative CO2 emissions in tons.
    slope : float, optional
        Line slope. The default is the value previously hard-coded here
        (presumably taken from an earlier least-squares fit of temperature on
        cumulative CO2 - TODO confirm and record where it came from).
    intercept : float, optional
        Line intercept; same provenance caveat as ``slope``.

    Returns
    -------
    float or numpy.ndarray
        ``slope * x + intercept``, with the same shape as ``x``.
    """
    return slope * x + intercept

In [9]:
# Apply the fixed linear model to every cumulative-CO2 value in co2_predict.
pred_temp = linear_fit(co2_predict)

In [10]:
# Adding predicted values to dataframe
# One row per year starting at 2013, pairing each cumulative-CO2 input with the
# temperature returned by the linear model. The frame is built in a single constructor
# call: DataFrame.append was removed in pandas 2.0, and appending inside a loop copies
# the whole frame on every iteration. The previously unused `index` counter is dropped.
first_pred_year = 2013
df_pred = pd.DataFrame({
    'Cumulative CO2 emissions (tons)': co2_predict,
    'Average Temperature (C)': pred_temp,
    'Entity': 'World',
    'Code': 'OWID_WRL',
    'Year': np.arange(first_pred_year, first_pred_year + len(co2_predict)),
})


In [11]:
# Convert the merged frame's annual emissions into a running total under the
# cumulative column name, then stack the model-generated rows (df_pred) underneath.
# NOTE(review): this running total starts at the first year of the merged frame, while
# the cumulative series in df4 is summed over df4's own rows - confirm both use the
# same starting year, otherwise the x-values of the two parts are offset.
cumulative_col = "Cumulative CO2 emissions (tons)"
meantemp1900co2 = (
    meantemp1900co2
    .rename(columns={"Annual CO2 emissions (tons)": cumulative_col})
    .assign(**{cumulative_col: lambda merged: merged[cumulative_col].cumsum()})
)
df_predictions = pd.concat([meantemp1900co2, df_pred], ignore_index=True)

In [12]:
# Fit temp ~ CO2 with statsmodels on a renamed copy of df_predictions (the formula
# needs plain column names) and build the prediction summary table for every row.
# NOTE(review): df_predictions also contains the rows from df_pred, whose temperatures
# come from linear_fit rather than from measurements; treating them as observations
# likely narrows the intervals - consider fitting on the measured years only.
alpha = .05
df_predictions2 = df_predictions.rename(
    columns={
        "Average Temperature (C)": "temp",
        "Cumulative CO2 emissions (tons)": "CO2",
    }
)
model = smf.ols('temp ~ CO2', df_predictions2)
results = model.fit()
predictions = results.get_prediction(df_predictions2).summary_frame(alpha=alpha)

In [13]:
# Scatter of temperature against cumulative CO2, coloured by year, with plotly's own
# OLS trendline drawn in black.
fig = px.scatter(
    df_predictions,
    x="Cumulative CO2 emissions (tons)",
    y="Average Temperature (C)",
    color="Year",
    color_continuous_scale="balance",
    trendline="ols",
    trendline_color_override="black",
    title="Global temperature vs. cumulated CO2",
)

# Bounds of the confidence interval for the mean, from the statsmodels summary frame.
ci_upper = predictions['mean_ci_upper']
ci_lower = predictions['mean_ci_lower']

# Two invisible lines; the lower one fills up to the trace added just before it
# ('tonexty'), so the upper bound goes first.
band_upper_trace = go.Scatter(
    x=df_predictions["Cumulative CO2 emissions (tons)"], y=ci_upper,
    mode='lines', line_color='rgba(0,0,0,0)',
    showlegend=False,
)
band_lower_trace = go.Scatter(
    x=df_predictions["Cumulative CO2 emissions (tons)"], y=ci_lower,
    mode='lines', line_color='rgba(0,0,0,0)',
    name='95% confidence interval',
    fill='tonexty', fillcolor='rgba(0,100,80,0.2)',
)
fig.add_traces([band_upper_trace, band_lower_trace])

fig.show()

The last 10 data points in the plot above are based on inputs of cumulative CO2 emissions from the ARIMA model. Based on the ARIMA predictions of CO2 and the linear fit to average global temperature, we predict the average global temperature in 2030 to be 17.10 degrees Celsius, which is an increase of 0.76 degrees from 2012, the last measurement in the data set.

While an increase of 0.76 degrees doesn't seem like a major difference, it is important to remember that increased CO2 emissions lead to more extreme temperatures, both warm and cold. Thus, averaging over countries and years naturally eliminates our ability to see these effects.

**TODO:** write a concluding paragraph here.

sources: http://web.mit.edu/jlcohen/www/papers/AML-WhitePaper.pdf