# Scenario based forecasting application on European Union carbon emissions from 2019 to 2030

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
import pickle
import scipy.stats

import plotly.graph_objects as go
import plotly
from plotly.subplots import make_subplots
plotly.offline.init_notebook_mode(connected=True)
np.seterr(divide='ignore', invalid='ignore')

import plotly.express as px


ssp1 = ['pop_SSP1.csv', 'gdp_cap_SSP1.csv', 'EnergyIntensity_SSP1.csv',
        'oil_SSP1.csv', 'coal_SSP1.csv', 'gas_SSP1.csv', 'total_cons_SSP1.csv']
ssp2 = ['pop_SSP2.csv', 'gdp_cap_SSP2.csv', 'EnergyIntensity_SSP2.csv',
        'oil_SSP2.csv', 'coal_SSP2.csv', 'gas_SSP2.csv', 'total_cons_SSP2.csv']
ssp3 = ['pop_SSP3.csv', 'gdp_cap_SSP3.csv', 'EnergyIntensity_SSP3.csv',
        'oil_SSP3.csv', 'coal_SSP3.csv', 'gas_SSP3.csv', 'total_cons_SSP3.csv']
ssp4 = ['pop_SSP4.csv', 'gdp_cap_SSP4.csv', 'EnergyIntensity_SSP4.csv',
        'oil_SSP4.csv', 'coal_SSP4.csv', 'gas_SSP4.csv', 'total_cons_SSP4.csv']
ssp5 = ['pop_SSP5.csv', 'gdp_cap_SSP5.csv', 'EnergyIntensity_SSP5.csv',
        'oil_SSP5.csv', 'coal_SSP5.csv', 'gas_SSP5.csv', 'total_cons_SSP5.csv']
list_ssp = [ssp1, ssp2, ssp3, ssp4, ssp5]


# File containing the projections (2018-2030) of the different variables 
file_proj = "proj_values_EU28/"

# File containing the projections rates (2018-2030) of the different variables 
file_rates = "proj_rates/"

# real projections of the carbon emissions
file_real_proj = "proj_values_EU28/raw_co2R_SSP"

# parameters of the normal distributions for the emission factors (coal, gas, oil)
file_emi_factor_normal = "fossil_fuel_emi_factors_normal_param.csv"
# parameters of the normal distributions for the emission factors (coal, gas, oil))
file_emi_factor_tri = "/home/cj/big-data/06_MILESTONES/fossil_fuel_emi_factors_triangular_param.csv"
file_emi_factor = {'normal':file_emi_factor_normal, 'triangular':file_emi_factor_tri}

years = list()
for i in range(13) : years.append(2018+i)
list_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
               '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '17becf']

value_1990 = 4408526411.17188*10**(-6) # from ktoe to Mtoe
target_2030 = value_1990-0.40*(value_1990)
target_2030_ = list()
for i in range(len(years)): target_2030_.append(target_2030)

In [2]:
def get_mean_std(file_, scenario, rates=False):
    df_mean = pd.DataFrame({})
    df_std = pd.DataFrame({})
    
    for file in scenario:
        df = pd.read_csv(file_ + file, index_col=['Unnamed: 0'])
        m = df.mean()
        col_name = file.split('_')[0]
        df_mean[col_name] = m

        std = df.std()
        col_name = file.split('_')[0]
        df_std[col_name] = std
    if rates is True:
        df_mean.index = np.arange(2019, 2032)
        df_std.index = np.arange(2019, 2032)
        df_mean = df_mean[0:13]
        df_std = df_std[0:13]
    else:
        df_mean.index = np.arange(2018, 2031)
        df_std.index = np.arange(2018, 2031)
        
    
    names_col = ['pop','gdp', 'ener_int', 'cons_oil', 'cons_coal', 'cons_gas', 'total_cons']
    df_mean.columns = names_col
    df_std.columns = names_col

    return df_mean, df_std

def get_emi_factor_distribution(distri):
    file = file_emi_factor[distri]
    params = pd.read_csv(file, index_col=0)
    return params.transpose()    

def kaya_identity(cons_oil, cons_coal, cons_gas, emi_fact_coal, emi_fact_oil, emi_fact_gas):
    return ((cons_oil*emi_fact_oil)+(cons_coal*emi_fact_oil)+(cons_gas*emi_fact_oil))

def kaya_identity_rates(gdp_rates, ei_rates, ci_rates):
    ci_rates=0
    return (1+gdp_rates)*(1+ei_rates)*(1+ci_rates)-1

def mean_forecast(x):
    mean_forecast = list()
    for i in range(13) : mean_forecast.append(np.mean(x[i]))
    return mean_forecast

def show_average_forecast(list_forecast_ssp, name):
    fig = go.Figure()
    for i in range(len(list_forecast_ssp)):
        fig.add_trace(go.Scatter(x=years, y=list_forecast_ssp[i], opacity=0.8,
                                 line=dict(color=list_colors[i]), name= "ssp"+str(i+1)))
        
    fig.add_trace(go.Scatter(x=years, y=target_2030_, opacity=0.8,
                             line=dict(color="Black"), name="EU target"))
    
    # Use date string to set xaxis range
    fig.update_layout(
        title_text="Forecasts of CO2 emissions in function of the family of scenarios based on "+name)


    fig.update_xaxes(title_text="Years")
    fig.update_yaxes(title_text="CO2 emissions [MtCO2eq]"),
    #fig.update_layout(legend_orientation="h")
    fig.show()
    
def show_projection_CO2(list_projection_CO2):
    fig = go.Figure()
    for i in range(5):
        fig.add_trace(go.Scatter(x=years, y=list_projection_CO2[i], opacity=0.8,
                                 line=dict(color=list_colors[i]), name="SSP"+str(i+1)))
    
    fig.add_trace(go.Scatter(x=years, y=target_2030_, opacity=0.8,
                             line=dict(color="Black"), name="EU target"))
    
    # Use date string to set xaxis range
    fig.update_layout(title_text="CO2 emission projections for each SSPS Family")


    fig.update_xaxes(title_text="Years")
    fig.update_yaxes(title_text="CO2 emissions [MtCO2eq]"),
    #fig.update_layout(legend_orientation="h")
    fig.show()

    
def projection_vs_forecasts(emi, name, show=False):
    
    list_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
                   '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '17becf']
    
    fig = go.Figure()
    
    fig = make_subplots(rows=3, cols=2, subplot_titles=("SSP1", "SSP2", "SSP3", "SSP4", "SSP5"))
    
    roww = [1, 1, 2, 2, 3]
    coll = [1, 2, 1, 2, 1]
    
    for i in range(len(coll)):
        for j in range(len(emi)):
            y=emi[j][i]
            fig.add_trace(go.Scatter(x=years, y=y, opacity=0.8, line=dict(color=list_colors[j+2]), 
                                    name=name[j]), row=roww[i], col=coll[i])
            
        fig.add_trace(go.Scatter(x=years, y=target_2030_, opacity=0.8,
                             line=dict(color="Black"), name="EU target"), row=roww[i], col=coll[i])
                            
        fig.update_xaxes(title_text="Years", row=roww[i], col=coll[i])
        fig.update_yaxes(title_text="CO2 emissions [MtCO2eq]", row=roww[i], col=coll[i])
    
    fig.update_layout(title_text="Comparison between forecasts with projections for each SSP family",
                      height=1000, width=1000, showlegend=True, legend_orientation="h")
    if show is True:
        fig.show()
    
def show_real_projections(file, show=False):
    list_file = list()
    projection_CO2 = list()
    for i in range(5):
        list_file.append(file+str(i+1)+".csv")
        projection_CO2.append(pd.read_csv(list_file[i]).iloc[0][1:])
    
    if show is True:
        show_projection_CO2(projection_CO2)
    
    return projection_CO2

def show_goals(emi_list, name, show=False):
    fig = go.Figure()
    fig = make_subplots(rows=3, cols=2, subplot_titles=("SSP1", "SSP2", "SSP3", "SSP4", "SSP5"))
    
    roww = [1, 1, 2, 2, 3]
    coll = [1, 2, 1, 2, 1]

    x_name = name
  
    for i in range(len(coll)):
        values = list()
        color = list()
        for j in range(len(emi_list)):
            values.append(emi_list[j][i][11])
            color.append(list_colors[j+1])
        values.append(target_2030)
        color.append(list_colors[0])
    
        fig.add_trace(go.Bar(x=x_name, y=values, marker_color = color, textposition='auto'),
                      row=roww[i], col=coll[i])
    
        fig.update_xaxes(row=roww[i], col=coll[i])
        fig.update_yaxes(title_text="CO2 emissions [MtCO2eq2]", row=roww[i], col=coll[i])
        
    fig.update_layout(title_text="Target set by the European Union for CO2 emissions"+
                      " in 2030 compared to forecasts and projections",
                      height=1000, width=1000, showlegend=False, legend_orientation="h")
    if show is True:
        fig.show()

def from_rates_to_data(emi, val_2018):
    emi[0] = val_2018 + emi[0]*val_2018/100
    for i in range(1, 13):
        emi[i] = emi[i-1] + emi[i]*emi[i-1]/100
    return emi

def from_value_to_rates(value_1, value_2):

    zipped_lists = zip(value_1, value_2)
    fossil_emi_rates_fact = [100*(value_2-value_1)/value_1 for (value_1, value_2) in zipped_lists]
        
    return np.array(fossil_emi_rates_fact)

def monte_carlo_simulation(file, distri, ssp, samples):
    mean, std = get_mean_std(file, ssp)
    params = get_emi_factor_distribution(distri)
    emi_sim = np.zeros((13, samples)) 
    
    for i in range(13):
        oil_sim = np.random.normal(mean['cons_oil'].iloc[i], std['cons_oil'].iloc[i], samples)
        coal_sim = np.random.normal(mean['cons_coal'].iloc[i], std['cons_coal'].iloc[i], samples)
        gas_sim = np.random.normal(mean['cons_gas'].iloc[i], std['cons_gas'].iloc[i], samples)
        
        if distri == 'normal':
            emi_fact_oil = np.random.normal(params['oil']['mean'], params['oil']['std'], samples)
            emi_fact_coal = np.random.normal(params['coal']['mean'], params['coal']['std'], samples)
            emi_fact_gas = np.random.normal(params['gas']['mean'], params['gas']['std'], samples)
        elif distri == 'triangular':
            emi_fact_oil = np.random.triangular(params['oil']['left'], params['oil']['mode'],
                                            params['oil']['right'], samples)
            emi_fact_coal = np.random.triangular(params['coal']['left'], params['coal']['mode'],
                                             params['coal']['right'], samples)
            emi_fact_gas = np.random.triangular(params['gas']['left'], params['gas']['mode'],
                                            params['gas']['right'], samples)

        emi_sim[i] = kaya_identity(oil_sim, coal_sim, gas_sim, emi_fact_coal, emi_fact_oil, emi_fact_gas)
    
    return emi_sim


def get_emission_factor(oil_cons, coal_cons, gas_cons, emi_fact_oil, emi_fact_coal, emi_fact_gas, total_cons):
    emi = kaya_identity(oil_cons, coal_cons, gas_cons, emi_fact_oil, emi_fact_coal, emi_fact_gas)
    return emi/total_cons
    
def get_emission_factor_rates(samples, params, distri, ssp):
    
    mean, std = get_mean_std(file_proj, ssp)
    carbon_intensity = np.zeros((14, samples))
    # Emission factor (fossil energy) of year 2017
    carbon_intensity_2017 = 2.012290082158105
    carbon_intensity_2017_ = list()
    for i in range(samples): carbon_intensity_2017_.append(carbon_intensity_2017)
    carbon_intensity[0] = np.array(carbon_intensity_2017_)
   
    for i in range(13-12):
        if distri == 'normal':
            emi_fact_oil = np.random.normal(params['oil']['mean'], params['oil']['std'], samples)
            emi_fact_coal = np.random.normal(params['coal']['mean'], params['coal']['std'], samples)
            emi_fact_gas = np.random.normal(params['gas']['mean'], params['gas']['std'], samples)
        elif distri == 'triangular':
            emi_fact_oil = np.random.triangular(params['oil']['left'], params['oil']['mode'],
                                            params['oil']['right'], samples)
            emi_fact_coal = np.random.triangular(params['coal']['left'], params['coal']['mode'],
                                             params['coal']['right'], samples)
            emi_fact_gas = np.random.triangular(params['gas']['left'], params['gas']['mode'],
                                            params['gas']['right'], samples)
        else:
            return print('No such distribution')

        oil_cons = np.random.normal(mean['cons_oil'].iloc[i], std['cons_oil'].iloc[i], samples)
        coal_cons = np.random.normal(mean['cons_coal'].iloc[i], std['cons_coal'].iloc[i], samples)
        gas_cons = np.random.normal(mean['cons_gas'].iloc[i], std['cons_gas'].iloc[i], samples)
        
        total_cons = np.random.normal(mean['total_cons'].iloc[i], std['total_cons'].iloc[i], samples)
        carbon_intensity[i+1] = get_emission_factor(oil_cons, coal_cons, gas_cons, emi_fact_oil,
                                                  emi_fact_coal, emi_fact_gas, total_cons)

    carbon_intensity_rates = np.zeros((13, samples))
    for i in range(len(carbon_intensity)-1):
        carbon_intensity_rates[i] = (carbon_intensity[i+1]-carbon_intensity[i])/carbon_intensity[i]
        
    return carbon_intensity_rates

def monte_carlo_simulation_rates(file_rates, ssp, distri, samples):
    
    mean_rates, std_rates = get_mean_std(file_rates, ssp, rates = True)
    emi_sim = np.zeros((13, samples)) 
   
    # 2018 CO2 emission : 3457 MtCO2.
    emission_2018 = 3457
    
    params = get_emi_factor_distribution(distri) # used for emission factor rates of fossil energie.
       
    for i in range(13):
    
        gdp_rates = np.random.normal(mean_rates['gdp'].iloc[i], std_rates['gdp'].iloc[i], samples)
        ei_rates = np.random.normal(mean_rates['ener_int'].iloc[i], std_rates['ener_int'].iloc[i], samples)
        
        carbon_intensity_rates = get_emission_factor_rates(samples, params, distri, ssp)
        
        emi_sim[i] = kaya_identity_rates(gdp_rates, ei_rates, carbon_intensity_rates)
       
    emi_sim = from_rates_to_data(emi_sim, emission_2018)
    
    return emi_sim


def get_carbon_emissions(samples, distri=False, file_rates=False, show=False):
    
    #list_ssp = [ssp1] # a enlever
    
    list_average_emi_ssp = list()
    for ssp in list_ssp:
        if file_rates is False:
            emi_ssp = monte_carlo_simulation(file_proj, distri, ssp, samples)
        else:
            emi_ssp = monte_carlo_simulation_rates(file_rates, ssp, distri, samples)

        list_average_emi_ssp.append(mean_forecast(emi_ssp))
        
    if show is True:
        if file_rates is False:
            show_average_forecast(list_average_emi_ssp, 'Kaya model')
        else:
            show_average_forecast(list_average_emi_ssp, 'the derivative of Kaya model')
            
    return list_average_emi_ssp

In [3]:
samples = 1000
emi_kaya_triangular = get_carbon_emissions(samples, distri='triangular', show=False)
emi_kaya_normal = get_carbon_emissions(samples, distri='normal', show=True)
emi_kaya_rates_normal = get_carbon_emissions(samples,'normal', file_rates, show=True)
emi_kaya_rates_triangular = get_carbon_emissions(samples, 'triangular', file_rates, show=False)
real_proj = show_real_projections(file_real_proj, show=False)
show_goals([emi_kaya_normal, emi_kaya_rates_normal, real_proj],
           name = ['Kaya model', 'Derivative of Kaya model', 'Projection', 'Target'], show=False)
projection_vs_forecasts([emi_kaya_normal, emi_kaya_rates_normal, real_proj],
                        name=['Kaya model', 'Derivative of Kaya model', 'Projections'], show=False)

## Confidence intervalle : not done yet

In [4]:
obj = 2.86
prev = [3.03, 3.57, 3.63, 3.58, 4.36]
i = 0
for ssp in list_ssp:
    h = (monte_carlo_simulation(ssp, samples, 'kaya 1')[11])
    s = mean_confidence_interval(h)
    g = (s[2]-s[1])*50
    print([g/obj, g/prev[i]])
    i = i+1

h = (monte_carlo_simulation(ssp, samples, 'kaya 1')[11])
s = mean_confidence_interval(h)
s

TypeError: monte_carlo_simulation() missing 1 required positional argument: 'samples'

In [None]:
#gdp_list
mean_rates_ci = []
std_rates_ci = []
for ssp in list_ssp:
    mean_rates, std_rates = get_mean_std(file_rates, ssp, rates = True)
    mean_rates_ci.append(mean_rates['carb_int'])
    std_rates_ci.append(std_rates['carb_int'])

# LES VALEURS DE STD SONT BCP TROP GRANDES !!!!!
    
print(mean_rates_ci)