# Scenario based forecasting application on European Union carbon emissions from 2018 to 2030

In [1]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import seaborn as sns
import random
import pickle
import scipy.stats

import plotly.graph_objects as go
import plotly
from plotly.subplots import make_subplots
plotly.offline.init_notebook_mode(connected=True)
np.seterr(divide='ignore', invalid='ignore')

import plotly.express as px

ssp1 = ['pop_SSP1.csv', 'gdp_cap_SSP1.csv', 'EnergyIntensity_SSP1.csv',
        'oil_SSP1.csv', 'coal_SSP1.csv', 'gas_SSP1.csv', 'total_cons_SSP1.csv']
ssp2 = ['pop_SSP2.csv', 'gdp_cap_SSP2.csv', 'EnergyIntensity_SSP2.csv',
        'oil_SSP2.csv', 'coal_SSP2.csv', 'gas_SSP2.csv', 'total_cons_SSP2.csv']
ssp3 = ['pop_SSP3.csv', 'gdp_cap_SSP3.csv', 'EnergyIntensity_SSP3.csv',
        'oil_SSP3.csv', 'coal_SSP3.csv', 'gas_SSP3.csv', 'total_cons_SSP3.csv']
ssp4 = ['pop_SSP4.csv', 'gdp_cap_SSP4.csv', 'EnergyIntensity_SSP4.csv',
        'oil_SSP4.csv', 'coal_SSP4.csv', 'gas_SSP4.csv', 'total_cons_SSP4.csv']
ssp5 = ['pop_SSP5.csv', 'gdp_cap_SSP5.csv', 'EnergyIntensity_SSP5.csv',
        'oil_SSP5.csv', 'coal_SSP5.csv', 'gas_SSP5.csv', 'total_cons_SSP5.csv']
list_ssp = [ssp1, ssp2, ssp3, ssp4, ssp5]
name_ssp = ['ssp1', 'ssp2', 'ssp3', 'ssp4', 'ssp5']

filename = "stdKR_model.pk"
loaded_model = pickle.load(open(filename, 'rb'))

# File containing the projections (2018-2030) of the different variables 
file_proj = "proj_values_EU28/"
# File containing the projections rates (2018-2030) of the different variables 
file_rates = "proj_rates/"
# real projections of the carbon emissions
file_real_proj = "proj_values_EU28/raw_co2R_SSP"

# parameters of the normal distributions for the emission factors (coal, gas, oil)
file_emi_factor_normal = "input_distrib/fossil_fuel_emi_factors_normal_param.csv"
# parameters of the normal distributions for the emission factors (coal, gas, oil))
file_emi_factor_tri = "input_distrib/fossil_fuel_emi_factors_triangular_param.csv"
list_param = {'normal': file_emi_factor_normal, 'triangular':file_emi_factor_tri}


years = list()
for i in range(14) : years.append(2017+i)
years_rev = years[::-1]
list_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
               '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '17becf']

value_1990 = 4408526411.17188*10**(-6) # from ktoe to Mtoe
target_2030 = value_1990-0.40*(value_1990)
target_2030_ = list()
for i in range(len(years)): target_2030_.append(target_2030)
    
def get_mean_std(file_, scenario, rates=False):
    
    df_mean = pd.DataFrame({})
    df_std = pd.DataFrame({})
    
    for file in scenario:
        df = pd.read_csv(file_ + file, index_col=['Unnamed: 0'])
        m = df.mean()
        col_name = file.split('_')[0]
        df_mean[col_name] = m

        std = df.std()
        col_name = file.split('_')[0]
        df_std[col_name] = std
    if rates is True:
        df_mean.index = np.arange(2018, 2031)
        df_std.index = np.arange(2018, 2031)

    else:
        df_mean.index = np.arange(2018, 2031)
        df_std.index = np.arange(2018, 2031)
        
    
    names_col = ['pop','gdp', 'ener_int', 'cons_oil', 'cons_coal', 'cons_gas', 'total_cons']
    df_mean.columns = names_col
    df_std.columns = names_col
    
    return df_mean, df_std

def get_emi_factor_distribution(file_dict):
    params = pd.read_csv(file_dict, index_col=0)
    return params.transpose()    

def initialise_distri(file_proj, ssp, name_file, rates=False):
    mean, std = get_mean_std(file_proj, ssp, rates)
    if rates is False:
        mean = mean.drop(columns=['ener_int'])
        std = std.drop(columns=['ener_int'])
        mean.columns = ['mean_pop', 'mean_gdp', 'mean_oil', 'mean_coal', 'mean_gas', 'mean_fossil']
        std.columns = ['std_pop', 'std_gdp', 'std_oil', 'std_coal', 'std_gas', 'std_fossil']
        df = pd.concat([mean, std], axis=1, sort=False)
        file = 'input_distrib/'+name_file+'_distribution.csv'
        df.to_csv(file, index=years)
        
    else:
        mean = mean.drop(columns=['pop'])
        std = std.drop(columns=['pop'])
        mean.columns = ['rates_mean_gdp', 'rates_mean_ei', 'rates_mean_oil', 'rates_mean_coal',
                        'rates_mean_gas', 'rates_mean_fossil']
        std.columns = ['rates_std_gdp', 'rates_std_ei', 'rates_std_oil', 'rates_std_coal',
                       'rates_std_gas', 'rates_std_fossil']
        df = pd.concat([mean, std], axis=1, sort=False)
        file = 'input_distrib/'+name_file+'_distribution_rates.csv'
        df.to_csv(file, index=years)
    
    return df
    
def initialise(file_proj, file_rates, list_ssp, name_ssp, list_param):
    ssp_distri = {}
    ssp_distri_rates = {}
    for i in range(len(name_ssp)):
        ssp_distri[name_ssp[i]] = initialise_distri(file_proj, list_ssp[i], name_ssp[i], rates=False)
        ssp_distri_rates[name_ssp[i]] = initialise_distri(file_rates, list_ssp[i], name_ssp[i], rates=True)

    param_dict = {}
    for distri, file_distri in list_param.items(): 
        param_dict[distri] = get_emi_factor_distribution(file_distri)
        
    return ssp_distri, ssp_distri_rates, param_dict

ssp_distri, ssp_distri_rates, param_dict = initialise(file_proj, file_rates, list_ssp, name_ssp, list_param)

In [2]:
def mean_forecast(x):
    mean_forecast = list()
    for i in range(13) : mean_forecast.append(np.mean(x[i]))
    return mean_forecast

def std_forecast(x):
    std_forecast = list()
    for i in range(13) : std_forecast.append(np.std(x[i], ddof = 1))
    return std_forecast

def show_forecast(list_forecast_ssp, list_dev_forecast_ssp, nb_sample, name):
    for i in range(len(list_forecast_ssp)):
        fig = go.Figure()
        
        frcst_m = np.array(list_forecast_ssp[i].copy())
        frcst_d = np.array(list_dev_forecast_ssp[i].copy())
        frcst_m = np.append(3144, frcst_m)
        frcst_d = np.append(0, frcst_d)
        
        confidence = frcst_d
        y_upper = frcst_m + confidence
        y_lower = frcst_m - confidence
        y_lower = y_lower[::-1]
        
        x_final = np.concatenate((years,years_rev))
        y_final = np.concatenate((y_upper,y_lower))

        fig.add_trace(go.Scatter(
            x=x_final,
            y=y_final,
            fill='toself',
            fillcolor=list_colors[i],
            opacity=0.1,
            line_color='rgba(0, 0, 0, 0)',
            showlegend=False,
            name="SSP"+str(i+1),
        ))
        fig.add_trace(go.Scatter(x=years, y=frcst_m, opacity=0.8,
                                 line=dict(color=list_colors[i]), name= "SSP"+str(i+1)))
        
        fig.add_trace(go.Scatter(x=years, y=target_2030_, opacity=0.8, mode='lines',
                                 line=dict(color="Black"), name="EU target"))
    

    
        # Use date string to set xaxis range
        fig.update_layout(
            title_text="Forecasts of CO2 emissions in function of SSP"+str(i+1)+" based on "+name)

        fig.update_xaxes(title_text="Years")
        fig.update_yaxes(title_text="CO2 emissions [MtCO2eq]", range=[1400, 3700]),
        #fig.update_layout(legend_orientation="h")
        fig.show()

def show_average_forecast(list_forecast_ssp, list_dev_forecast_ssp, nb_sample, name):
    fig = go.Figure()
    for i in range(len(list_forecast_ssp)):
        frcst_m = np.array(list_forecast_ssp[i].copy())
        frcst_d = np.array(list_dev_forecast_ssp[i].copy())
        frcst_m = np.append(3144, frcst_m)
        frcst_d = np.append(0, frcst_d)
        
        confidence = 1.96 * frcst_d/math.sqrt(nb_sample)
        y_upper = frcst_m + confidence
        y_lower = frcst_m - confidence
        y_lower = y_lower[::-1]
        
        x_final = np.concatenate((years,years_rev))
        y_final = np.concatenate((y_upper,y_lower))

        fig.add_trace(go.Scatter(
            x=x_final,
            y=y_final,
            fill='toself',
            fillcolor=list_colors[i],
            opacity=0.1,
            line_color='rgba(0, 0, 0, 0)',
            showlegend=False,
            name="SSP"+str(i+1),
        ))
        fig.add_trace(go.Scatter(x=years, y=frcst_m, opacity=0.8,
                                 line=dict(color=list_colors[i]), name= "SSP"+str(i+1)))
        
    fig.add_trace(go.Scatter(x=years, y=target_2030_, opacity=0.8, mode='lines',
                             line=dict(color="Black"), name="EU target"))
    
    # Use date string to set xaxis range
    fig.update_layout(
        title_text="Forecasts of CO2 emissions in function of the family of scenarios based on "+name)


    fig.update_xaxes(title_text="Years")
    fig.update_yaxes(title_text="CO2 emissions [MtCO2eq]"),
    #fig.update_layout(legend_orientation="h")
    fig.show()
    
def show_projection_CO2(list_projection_CO2):
    fig = go.Figure()
    for i in range(5):
        co2_proj = list_projection_CO2[i].to_numpy()
        co2_proj = np.append(3144, co2_proj)
        fig.add_trace(go.Scatter(x=years, y=co2_proj, opacity=0.8,
                                 line=dict(color=list_colors[i]), name="SSP"+str(i+1)))
    
    fig.add_trace(go.Scatter(x=years, y=target_2030_, opacity=0.8, mode= 'lines',
                             line=dict(color="Black"), name="EU target"))
    
    # Use date string to set xaxis range
    fig.update_layout(title_text="CO2 emission projections for each SSPS Family")


    fig.update_xaxes(title_text="Years")
    fig.update_yaxes(title_text="CO2 emissions [MtCO2eq]"),
    #fig.update_layout(legend_orientation="h")
    fig.show()

    
def projection_vs_forecasts(emi, name, show=False):
    
    list_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
                   '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '17becf']
    
    fig = go.Figure()
    
    fig = make_subplots(rows=3, cols=2, subplot_titles=("SSP1", "SSP2", "SSP3", "SSP4", "SSP5"))
    
    roww = [1, 1, 2, 2, 3]
    coll = [1, 2, 1, 2, 1]
    
    for i in range(len(coll)):
        for j in range(len(emi)):
            y=emi[j][i]
            fig.add_trace(go.Scatter(x=years, y=y, opacity=0.8, line=dict(color=list_colors[j+2]), 
                                    name=name[j]), row=roww[i], col=coll[i])
            
        fig.add_trace(go.Scatter(x=years, y=target_2030_, opacity=0.8, mode ='lines',
                             line=dict(color="Black"), name="EU target"), row=roww[i], col=coll[i])
                            
        fig.update_xaxes(title_text="Years", row=roww[i], col=coll[i])
        fig.update_yaxes(title_text="CO2 emissions [MtCO2eq]", row=roww[i], col=coll[i])
    
    fig.update_layout(title_text="Comparison between forecasts with projections for each SSP family",
                      height=1000, width=1000, showlegend=True, legend_orientation="h")
    if show is True:
        fig.show()
    
def show_real_projections(file, show=False):
    list_file = list()
    projection_CO2 = list()
    for i in range(5):
        list_file.append(file+str(i+1)+".csv")
        projection_CO2.append(pd.read_csv(list_file[i]).iloc[0][1:])
    
    if show is True:
        show_projection_CO2(projection_CO2)
    
    return projection_CO2

def show_goals(emi_list, name, show=False):
    fig = go.Figure()
    fig = make_subplots(rows=3, cols=2, subplot_titles=("SSP1", "SSP2", "SSP3", "SSP4", "SSP5"))
    
    roww = [1, 1, 2, 2, 3]
    coll = [1, 2, 1, 2, 1]

    x_name = name
  
    for i in range(len(coll)):
        values = list()
        color = list()
        for j in range(len(emi_list)):
            values.append(emi_list[j][i][11])
            color.append(list_colors[j+1])
        values.append(target_2030)
        color.append(list_colors[0])
    
        fig.add_trace(go.Bar(x=x_name, y=values, marker_color = color, textposition='auto'),
                      row=roww[i], col=coll[i])

        fig.update_xaxes(row=roww[i], col=coll[i])
        fig.update_yaxes(title_text="CO2 emissions [MtCO2eq2]", row=roww[i], col=coll[i])
        
    fig.update_layout(title_text="Target set by the European Union for CO2 emissions"+
                      " in 2030 compared to forecasts and projections",
                      height=1000, width=1000, showlegend=False, legend_orientation="h")
    if show is True:
        fig.show()

def from_rates_to_data(emi, val_2017):
    emi[0] = val_2017 + emi[0]*val_2017/100
    for i in range(1, 13):
        emi[i] = emi[i-1] + emi[i]*emi[i-1]/100
    return emi

def from_value_to_rates(value_1, value_2):

    zipped_lists = zip(value_1, value_2)
    fossil_emi_rates_fact = [100*(value_2-value_1)/value_1 for (value_1, value_2) in zipped_lists]
        
    return np.array(fossil_emi_rates_fact)

def kernel_model(samples, var):
    emi_sim = np.zeros((13, samples)) 
     
    for i in range(13):
        pop = np.random.normal(var['mean_pop'].iloc[i], var['std_pop'].iloc[i], samples)
        gdp_cap = np.random.normal(var['mean_gdp'].iloc[i], var['std_gdp'].iloc[i], samples)
        gdp = pop*gdp_cap
        oil_cons = np.random.normal(var['mean_oil'].iloc[i], var['std_oil'].iloc[i], samples)
        coal_cons = np.random.normal(var['mean_coal'].iloc[i], var['std_oil'].iloc[i], samples)
        gas_cons = np.random.normal(var['mean_gas'].iloc[i], var['std_oil'].iloc[i], samples)
        fossil_cons = np.random.normal(var['mean_fossil'].iloc[i], var['std_fossil'].iloc[i], samples)

        X = np.array([pop, gdp, gas_cons, oil_cons, coal_cons, fossil_cons]).transpose()
        emi_sim[i] = loaded_model.predict(X)
    
    return emi_sim
    

def kaya_identity(cons_oil, cons_coal, cons_gas, emi_fact_coal, emi_fact_oil, emi_fact_gas):
    cons_coef_oil = 2.293
    cons_coef_coal = 4.019
    cons_coef_gas = 2.236 # [MtCO2/MtOilEq]
    res1 = ((cons_oil*emi_fact_oil)+(cons_coal*emi_fact_oil)+(cons_gas*emi_fact_oil))
    res2 = ((cons_oil*cons_coef_oil)+(cons_coal*cons_coef_coal)+(cons_gas*cons_coef_gas))
    return res2

def kaya_identity_rates(gdp_rates, ei_rates, ci_rates):
    return (1+gdp_rates)*(1+ei_rates)*(1+ci_rates)-1


def get_emission_factor(oil_cons, coal_cons, gas_cons, emi_fact_oil, emi_fact_coal, emi_fact_gas, total_cons):
    emi = kaya_identity(oil_cons, coal_cons, gas_cons, emi_fact_oil, emi_fact_coal, emi_fact_gas)

    return emi/total_cons
    
def get_carbon_intensity(samples, coef, var, distri):
    
    carbon_intensity = np.zeros((14, samples))
    # Emission factor (fossil energy) of year 2017
    carbon_intensity_2017 = 2.012290082158105
    carbon_intensity_2017_ = list()
    for i in range(samples): carbon_intensity_2017_.append(carbon_intensity_2017)
    carbon_intensity[0] = np.array(carbon_intensity_2017_)
   
    for i in range(13):
        if distri is 'normal':
            coef_oil = np.random.normal(coef['oil']['mean'], coef['oil']['std'], samples)
            coef_coal = np.random.normal(coef['coal']['mean'], coef['coal']['std'], samples)
            coef_gas = np.random.normal(coef['gas']['mean'], coef['gas']['std'], samples)
        elif distri is 'triangular':
            coef_oil = np.random.triangular(coef['oil']['left'], coef['oil']['mode'],
                                            coef['oil']['right'], samples)
            coef_coal = np.random.triangular(coef['coal']['left'], coef['coal']['mode'],
                                             coef['coal']['right'], samples)
            coef_gas = np.random.triangular(coef['gas']['left'], coef['gas']['mode'],
                                            coef['gas']['right'], samples)
        else:
            return print("Wrong distribution")

        oil_cons = np.random.normal(var['mean_oil'].iloc[i], var['std_oil'].iloc[i], samples)
        coal_cons = np.random.normal(var['mean_coal'].iloc[i], var['std_oil'].iloc[i], samples)
        gas_cons = np.random.normal(var['mean_gas'].iloc[i], var['std_oil'].iloc[i], samples)
   
        total_cons = np.random.normal(var['mean_fossil'].iloc[i], var['std_fossil'].iloc[i], samples)
        carbon_intensity[i+1] = get_emission_factor(oil_cons, coal_cons, gas_cons, coef_oil,
                                                  coef_coal, coef_gas, total_cons)

    carbon_intensity_rates = np.zeros((13, samples))
    for i in range(len(carbon_intensity)-1):
        carbon_intensity_rates[i] = (carbon_intensity[i+1]-carbon_intensity[i])/carbon_intensity[i]
        
    return carbon_intensity_rates
    
    
def monte_carlo_simulation_rates(samples, distri, coef, var, var_rates):

    emi_sim = np.zeros((13, samples)) 
   
    # 2018 CO2 emission : 3457 MtCO2.
    #emission_2018 = 3457 
    
    emission_2017 = 3144
    
    carbon_intensity_rates = get_carbon_intensity(samples, coef, var, distri)
    
    for i in range(13):
    
        gdp_rates = np.random.normal(var_rates['rates_mean_gdp'].iloc[i],
                                     var_rates['rates_std_gdp'].iloc[i], samples)
        ei_rates = np.random.normal(var_rates['rates_mean_ei'].iloc[i],
                                    var_rates['rates_std_ei'].iloc[i], samples)
        emi_sim[i] = kaya_identity_rates(gdp_rates, ei_rates, carbon_intensity_rates[i])
       
    emi_sim = from_rates_to_data(emi_sim, emission_2017)
    
    return emi_sim

def monte_carlo_simulation(samples, distri, coef, var):

    emi_sim = np.zeros((13, samples)) 
    
    for i in range(13):
        oil_cons = np.random.normal(var['mean_oil'].iloc[i], var['std_oil'].iloc[i], samples)
        coal_cons = np.random.normal(var['mean_coal'].iloc[i], var['std_oil'].iloc[i], samples)
        gas_cons = np.random.normal(var['mean_gas'].iloc[i], var['std_oil'].iloc[i], samples)
        
        if distri is 'normal':
            coef_oil = np.random.normal(coef['oil']['mean'], coef['oil']['std'], samples)
            coef_coal = np.random.normal(coef['coal']['mean'], coef['coal']['std'], samples)
            coef_gas = np.random.normal(coef['gas']['mean'], coef['gas']['std'], samples)
        elif distri is 'triangular':
            coef_oil = np.random.triangular(coef['oil']['left'], coef['oil']['mode'],
                                            coef['oil']['right'], samples)
            coef_coal = np.random.triangular(coef['coal']['left'], coef['coal']['mode'],
                                             coef['coal']['right'], samples)
            coef_gas = np.random.triangular(coef['gas']['left'], coef['gas']['mode'],
                                            coef['gas']['right'], samples)
        else:
            return print("Wrong distribution")
        
        emi_sim[i] = kaya_identity(oil_cons, coal_cons, gas_cons, coef_coal, coef_oil, coef_gas)
    
    return emi_sim
  

def get_carbon_emissions(samples, distri=False, rates=False, mean=True, show=False):
    
    list_average_emi_ssp = list()
    list_dev_emi_ssp = list()
        
    for ssp in name_ssp:

        if rates is False:
            emi_ssp = monte_carlo_simulation(samples, distri, param_dict[distri], ssp_distri[ssp])
        elif rates is True:
            emi_ssp = monte_carlo_simulation_rates(samples, distri, param_dict[distri],
                                                   ssp_distri[ssp], ssp_distri_rates[ssp])
        elif rates is 'kernel':
            emi_ssp = kernel_model(samples, ssp_distri[ssp]) ########################################################

        list_average_emi_ssp.append(mean_forecast(emi_ssp))
        list_dev_emi_ssp.append(std_forecast(emi_ssp))

    if show is True:
        if rates is False:
            show_average_forecast(list_average_emi_ssp, list_dev_emi_ssp, samples,'Kaya model')
            show_forecast(list_average_emi_ssp, list_dev_emi_ssp, samples, 'Kaya model')
            
        elif rates is True:
            show_average_forecast(list_average_emi_ssp, list_dev_emi_ssp, samples, 'derived Kaya model')
            show_forecast(list_average_emi_ssp, list_dev_emi_ssp, samples, 'derived Kaya model')
        elif rates is 'kernel':
            show_average_forecast(list_average_emi_ssp, list_dev_emi_ssp, samples, 'a Kernel Ridge model')
            show_forecast(list_average_emi_ssp, list_dev_emi_ssp, samples, 'a Kernel Ridge model')
    
    if mean is True:
        return list_average_emi_ssp
    elif mean is False:
        return emi_ssp
    else:
        return print('Bad value for mean parameter')


# RESULTS OF MONTE CARLO SIMULATION

In [8]:
samples = 10000
emi_kaya_norm = get_carbon_emissions(samples, 'normal', show=True)
emi_kaya_tri = get_carbon_emissions(samples, 'triangular', show=False)

samples = 5000
emi_kaya_norm_rates = get_carbon_emissions(samples, 'normal', rates=True, show=False)
emi_kaya_tri_rates = get_carbon_emissions(samples, 'triangular', rates=True, show=False)

kernel_emi = get_carbon_emissions(samples=10000, rates='kernel', show=False)

real_proj = show_real_projections(file_real_proj, show=False)
show_goals([emi_kaya_norm, emi_kaya_norm_rates, real_proj],
           name = ['Kaya model', 'Derived of Kaya model', 'Projection', 'Target'], show=True)

projection_vs_forecasts([emi_kaya_norm, emi_kaya_norm_rates, real_proj],
                        name=['Kaya model', 'Derived of Kaya model', 'Projections'], show=True)


### Smallest number of iteration for the simulation to be constant

In [4]:
from sklearn.metrics import mean_squared_error
from math import sqrt

samples_list = [10, 100, 1000, 5000, 10000]
n_values = ['10', '100', '1000', '5000', '10000']
n = 10

def smallest_number_of_repetition(samples_list, n_values, n, distri, rates=False):
    sum_diff = np.zeros((len(samples_list), n+1))
    for i in range(n):
        for j in range(len(samples_list)):
            result = list()
            for k in range(2):
                if rates is False:
                    result.append(get_carbon_emissions(samples_list[j], distri)[0])
                else:
                    result.append(get_carbon_emissions(samples_list[j], distri, rates=True)[0])
                
            sum_diff[j][i] = sqrt(mean_squared_error(result[0], result[1]))

    for i in range(sum_diff.shape[0]):
        sum_diff[i][sum_diff.shape[1]-1] = np.var(sum_diff[i : sum_diff.shape[1]-1])     

    dict_ = {}

    for i in range(sum_diff.shape[1]-1):
        dict_[str(i+1)] = sum_diff[:,i]

    dict_['var'] = sum_diff[:,sum_diff.shape[1]-1]
    data_n = pd.DataFrame(data=dict_, index=n_values)
    return data_n

smallest_number_of_repetition(samples_list, n_values, n, 'normal', rates=True)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,var
10,80.941221,46.06393,101.485156,121.469667,30.514417,43.783152,67.936265,123.44035,60.279457,36.050089,882.355575
100,31.745467,19.481886,30.384968,19.167454,6.941349,23.561485,52.038927,30.637351,7.61771,42.246611,137.679517
1000,7.475156,8.259472,1.25524,7.874266,9.380261,5.362734,5.171189,11.130039,13.51346,8.466281,11.566162
5000,2.891239,1.79232,1.661117,6.007556,2.231502,7.995885,1.528065,5.499681,3.455311,2.320533,3.899901
10000,1.451838,5.497244,1.309779,1.668081,1.8831,2.996048,3.06863,0.805273,1.913329,4.344672,2.310289


In [5]:
smallest_number_of_repetition(samples_list, n_values, n, 'normal', rates=False)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,var
10,187.090396,157.534929,202.758073,192.518285,115.422823,160.990716,140.574446,157.23387,98.338936,108.652325,3235.111687
100,70.162549,61.395106,41.643517,49.333833,33.288161,58.952071,53.902245,51.811554,29.869407,44.900551,359.706303
1000,17.053343,16.056521,11.122465,11.636155,13.466534,14.554309,15.528812,11.661233,14.88729,13.503131,22.386566
5000,7.592224,8.228422,3.628504,8.063754,7.184899,6.728096,8.922104,4.709246,7.535963,7.950477,5.849624
10000,5.560299,4.134437,3.669266,4.983999,3.223498,7.480591,3.920889,4.521418,4.145041,5.007475,2.993179


## Distribution of the emissions and confidence interval in 2030

In [6]:
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats

def histogram_confidence_interval(distri, ssp, histo=False, confidence=False, rates=False):
    
    if rates is False:
        samples = 10000
        emi = monte_carlo_simulation(samples, distri, param_dict[distri], ssp_distri[ssp])
    else:
        samples = 5000
        emi = monte_carlo_simulation_rates(samples, distri, param_dict[distri], ssp_distri[ssp], ssp_distri_rates[ssp])
    
    # only 2030
    x = emi[12,:]
    
    mean = np.mean(x)
    std = np.std(x)
    left_born = mean - 1.96*(std/samples**0.5)
    right_born = mean + 1.96*(std/samples**0.5)
    conf_int = [left_born, right_born]
    
    if confidence is True:
        print(ssp + ' => confidence interval' + ' : ' + str(conf_int)
              + ' and '+'standard deviation' + ' : ' + str(std))
    
    if histo is True:
        if rates is True:
            plt.title('Histogram of carbon emission in 2030'+'\n'+
                      'with the derived Kaya Identity applied on '+ssp+'.')
        else:
            plt.title('Histogram of carbon emission in 2030'+'\n'+
                      'with the Kaya Identity applied on '+ssp+'.')
            
        plt.hist(x, color = 'blue', edgecolor = 'black', bins = int(180/5))
        plt.xlabel('Carbon emissions [MtCO2]')
        plt.ylabel('Frequence')


In [7]:
for ssp in name_ssp:
    histogram_confidence_interval('normal', ssp, confidence=True, rates=True)

ssp1 => confidence interval : [1509.0043159152153, 1518.1853777693739] and standard deviation : 165.61201774409477
ssp2 => confidence interval : [2116.7284985581236, 2126.5789050380554] and standard deviation : 177.68594947458357
ssp3 => confidence interval : [2688.9342353682277, 2697.6976155559723] and standard deviation : 158.0776927772889
ssp4 => confidence interval : [1942.7465305627902, 1954.4901092163796] and standard deviation : 211.83581891200333
ssp5 => confidence interval : [1802.233629788332, 1810.486723915139] and standard deviation : 148.8729291539781
