In [57]:
import pandas as pd
import math
import numpy as np
from sklearn.linear_model import LinearRegression

In [44]:
df = pd.read_csv("data/c02_emissions.tsv", sep="\t")

def toNum(i):
    i = i.strip().replace("%","").replace(",", "")
    return int(i)

#convert string numbers to ints
df["c02"] = df["Fossil CO2 Emissions (tons)"].apply(toNum)

df

Unnamed: 0,Year,Fossil CO2 Emissions (tons),CO2 emisions change,CO2 emissions per capita,Population,Pop.change,c02
0,2016,35753305000,0.34%,4.79,7464022049,1.14 %,35753305000
1,2015,35631078000,-0.16%,4.83,7379797139,1.16 %,35631078000
2,2014,35686780000,0.76%,4.89,7295290765,1.17 %,35686780000
3,2013,35416599000,1.80%,4.91,7210581976,1.19 %,35416599000
4,2012,34790564000,0.61%,4.88,7125828059,1.20 %,34790564000
5,2011,34578390000,2.95%,4.91,7041194301,1.21 %,34578390000
6,2010,33587787000,5.72%,4.83,6956823603,1.22 %,33587787000
7,2009,31770516000,-1.10%,4.62,6872767093,1.23 %,31770516000
8,2008,32124428000,0.65%,4.73,6789088686,1.24 %,32124428000
9,2007,31916504000,3.77%,4.76,6705946610,1.24 %,31916504000


In [87]:

#now create a projected dataset based on certain scenarios
years = range(2016, 2100, 1)

#p1 linear projected output (business as usual)
reg_co2 = []
X = df["Year"].to_numpy().reshape(-1, 1)
Y = df["c02"].to_numpy().reshape(-1, 1)
reg = LinearRegression().fit(X,Y)
for year in years:
    co2 = reg.predict(np.array(year).reshape(-1, 1))
    reg_co2.append(int(co2[0][0]))
reg_co2 = np.array(reg_co2)


#simulated constant changes
last_year = df["c02"][0]
print(last_year)

def simulate(years, last_year, delta=0.01, increase=True):
    """ given our years, last value and a delta, simulate new values"""
    arr = []
    for year in years:
        diff = last_year * delta
        if increase:
            this_year = last_year+diff
        else:
            this_year = last_year-diff
        #so large, just round to integer
        this_year = int(this_year)
        arr.append(this_year)
        last_year = this_year
    return arr

#s1 1% increase year after year
s1 = simulate(years, last_year, delta=0.01)

#s2 5% increase year after year
s2 = simulate(years, last_year, delta=0.05)

#s3 10% increase year after year
s3 = simulate(years, last_year, delta=0.1)

#s4 1% decrease year after year
s4 = simulate(years, last_year, delta=0.01, increase=False)

#s5 5% decrease year after year
s5 = simulate(years, last_year, delta=0.05, increase=False)

#s6 10% decrease year after year
s6 = simulate(years, last_year, delta=0.1, increase=False)

#TODO: combine with previous values

sdf = pd.DataFrame({"year":years, 
                    "s0":reg_co2, 
                    "s1":s1, 
                    "s2":s2, 
                    "s3":s3, 
                    "s4":s4, 
                    "s5":s5, 
                    "s6":s6})


def toGIGA(i):
    i = i / 1e9
    return int(i)

#convert to gigatons
sdf["s0"] = sdf["s0"].apply(toGIGA)
sdf["s1"] = sdf["s1"].apply(toGIGA)
sdf["s2"] = sdf["s2"].apply(toGIGA)
sdf["s3"] = sdf["s3"].apply(toGIGA)
sdf["s4"] = sdf["s4"].apply(toGIGA)
sdf["s5"] = sdf["s5"].apply(toGIGA)
sdf["s6"] = sdf["s6"].apply(toGIGA)

sdf.to_csv("data/projected_co2.csv", index=False)
sdf

35753305000


Unnamed: 0,year,s0,s1,s2,s3,s4,s5,s6
0,2016,34,36,37,39,35,33,32
1,2017,35,36,39,43,35,32,28
2,2018,35,36,41,47,34,30,26
3,2019,35,37,43,52,34,29,23
4,2020,36,37,45,57,34,27,21
...,...,...,...,...,...,...,...,...
79,2095,69,79,1771,73237,16,0,0
80,2096,69,80,1860,80560,15,0,0
81,2097,70,80,1953,88616,15,0,0
82,2098,70,81,2051,97478,15,0,0
