### This notebook preprocesses data for stacked area charts

In [1]:
# Load some libraries
import pandas as pd
import json
from pandas import DataFrame
import numpy as np
import warnings
# NOTE(review): with no category/module filter this hides every warning for the
# rest of the session, including pandas' own (e.g. SettingWithCopyWarning).
warnings.filterwarnings("ignore")

In [2]:
# Load the raw input table from the data directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer="../data/data0.csv")

In [3]:
# Preview the first rows of the raw table to check the columns that were read.
df.head()

Unnamed: 0,Year,Country,Code,Parameter,Cropland,Grazing,Forest,Fishing,Builtup,Carbon,Total
0,1975,United Arab Emirates,AE,GDP,,,,,,,102479.1875
1,1976,United Arab Emirates,AE,GDP,,,,,,,102319.726562
2,1977,United Arab Emirates,AE,GDP,,,,,,,107452.179688
3,1978,United Arab Emirates,AE,GDP,,,,,,,92822.359375
4,1979,United Arab Emirates,AE,GDP,,,,,,,100480.4375


In [4]:
# Drop the columns the stacked area charts do not use.
# Rebinding the name (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second execution is a no-op rather than a
# KeyError on the already-removed columns.
df = df.drop(columns=['Code', 'Total'], errors='ignore')

In [5]:
# Confirm that Code and Total are no longer present.
df.head()

Unnamed: 0,Year,Country,Parameter,Cropland,Grazing,Forest,Fishing,Builtup,Carbon
0,1975,United Arab Emirates,GDP,,,,,,
1,1976,United Arab Emirates,GDP,,,,,,
2,1977,United Arab Emirates,GDP,,,,,,
3,1978,United Arab Emirates,GDP,,,,,,
4,1979,United Arab Emirates,GDP,,,,,,


In [6]:
# Aggregate region names (plus the global 'World' series) used to filter the
# Country column; listed in alphabetical order.
regions = [
    'Africa', 'Asia', 'Caribbean', 'Central America', 'Central Asia',
    'Eastern Africa', 'Eastern Asia', 'Eastern Europe', 'Europe',
    'Latin America and the Caribbean', 'Middle Africa', 'North America',
    'Northern Africa', 'Northern Europe', 'Oceania', 'South America',
    'South-Eastern Asia', 'Southern Africa', 'Southern Asia',
    'Southern Europe', 'Western Africa', 'Western Asia', 'Western Europe',
    'World',
]

### 1. Ecological Footprint as Number of Earths

In [7]:
# Keep only the "Earths" rows, then remove the Parameter column, which is
# constant after the filter.
# Chaining .drop() onto the selection returns a new frame instead of mutating
# a filtered slice of df in place (the pattern that can raise pandas'
# SettingWithCopyWarning), and makes the cell repeatable.
df1 = df.loc[df["Parameter"] == "Earths"].drop(columns=["Parameter"])

In [8]:
# There are too many individual countries to chart, so keep only the
# aggregate regions and the global ("World") series.
# The mask is built from df1 itself: the previous version took it from the
# unfiltered df, which only worked because .loc aligns a boolean Series on the
# row index that df1 still shares with df.
df2 = (
    df1.loc[df1['Country'].isin(regions)]
    .fillna(0)  # missing component values are written out as 0
    # Country descending, Year ascending within each country
    .sort_values(by=['Country', 'Year'], ascending=[False, True])
)

In [9]:
# Spot-check the filtered, sorted "Earths" table before writing it out.
df2.head()

Unnamed: 0,Year,Country,Cropland,Grazing,Forest,Fishing,Builtup,Carbon
66212,1961,World,0.150054,0.084768,0.137532,0.030539,0.008501,0.320909
66411,1962,World,0.155637,0.085009,0.137814,0.031741,0.008955,0.332771
66610,1963,World,0.157349,0.086674,0.139167,0.032749,0.009167,0.356304
66809,1964,World,0.161658,0.08561,0.143449,0.031934,0.00958,0.377762
67008,1965,World,0.163904,0.087516,0.144819,0.034261,0.009868,0.397384


In [10]:
# Write the regional "Earths" table to CSV; the row index is not written.
df2.to_csv("../data/StackAreaChart_Earths.csv", index=False)

### 2. Biocapacity Per Capita

In [11]:
# Keep only the "BiocapPerCap" rows, then remove the Parameter column, which
# is constant after the filter.
# Chaining .drop() onto the selection returns a new frame instead of mutating
# a filtered slice of df in place (the pattern that can raise pandas'
# SettingWithCopyWarning), and makes the cell repeatable.
df3 = df.loc[df["Parameter"] == "BiocapPerCap"].drop(columns=["Parameter"])


In [12]:
# Preview the BiocapPerCap rows (all countries, before the region filter).
df3.head()

Unnamed: 0,Year,Country,Cropland,Grazing,Forest,Fishing,Builtup,Carbon
5,1980,United Arab Emirates,0.035951,0.007318,0.447696,4.189307,0.0,0.0
12,1981,United Arab Emirates,0.040069,0.006819,0.416497,3.903319,0.0,0.0
19,1982,United Arab Emirates,0.055132,0.00639,0.392868,3.658069,0.0,0.0
26,1983,United Arab Emirates,0.057204,0.006051,0.373031,3.46401,0.0,0.0
33,1984,United Arab Emirates,0.049506,0.005714,0.354303,3.270834,0.0,0.0


In [13]:
# Keep only the aggregate regions and the global ("World") series.
# The mask is built from df3 itself: the previous version took it from the
# unfiltered df, which only worked because .loc aligns a boolean Series on the
# row index that df3 still shares with df.
df4 = (
    df3.loc[df3['Country'].isin(regions)]
    .fillna(0)  # missing component values are written out as 0
    # Country descending, Year ascending within each country
    .sort_values(by=['Country', 'Year'], ascending=[False, True])
)

In [14]:
# Spot-check the filtered, sorted biocapacity table before writing it out.
df4.head()

Unnamed: 0,Year,Country,Cropland,Grazing,Forest,Fishing,Builtup,Carbon
66065,1961,World,0.467549,0.478144,1.778773,0.36491,0.026488,0.0
66264,1962,World,0.478817,0.469073,1.743816,0.357243,0.027549,0.0
66463,1963,World,0.474732,0.458938,1.706461,0.349266,0.027658,0.0
66662,1964,World,0.480774,0.44992,1.67307,0.341765,0.02849,0.0
66861,1965,World,0.478975,0.44082,1.639838,0.333822,0.028836,0.0


In [15]:
# Write the regional biocapacity table to CSV; the row index is not written.
df4.to_csv("../data/StackAreaChart_Biocap.csv", index=False)