In [143]:
import pandas as pd
import numpy as np
from functools import reduce

## Greenhouse emission data

In [2]:
# Read the raw emissions table of each greenhouse gas (one CSV file per gas).
co2, ch4, hfc, n2o, nf3, pfc, sf6 = map(pd.read_csv, (
    'ghg/co2.csv',
    'ghg/ch4.csv',
    'ghg/hfc.csv',
    'ghg/n2o.csv',
    'ghg/nf3.csv',
    'ghg/pfc.csv',
    'ghg/sf6.csv',
))

In [19]:
# Total every gas per value of the 'Year' column. The result is indexed by year
# in ascending order (groupby sorts its keys by default).
#
# numeric_only=True is spelled out on purpose: the cell that builds
# `ghg_emission` flattens each result with reshape(26), which only works when a
# single value column is left. Older pandas dropped non-numeric columns from
# sum() silently; pandas >= 2.0 no longer does, so any text column in the CSVs
# would otherwise end up in the result.
# NOTE(review): the CSV schema is not visible here -- confirm that exactly one
# numeric column remains besides 'Year'.
co2_sum = co2.groupby('Year').sum(numeric_only=True)
ch4_sum = ch4.groupby('Year').sum(numeric_only=True)
hfc_sum = hfc.groupby('Year').sum(numeric_only=True)
n2o_sum = n2o.groupby('Year').sum(numeric_only=True)
nf3_sum = nf3.groupby('Year').sum(numeric_only=True)
pfc_sum = pfc.groupby('Year').sum(numeric_only=True)
sf6_sum = sf6.groupby('Year').sum(numeric_only=True)

In [47]:
# Combine the yearly totals of all gases into one table, newest year first.
#
# Each `*_sum` frame comes from groupby('Year').sum(), whose rows are sorted by
# ASCENDING year. The table is kept in descending order because later cells use
# the last row as the baseline, so the values are reversed explicitly. The
# 'year' column is read from the index rather than from a hard-coded range, so
# every row is labelled with the year it was actually computed for.
_yearly_totals = {
    'co2': co2_sum,
    'ch4': ch4_sum,
    'hfc': hfc_sum,
    'n2o': n2o_sum,
    'nf3': nf3_sum,
    'pfc': pfc_sum,
    'sf6': sf6_sum,
}

# All gases must cover the same years, otherwise the columns would be misaligned.
_years = co2_sum.index
for _gas, _totals in _yearly_totals.items():
    assert _totals.index.equals(_years), '%s covers different years than co2' % _gas

ghg_emission = pd.DataFrame(data={
    # reshape(len(...)) fails loudly unless exactly one value column is present.
    _gas: _totals.values.reshape(len(_totals))[::-1]
    for _gas, _totals in _yearly_totals.items()
})
ghg_emission['year'] = _years.values[::-1]

### Convert data

In [62]:
# Multipliers applied to the non-CO2 columns of `ghg_emission` in the next cell.
# NOTE(review): the values look like 100-year global-warming potentials
# (CO2-equivalent weights) -- confirm the source they were taken from.
ch4_factor, n2o_factor = 25, 298
hfc_factor, pfc_factor = 14800, 13300
nf3_factor, sf6_factor = 17200, 22800

In [63]:
# Scale every non-CO2 column by its factor; the 'co2' column is left as is.
# NOTE: `ghg_emission` is updated in place, so running this cell a second time
# applies the factors twice.
for _gas, _factor in (
    ('ch4', ch4_factor),
    ('hfc', hfc_factor),
    ('n2o', n2o_factor),
    ('nf3', nf3_factor),
    ('pfc', pfc_factor),
    ('sf6', sf6_factor),
):
    ghg_emission[_gas] = ghg_emission[_gas] * _factor

In [64]:
def compute_variance(data):
    """Return the relative change of every value with respect to the last one.

    Despite its name this is not a statistical variance: each element is
    mapped to ``(value - base) / base`` where ``base`` is ``data[-1]``, so the
    last element of the result is always 0.

    Parameters
    ----------
    data : sequence of numbers (list, tuple, 1-D numpy array, ...)
        Must support ``len()`` and negative integer indexing.

    Returns
    -------
    list
        One relative change per input element, in input order. An empty input
        gives an empty list.

    Notes
    -----
    A baseline of 0 is not special-cased: plain Python numbers raise
    ``ZeroDivisionError``, numpy floats produce ``inf``/``nan``.
    """
    # Without a last element there is no baseline to compare against.
    if len(data) == 0:
        return []

    base = data[-1]
    return [(value - base) / base for value in data]

In [65]:
# Relative change of each gas with respect to the last row of `ghg_emission`.
(co2_var, ch4_var, hfc_var, n2o_var, nf3_var, pfc_var, sf6_var) = (
    compute_variance(ghg_emission[gas].values)
    for gas in ('co2', 'ch4', 'hfc', 'n2o', 'nf3', 'pfc', 'sf6')
)

In [66]:
# Collect the per-gas relative changes into one table.
# Every `*_var` list was computed row by row from `ghg_emission`, so the year
# labels are taken from that same table instead of repeating a hard-coded
# range that could drift out of sync with the rows.
ghg_variation = pd.DataFrame(data={
    'co2': co2_var,
    'ch4': ch4_var,
    'hfc': hfc_var,
    'n2o': n2o_var,
    'nf3': nf3_var,
    'pfc': pfc_var,
    'sf6': sf6_var,
    'year': ghg_emission['year'].values
})

In [117]:
# Persist the relative-change table; the "Join data" section reads it back.
ghg_variation.to_csv(path_or_buf='ghg/summerized_ghg_data.csv', index=False)

## Ocean heat content

### Load and rename columns

In [118]:
# Read the space-separated, header-less ocean file; every field is parsed as float64.
ocean = pd.read_csv(
    'oceans/t00mn1.csv',
    sep=' ',
    header=None,
    dtype=np.float64,
)

In [119]:
# Label the columns 0..9 (the file was read with header=None, so it has no
# names of its own). Assigning ten labels only succeeds when the frame has
# exactly ten columns.
ocean.columns = range(10)

In [120]:
# Keep a copy of the parsed ten-column table.
ocean.to_csv(path_or_buf='oceans/data.csv', index=False)

In [121]:
# Re-flow the flat ten-column values into rows of 360 values each.
# 5940 rows = 66 x 90, which matches the year labels built further down
# (66 years, 90 rows per year).
# NOTE: this replaces `ocean`, so the cell cannot be re-run on its own output.
_ocean_grid = ocean.values.reshape((5940, 360))
ocean = pd.DataFrame(data=_ocean_grid, columns=range(360))

In [122]:
# Turn the -99.0 entries into NaN so the statistics below skip them.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
ocean = ocean.replace(-99.0, np.nan)

In [123]:
# One year label per row of the ocean table: 66 consecutive years starting at
# 1955, each repeated for 90 rows (5940 labels in total).
years = np.array([
    1955 + offset
    for offset in range(66)
    for _ in range(90)
])

In [124]:
# Attach the year label of every row as an extra 'year' column (added in place;
# `years` must have one entry per row of `ocean`).
ocean['year'] = years

In [125]:
# Save the 360-column table together with its 'year' column.
ocean.to_csv(path_or_buf='oceans/year_data.csv', index=False)

In [126]:
# Per-year statistics of each of the 360 columns (one row per year, one column
# per original column). The grouping is built once and reused.
_ocean_by_year = ocean.groupby('year')

_median = _ocean_by_year.median()
_mean = _ocean_by_year.mean()
_variance = _ocean_by_year.var()
_max = _ocean_by_year.max()
_min = _ocean_by_year.min()

In [127]:
# Reduce the ocean grid to ONE value per year and statistic.
#
# The previous version collapsed every per-column statistic with
# `.min(axis=1)`, so the arrays named "max", "mean", "median" and "variance"
# actually held the minimum over the columns of those statistics. It also
# overwrote its own inputs, which made the cell fail when run a second time.
#
# Each statistic is now computed over all cells that belong to a year, directly
# from `ocean`: melt() stacks the 360 value columns into one long column and
# keeps the 'year' label of every cell. NaN cells are skipped by the
# reductions. Results are ordered by ascending year (groupby sorts its keys),
# one entry per distinct year. `variance` is the sample variance (pandas
# default, ddof=1).
_cells_by_year = (
    ocean.melt(id_vars='year', value_name='cell_value')
    .groupby('year')['cell_value']
)

_median = _cells_by_year.median().values
_mean = _cells_by_year.mean().values
_variance = _cells_by_year.var().values
_max = _cells_by_year.max().values
_min = _cells_by_year.min().values

In [128]:
# Assemble the yearly ocean summary: one row per year, 1955 through 2020.
_ocean_summary = {
    'median': _median,
    'mean': _mean,
    'variance': _variance,
    'max': _max,
    'min': _min,
}
_ocean_summary['year'] = range(1955, 2021)

df = pd.DataFrame(data=_ocean_summary)

In [129]:
# Persist the ocean summary; the "Join data" section reads it back.
df.to_csv(path_or_buf='oceans/summerized_ocean_data.csv', index=False)

## Load temperature

In [130]:
# Load the per-country land temperature records.
temperatures = pd.read_csv(
    filepath_or_buffer='temperatures/GlobalLandTemperaturesByCountry.csv',
)

In [131]:
# Group the records by their 'dt' value.
# NOTE(review): the summary cell below labels the groups with the years
# 1743..2013, so 'dt' presumably holds one value per year -- confirm.
temp_groups = temperatures.groupby(by=['dt'])

In [132]:
# Statistics of 'AverageTemperature' per 'dt' group.
#
# The reductions are restricted to 'AverageTemperature', the only column the
# summary cell reads. Reducing the whole frame relied on pandas silently
# dropping non-numeric columns; pandas >= 2.0 raises instead when
# median/mean/var meet a text column.
# NOTE(review): the file presumably carries a country-name column -- confirm.
# The double brackets keep each result a DataFrame, so
# `_median['AverageTemperature']` etc. still work downstream.
_temperature_groups = temp_groups[['AverageTemperature']]

_median = _temperature_groups.median()
_mean = _temperature_groups.mean()
_variance = _temperature_groups.var()
_max = _temperature_groups.max()
_min = _temperature_groups.min()

In [133]:
# Assemble the temperature summary from the 'AverageTemperature' column of
# every statistic and label the rows with the years 1743 through 2013.
# The row count must equal the number of 'dt' groups, otherwise the
# constructor raises.
_temperature_stats = (
    ('median', _median),
    ('mean', _mean),
    ('variance', _variance),
    ('max', _max),
    ('min', _min),
)
_temperature_summary = {}
for _label, _frame in _temperature_stats:
    _temperature_summary[_label] = _frame['AverageTemperature'].values
_temperature_summary['year'] = range(1743, 2014)

df = pd.DataFrame(data=_temperature_summary)

In [134]:
# Persist the temperature summary; the "Join data" section reads it back.
df.to_csv(path_or_buf='temperatures/summerized_temperature_data.csv', index=False)

## Join data

In [141]:
# Reload the three summary files written by the sections above.
ghg_variation_summery, ocean_summerry, temperature_summery = map(pd.read_csv, (
    'ghg/summerized_ghg_data.csv',
    'oceans/summerized_ocean_data.csv',
    'temperatures/summerized_temperature_data.csv',
))

In [148]:
# Outer-join the three summaries on 'year', left to right, so a year that is
# present in any of them is kept.
# NOTE: the temperature and ocean summaries share the column names median,
# mean, variance, max and min, so pandas tells them apart with its default
# '_x' / '_y' suffixes in the merged table.
dfs = [temperature_summery, ocean_summerry, ghg_variation_summery]

df_final = dfs[0]
for _right in dfs[1:]:
    df_final = pd.merge(df_final, _right, on='year', how='outer')

In [152]:
# Write the merged table.
# NOTE(review): unlike the other exports in this notebook, index=False is not
# passed here, so the row index is written as an extra first column -- confirm
# that is intended.
df_final.to_csv(path_or_buf='merged_data.csv')