In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

from definitions import figures_path_meth, data_source_path, target_countries, fig_size, \
show_plots, save_figs, show_results, target_var, country_col, date_col, incl_countries, incl_years, year_col, month_col
from helper_functions import read_data, get_impl_date, get_trans, get_timescale, get_data_path, select_country_year_measure, downsample_month_to_quarter, select_country_year_measure
from plot_functions import plot_corr
from statistical_tests import stat_test

import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 15})

from sklearn.preprocessing import StandardScaler

In [2]:
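# # Monthly CO2 data
# NOTE: this whole cell is commented out (disabled). It also relies on corr_country_names,
# month_name_to_num and rename_order_scale, none of which are imported in the first cell.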
# def preprocess_co2_m(source_file: str, source_country_col: str, source_year_col: str, var_name: str):
#     # read data
#     co2_m_raw = read_data(source_path=data_source_path, file_name=source_file)
#     co2_m = co2_m_raw.copy()

#     # lowercase, replace country names
#     co2_m[source_country_col] = co2_m[source_country_col].str.lower()
#     co2_m = co2_m.replace({source_country_col: corr_country_names})

#     # select countries and year
#     co2_m = select_country_year_measure(df=co2_m, country_col=source_country_col, year_col=source_year_col)
#     # pivot
#     co2_m = co2_m.melt(id_vars=[source_country_col, source_year_col],
#                        value_vars=co2_m.drop([source_country_col, source_year_col], axis=1),
#                        value_name=var_name)
#     co2_m[month_col] = co2_m.apply(lambda row: month_name_to_num(row.variable), axis=1)
#     co2_m[date_col] = pd.to_datetime(dict(year=co2_m[source_year_col], month=co2_m[month_col], day=1))
#     co2_m = co2_m.drop('variable', axis=1)

#     # rename, order and scale: output = [index, country, date, value]
#     co2_m = rename_order_scale(df=co2_m, source_country_col=source_country_col, source_year_col=source_year_col,
#                                var_name=var_name, var_scale=1e6, timeframe='m')
#     # downsample monthly to quarterly
#     co2_q = downsample_month_to_quarter(df_m=co2_m, var_name=var_name)

#     # export to csv
#     co2_m.to_csv(f'{get_data_path(timeframe="m")}{var_name}_m.csv')
#     co2_q.to_csv(f'{get_data_path(timeframe="q")}{var_name}_q.csv')

#     return co2_m, co2_q

In [9]:
def preprocess_brent_m(source_file: str, source_date_col: str, var_name: str,
                       source_value_col: str = "BRENT"):
    """Load the monthly Brent series, normalise its columns and export it as CSV.

    The raw file is read with ``read_data`` from ``data_source_path``. The source
    date and value columns are renamed to ``date_col`` and ``var_name``, the date
    is parsed to datetime, and ``year_col`` / ``month_col`` are derived from it.
    The frame is then passed through ``select_country_year_measure`` (year
    selection) and downsampled with ``downsample_month_to_quarter``.

    Side effect: writes ``{var_name}_m.csv`` and ``{var_name}_q.csv`` under the
    paths returned by ``get_data_path`` for timeframes ``"m"`` and ``"q"``.

    Parameters
    ----------
    source_file : str
        File name handed to ``read_data``.
    source_date_col : str
        Name of the date column in the raw file.
    var_name : str
        Name given to the value column and used as the CSV file-name prefix.
    source_value_col : str, default "BRENT"
        Name of the value column in the raw file.

    Returns
    -------
    tuple
        ``(brent_m, brent_q)``: the monthly frame and the result of
        ``downsample_month_to_quarter``.
    """
    # read data
    brent_m_raw = read_data(source_path=data_source_path, file_name=source_file)

    # Rename from the caller-supplied source columns rather than hard-coded names.
    # rename() returns a new frame, so the raw frame is not modified below.
    brent_m = brent_m_raw.rename(columns={source_date_col: date_col, source_value_col: var_name})

    # parse dates and derive the calendar keys
    brent_m[date_col] = pd.to_datetime(brent_m[date_col])
    brent_m[year_col] = brent_m[date_col].dt.year
    brent_m[month_col] = brent_m[date_col].dt.month

    # select years
    brent_m = select_country_year_measure(df=brent_m, year_col=year_col)

    # downsample to q
    brent_q = downsample_month_to_quarter(df_m=brent_m, var_name=var_name)

    # export to csv
    brent_m.to_csv(f'{get_data_path(timeframe="m")}{var_name}_m.csv')
    brent_q.to_csv(f'{get_data_path(timeframe="q")}{var_name}_q.csv')

    return brent_m, brent_q

In [10]:
# Build the monthly and quarterly Brent frames (this also writes both CSV files),
# then display the monthly one.
brent_m, brent_q = preprocess_brent_m(
    source_file='brent_1990_2023',
    source_date_col='DATE',
    var_name='brent',
)
brent_m

Unnamed: 0,date,year,month,brent
0,2000-01-01,2000,1,25.51
1,2000-02-01,2000,2,27.78
2,2000-03-01,2000,3,27.49
3,2000-04-01,2000,4,22.76
4,2000-05-01,2000,5,27.74
...,...,...,...,...
235,2019-08-01,2019,8,59.04
236,2019-09-01,2019,9,62.83
237,2019-10-01,2019,10,59.71
238,2019-11-01,2019,11,63.21


In [117]:
# Inspect the configured country column name (imported from definitions).
country_col

'country'

In [18]:
# Key columns for a series without a country dimension: the full key list
# minus its leading country column.
key_cols = [country_col, date_col, year_col, month_col][1:]
key_cols

['date', 'year', 'month']

In [34]:
# 12-step difference of the log Brent price, indexed by date; the leading NaNs
# produced by the differencing are dropped.
x = (
    brent_m
    .set_index(date_col)['brent']
    .pipe(np.log)
    .diff(12)
    .dropna()
)
# Run the project's statistical test on the differenced series at the 0.05 level.
stat_test(x=x, sign_level=0.05)

'non_stationary'