In [1]:
import os
import sys
sys.path.insert(0, "../../")
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings("ignore")

In [62]:
folder = sys.path[0] + "outputs/text/"
epu_files = [
    f"{folder}{con}/epu/{con}_epu.csv" for con in os.listdir(folder)
    if con not in [".DS_Store", "tonga", "pacific"]
]
sentiment_files = [
    f"{folder}{con}/sentiment/{con}_sentiment.csv" for con in os.listdir(folder)
    if con not in [".DS_Store", "tonga", "pacific"]
]

epu_inflation_files = [
    f"{folder}{con}/epu/{con}_epu_inflation.csv" for con in os.listdir(folder)
    if con not in [".DS_Store", "tonga", "pacific"]
]

In [80]:
inflation_df = pd.DataFrame()
for file in epu_inflation_files:
    country_name = file.split("/")[-1].replace("_epu_inflation.csv", "")
    df = pd.read_csv(file).drop("Unnamed: 0", axis=1)
    df["date"] = pd.to_datetime(df["date"])
    temp = df[["date", "epu_weighted", "epu_unweighted"]]
    temp.columns = [
        f"inflation_{i}" if i != "date" else i for i in temp.columns
    ]
    temp["country"] = country_name
    inflation_df = pd.concat([inflation_df, temp],
                             axis=0).reset_index(drop=True)

In [81]:
df = pd.read_csv("epu_macro.csv")
df["date"] = pd.to_datetime(df["date"])
inflation_df["date"] = pd.to_datetime(inflation_df["date"])

In [82]:
df = df.merge(inflation_df, how="outer", on=["country", "date"])
df.to_csv("epu_macro_2.csv", encoding="utf-8")

In [83]:
df["inflation_epu_weighted"] = df["inflation_epu_weighted"].fillna(0)

In [84]:
df

Unnamed: 0,country,date,epu_weighted,epu_unweighted,sentiment,cpi,inflation_epu_weighted,inflation_epu_unweighted
0,fiji,2015-01-01,0.000000,0.000000,0.968600,,0.000000,0.000000
1,fiji,2015-02-01,0.000000,0.000000,0.796850,,0.000000,0.000000
2,fiji,2015-03-01,11.154694,56.768133,0.940367,,19.196975,92.090747
3,fiji,2015-04-01,0.000000,0.000000,0.704729,,0.000000,0.000000
4,fiji,2015-05-01,80.249882,47.437027,-0.101017,,0.000000,0.000000
...,...,...,...,...,...,...,...,...
534,samoa,2023-08-01,126.604968,88.239137,0.766190,132.8,95.383695,68.206170
535,samoa,2023-09-01,176.511491,124.056035,0.717623,131.8,142.481868,102.740940
536,samoa,2023-10-01,103.438953,71.492475,0.702316,132.3,48.706568,34.538444
537,samoa,2023-11-01,150.670375,104.568869,0.739673,132.4,189.190800,134.714261


In [87]:
import statsmodels.formula.api as smf
smf.ols("cpi~C(country) + epu_weighted", data=df).fit().summary()

0,1,2,3
Dep. Variable:,cpi,R-squared:,0.631
Model:,OLS,Adj. R-squared:,0.627
Method:,Least Squares,F-statistic:,162.1
Date:,"Tue, 02 Apr 2024",Prob (F-statistic):,3.21e-61
Time:,12:18:28,Log-Likelihood:,-1123.0
No. Observations:,288,AIC:,2254.0
Df Residuals:,284,BIC:,2269.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,112.2130,1.577,71.151,0.000,109.109,115.317
C(country)[T.papua_new_guinea],31.2723,1.771,17.657,0.000,27.786,34.758
C(country)[T.samoa],-2.2791,1.803,-1.264,0.207,-5.828,1.269
C(country)[T.solomon_islands],0,0,,,0,0
C(country)[T.vanuatu],0,0,,,0,0
epu_weighted,0.0129,0.006,2.315,0.021,0.002,0.024

0,1,2,3
Omnibus:,2.956,Durbin-Watson:,0.1
Prob(Omnibus):,0.228,Jarque-Bera (JB):,2.69
Skew:,0.229,Prob(JB):,0.261
Kurtosis:,3.123,Cond. No.,inf


In [3]:
epu_df = pd.DataFrame()
for file in epu_files:
    country_name = file.split("/")[-1].replace("_epu.csv", "")
    df = pd.read_csv(file).drop("Unnamed: 0", axis=1)
    df["date"] = pd.to_datetime(df["date"])
    temp = df[["date", "epu_weighted", "epu_unweighted"]]
    temp["country"] = country_name
    epu_df = pd.concat([epu_df, temp], axis=0).reset_index(drop=True)

In [4]:
sentiment_df = pd.DataFrame()
for file in sentiment_files:
    country_name = file.split("/")[-1].replace("_sentiment.csv", "")
    df = pd.read_csv(file).drop("Unnamed: 0", axis=1)
    df["date"] = pd.to_datetime(df["date"])
    temp = df[["date", "score"]].rename({"score": "sentiment"}, axis=1)
    temp["country"] = country_name
    sentiment_df = pd.concat([sentiment_df, temp], axis=0).reset_index(drop=True)

In [5]:
combined_df = epu_df.merge(sentiment_df, how="outer", on=["country", "date"])
combined_df = combined_df[[
    "country", "date", "epu_weighted", "epu_unweighted", "sentiment"
]]
combined_df.to_csv("epu_macro.csv", encoding="utf-8")