In [None]:
import pandas as pd
import numpy as np
import yaml
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import warnings
import scienceplots
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller

import sys
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the environment.
load_dotenv()
# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")
# Apply the 'science' matplotlib style sheet to all figures below.
plt.style.use('science')

# Repository root. Paths in this notebook are built by appending directly to
# this string (e.g. f'{REPO_PATH}src'), so fail fast when it is unset instead
# of silently producing paths such as 'Nonesrc', and make sure a non-empty
# value always ends with a path separator.
_repo_path_env = os.getenv("REPO_PATH")
if _repo_path_env is None:
    raise RuntimeError(
        "Environment variable REPO_PATH is not set; "
        "define it in the environment or in a .env file."
    )
# os.path.join(path, '') appends a separator only when one is missing and
# leaves an empty string untouched (relative paths keep working).
REPO_PATH = os.path.join(_repo_path_env, '')

# Import main utility functions
sys.path.insert(0, rf'{REPO_PATH}src')
from utils.var_utils import plot_criterion, grangers_causation, SentVAR
from utils.main_utils import load_processed

### Load data

In [None]:
# Instrument identifiers handed to load_processed; the resulting `dfs`
# mapping is indexed by these names further down in the notebook.
FUTURES = ['CLc1', 'LCOc1']

def sentiment_cols(
    tags: list[str],
    analyzers: tuple[str, ...] = ('VADER', 'TextBlob'),
) -> list[str]:
    """Build sentiment column names of the form ``<tag>_<analyzer>``.

    Args:
        tags: Prefixes to combine with each analyzer name.
        analyzers: Analyzer suffixes to use. Defaults to both analyzers
            ('VADER', 'TextBlob'), which reproduces the previous
            hard-coded behaviour.

    Returns:
        The combined names, grouped by analyzer: every tag for the first
        analyzer, then every tag for the next one, and so on.
    """
    return [f"{tag}_{analyzer}" for analyzer in analyzers for tag in tags]

# Read the variable-group definitions from the YAML file next to the repo root.
# NOTE(review): yaml.FullLoader can build more than plain data structures.
# That is acceptable for a trusted, repo-local file; consider yaml.safe_load
# if this config could ever come from outside the repository.
config_path = f'{REPO_PATH}variable_config.yaml'
with open(config_path, 'r') as config_file:
    variable_config = yaml.load(config_file, Loader=yaml.FullLoader)

# Initial variable selection; the stationarity cell below reassigns this name.
VAR_variables = variable_config['VAR_TOPICS']

# Processed data for the instruments in FUTURES; later cells index the result
# by instrument name (e.g. dfs['CLc1']).
dfs = load_processed(FUTURES)

### Stationarity of time series with ADF

In [None]:
# Instrument whose series are tested for stationarity in this cell.
INSTRUMENT = 'CLc1'

df = dfs[INSTRUMENT]

# Columns to test; this also replaces the selection made when the config was loaded.
VAR_variables = variable_config['TEMPORAL']

# Run the augmented Dickey-Fuller test per column and keep only the first two
# entries of its result tuple (labelled test statistic and p-value below).
results = {
    col: adfuller(df[col])[:2]
    for col in tqdm(VAR_variables, desc='Stationarity test')
}

# One row per tested column.
res_df = pd.DataFrame(results).transpose()
res_df.columns = ['ADF Statistic', 'p-value']

# Global display option: floats are rendered with four decimals from here on.
pd.options.display.float_format = '{:.4f}'.format
print(f'ADFuller test results for {INSTRUMENT} sentiment data:')
display(res_df)

### VAR Optimal lag order, Impulse response function

In [None]:
# Upper bound on the lag orders compared by select_order.
MAX_LAGS = 30

# One panel per loaded instrument (at least one, so an empty `dfs` still
# yields a valid figure). With two instruments this is the original
# 1x2 grid at 8x4 inches.
n_panels = max(len(dfs), 1)
fig, ax = plt.subplots(1, n_panels, figsize=(4 * n_panels, 4), dpi=200)
# plt.subplots returns a bare Axes (not an array) when there is a single panel.
ax = np.atleast_1d(ax)

# NOTE(review): VAR_variables holds whatever the most recently executed cell
# assigned to it (the stationarity cell sets variable_config['TEMPORAL']) —
# confirm this is the intended variable set for lag selection.
# The loop variable is deliberately not named `df`, so the notebook-level
# `df` is not overwritten as a side effect of running this cell.
for i, (key, instrument_df) in enumerate(tqdm(dfs.items(), desc='Lag order selection')):
    model = VAR(instrument_df[VAR_variables])
    lag_order = model.select_order(MAX_LAGS, trend='c')
    plot_criterion(lag_order, ax[i], key)

fig.tight_layout()

# savefig does not create missing directories; make sure the target exists.
os.makedirs('images', exist_ok=True)
fig.savefig('images/lag_order.png')


### Granger causality

In [None]:
# Granger-causality tables are computed on the LCOc1 frame only.
df = dfs['LCOc1']

# Config keys of the variable sets to test, and the corresponding variable
# lists looked up from the YAML config (same order).
var_set_id = ['VAR_TOPICS_BASE', 'VAR_TOPICS_IT', 'VAR_TOPICS_CT']
var_sets = [variable_config[set_id] for set_id in var_set_id]

# One table per variable set, each computed against 'REALIZED_VOL'.
# The exact layout of the returned table is defined by
# utils.var_utils.grangers_causation.
for var_set in var_sets:
    gc_df = grangers_causation(df, var_set, 'REALIZED_VOL')
    display(gc_df)


### Topic

In [None]:
# Sentiment analyzer used for this and the following IRF figures.
ANALYZER = 'TextBlob'

# Second title line for each panel, matched by position to
# variable_config['TOPICS_BASE'].
labels = [
    'Crude Oil',
    'Conflict, War and Peace',
    'Central Banks'
]

fig, axs = plt.subplots(1, 3, figsize=(15, 5), dpi=200)
axs = axs.flatten()

# Placement of the single figure-level legend, below the panels.
legend_kwargs = dict(
    loc='lower center',
    bbox_to_anchor=(0.5, -0.1),
    ncol=4,
    fontsize=17,
)

for i, topic in enumerate(tqdm(variable_config['TOPICS_BASE'])):
    # SentVAR comes from utils.var_utils; here it is only asked to draw its
    # impulse-response plot onto the given axes.
    sent_var = SentVAR(dfs, topic, ANALYZER)
    panel = axs[i]
    sent_var.plot_irf(panel)

    # Create the legend once, right after the first panel has been drawn.
    if i == 0:
        fig.legend(**legend_kwargs)

    # Title: the part of the topic name before the first underscore, then
    # the human-readable label on a second line.
    topic_code = topic.split("_")[0]
    panel.set_title(f'{topic_code}\n{labels[i]}', fontsize=15, pad=10)

fig.tight_layout()

fig.savefig(f'images/irf_topics_{ANALYZER}.png')


### Inter-topic

In [None]:
# Sub-topic captions per parent topic, keyed by sub-topic index (0, 1, 2).
CRU_labels = dict(enumerate([
    'Crude Oil Production and Prices',
    'Financial Markets and Economic Indicators',
    'Financial Instruments and Regulations',
]))

CWP_labels = dict(enumerate([
    'Middle East and Eastern Europe Conflicts',
    'International Security and Diplomacy',
    'Domestic Unrest and Government Actions',
]))

CEN_labels = dict(enumerate([
    'US Federal Reserve and Monetary Policy',
    'Economic Conditions and Government Policies',
    'Financial Markets and Global Banking',
]))

# Indexed as labels[parent][sub_topic] when building panel titles.
labels = [CRU_labels, CWP_labels, CEN_labels]

fig, axs = plt.subplots(3, 3, figsize=(15, 15), dpi=200)
axs = axs.flatten()

# Placement of the single figure-level legend, below the grid.
legend_kwargs = dict(
    loc='lower center',
    bbox_to_anchor=(0.5, -0.1 / 3),
    ncol=4,
    fontsize=15,
)

for i, topic in enumerate(tqdm(variable_config['TOPICS_IT'])):
    sent_var = SentVAR(dfs, topic, ANALYZER)
    panel = axs[i]
    sent_var.plot_irf(panel)

    # Position i is split into (parent index, sub-topic index).
    # NOTE(review): this assumes TOPICS_IT lists three sub-topics per parent,
    # grouped in the same order as `labels` — confirm against the config.
    topic_code = topic.split("_")[0]
    group_idx, sub_idx = divmod(i, 3)
    panel.set_title(
        f'{topic_code} {sub_idx}\n{labels[group_idx][sub_idx]}',
        fontsize=15,
        pad=10,
    )

    # Create the legend once, after the first panel has been drawn.
    if i == 0:
        fig.legend(**legend_kwargs)

fig.tight_layout()

fig.savefig(f'images/irf_inter_topics_{ANALYZER}.png')

### Cross-topic

In [None]:
# Caption for each cross-topic, keyed by its position in
# variable_config['TOPICS_CT'].
topic_labels: dict[int, str] = dict(enumerate([
    'Securities and Commodity Markets',
    'Interest Rates and Economic Policy',
    'Geopolitical Conflicts',
    'Banking and Finance',
    'Oil and Gas Production',
]))

fig = plt.figure(figsize=(15, 10), dpi=200)

# Five panels on a 2x6 grid, each two columns wide: three on the top row and
# two on the bottom row, shifted by one column.
locs = [(0,0), (0,2), (0,4), (1,1), (1,3)]
axs = []
for loc in locs:
    axs.append(plt.subplot2grid((2,6), loc, colspan=2, fig=fig))

# Placement of the single figure-level legend, below the panels.
legend_kwargs = dict(
    loc='lower center',
    bbox_to_anchor=(0.5, -0.1 / 2),
    ncol=4,
    fontsize=15,
)

for i, topic in enumerate(tqdm(variable_config['TOPICS_CT'])):
    sent_var = SentVAR(dfs, topic, ANALYZER)
    panel = axs[i]
    sent_var.plot_irf(panel)

    # Bold, 1-based "Topic N" (mathtext) followed by the caption.
    panel.set_title(
        f'$\\mathbf{{Topic\\ {i + 1}}}$ - {topic_labels[i]}',
        fontsize=15,
        pad=10,
    )

    # Create the legend once, after the first panel has been drawn.
    if i == 0:
        fig.legend(**legend_kwargs)

fig.tight_layout()

fig.savefig(f'images/irf_cross_topics_{ANALYZER}.png')