In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.notebook import tqdm
import scienceplots

import yaml
import os
import sys
from dotenv import load_dotenv

# Read environment variables (REPO_PATH among them) from a .env file when one exists.
load_dotenv()
REPO_PATH = os.getenv("REPO_PATH")
if not REPO_PATH:
    # Stop here with an actionable message; otherwise the paths below would be
    # built from the literal text "None" and fail later with an opaque error.
    raise RuntimeError(
        "REPO_PATH is not set; define it in the environment or in a .env file."
    )
plt.style.use('science')

# Import main utility functions
# os.path.join only inserts a separator when REPO_PATH lacks a trailing one,
# so both "/repo" and "/repo/" resolve to the same src directory.
sys.path.insert(0, os.path.join(REPO_PATH, 'src'))
from utils.main_utils import load_processed


In [None]:
FUTURES = ['CLc1', 'LCOc1']
TOPICS = ['CRU', 'CWP', 'CEN']

# Load the YAML variable config file.
# The path is joined (not concatenated) so it resolves whether or not REPO_PATH
# ends with a path separator; the encoding is pinned so the read does not depend
# on the platform default.
# NOTE(review): FullLoader can build more than plain data types. If the config
# only holds plain mappings/lists/scalars, yaml.safe_load is the safer choice.
with open(os.path.join(REPO_PATH, 'variable_config.yaml'), 'r', encoding='utf-8') as file:
    var_config = yaml.load(file, Loader=yaml.FullLoader)

# load_processed is a project helper; its result is indexed by ticker just below.
dfs = load_processed(FUTURES)

# The rest of the notebook works on the 'CLc1' entry only.
df = dfs['CLc1']

# Readable names for the integer topic ids, used as plot legend entries.
topic_names: dict[int, str] = {
    0: 'Securities and Commodity Markets',
    1: 'Interest Rates and Economic Policy',
    2: 'Geopolitical Conflicts',
    3: 'Banking and Finance',
    4: 'Oil and Gas Production'
}


In [None]:
# Columns of the SENT_CT config group whose tag mentions VADER.
CT_VADER = [tag for tag in var_config['SENT_CT'] if 'VADER' in tag]

# Translate every tag into a readable legend entry.
# Assumes the second underscore-separated field of a tag is the integer topic id
# used as key in topic_names — TODO confirm against variable_config.yaml.
top_num = [string.split('_')[1] for string in CT_VADER]
labels = [topic_names[int(num)] for num in top_num]

# Rolling window length, counted in rows
# (presumably 288 five-minute bars per day x 3 days — TODO confirm).
window = 288 * 3

# No twin axis is created: nothing is plotted on a secondary scale, and an empty
# one would only add a misleading right-hand axis to the figure.
fig, ax = plt.subplots(1, 1, figsize=(10, 5), dpi=200)
df[CT_VADER].rolling(window).mean().plot(ax=ax, lw=0.4)

# pandas draws one line per column in column order, so labels line up with CT_VADER.
ax.legend(labels)
ax.set(title='VADER sentiment by topic', ylabel=f'Rolling mean over {window} rows')
# Ending on tight_layout (returns None) keeps a stray object repr out of the cell output.
fig.tight_layout()


In [None]:
# Columns of the SENT_CT config group whose tag mentions TextBlob.
CT_textblob = [tag for tag in var_config['SENT_CT'] if 'TextBlob' in tag]

# Translate every tag into a readable legend entry.
# Assumes the second underscore-separated field of a tag is the integer topic id
# used as key in topic_names — TODO confirm against variable_config.yaml.
top_num = [string.split('_')[1] for string in CT_textblob]
labels = [topic_names[int(num)] for num in top_num]

# Rolling window length, counted in rows
# (presumably 288 five-minute bars per day x 3 days — TODO confirm).
window = 288 * 3

# No twin axis is created: nothing is plotted on a secondary scale, and an empty
# one would only add a misleading right-hand axis to the figure.
fig, ax = plt.subplots(1, 1, figsize=(10, 5), dpi=200)
df[CT_textblob].rolling(window).mean().plot(ax=ax, lw=0.4)

# pandas draws one line per column in column order, so labels line up with CT_textblob.
ax.legend(labels)
ax.set(title='TextBlob sentiment by topic', ylabel=f'Rolling mean over {window} rows')
# Ending on tight_layout (returns None) keeps a stray object repr out of the cell output.
fig.tight_layout()

### Correlation matrix

In [None]:
# Pairwise correlation between all columns listed under the SENT_CT config key.
corr = df.loc[:, var_config['SENT_CT']].corr()

# Square canvas for the annotated heatmap; subplots() defaults to a single axis.
fig, ax = plt.subplots(figsize=(10, 10), dpi=200)

# Each cell is annotated with its coefficient, drawn with the reversed coolwarm colormap.
sns.heatmap(data=corr, annot=True, cmap='coolwarm_r', ax=ax)

fig.tight_layout()