In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scienceplots
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import seaborn as sns

from IPython.display import display, clear_output
import warnings

from scipy.stats import skew, kurtosis

import os
import sys
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

REPO_PATH = os.getenv('REPO_PATH')
if REPO_PATH is None:
    # Fail fast: otherwise every path built from REPO_PATH would silently start with 'None'.
    raise RuntimeError(
        "REPO_PATH is not set; define it in the environment or in a .env file."
    )
# Other cells concatenate REPO_PATH directly with relative paths, so guarantee a
# trailing separator (os.path.join(p, '') appends one only when it is missing).
REPO_PATH = os.path.join(REPO_PATH, '')

# Make the repository's src/ directory importable so project helpers can be loaded.
sys.path.insert(0, os.path.join(REPO_PATH, 'src'))
from utils.main_utils import load_json

# NOTE(review): this silences every warning for the whole notebook, including
# deprecation warnings from pandas/matplotlib; consider narrowing the filter.
warnings.filterwarnings("ignore")
# NOTE(review): the 'science' style is presumably registered by the `scienceplots`
# import at the top of this cell; confirm before removing that import as unused.
plt.style.use('science')

In [None]:
# Build paths with os.path.join so they resolve on any OS, not only with Windows backslashes.
CONFIG = load_json(os.path.join(REPO_PATH, 'src_HF', 'plot_config.json'))

df_LCOc1 = pd.read_csv(
    os.path.join(REPO_PATH, 'data', 'time_series', 'LCOc1_High_Frequency.csv')
)  # Brent Crude Oil
df_CLc1 = pd.read_csv(
    os.path.join(REPO_PATH, 'data', 'time_series', 'CLc1_High_Frequency.csv')
)  # WTI Crude Oil

# Use the parsed 'Date' column as a datetime index (the column itself is kept);
# the resample() calls in later cells need a datetime-like index.
df_LCOc1.index = pd.to_datetime(df_LCOc1['Date'])
df_CLc1.index = pd.to_datetime(df_CLc1['Date'])


### Price plot

In [None]:
# Legend labels and colours shared by the figures in this notebook
# (index 0 = Brent, index 1 = WTI).
LABELS = ['ICE Brent Crude (LCOc1)', 'NYMEX WTI Crude (CLc1)']
PALETTE = sns.color_palette('twilight', n_colors=2)

# Overlay both close-price series on a single axis.
fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
df_LCOc1['CLOSE'].plot(label=LABELS[0], ax=ax, lw=0.8, color=PALETTE[0])
df_CLc1['CLOSE'].plot(label=LABELS[1], ax=ax, lw=0.8, color=PALETTE[1])

ax.set_ylabel('Price (USD)', fontsize=14)
ax.set_xlabel('Date', fontsize=14)
ax.legend(frameon=False, loc='upper left', fontsize=13)
ax.tick_params(axis='x', labelsize=14)
ax.grid(alpha=0.2)

# savefig needs the target directory to exist; create it so a fresh checkout can run this cell.
os.makedirs('images', exist_ok=True)
fig.savefig('images/crude_oil_price.png', bbox_inches='tight')

### Log returns analysis

In [None]:
# One panel per contract, sharing the time axis.
fig, axs = plt.subplots(2, 1, figsize=(10, 5), dpi=200, sharex=True)

# Collects one summary-statistics column per contract.
df_list = []

for i, price_df in enumerate([df_LCOc1, df_CLc1]):
    # Log returns at the native sampling frequency of the CLOSE series.
    logret = np.log(price_df['CLOSE']).diff()

    # Aggregate to daily log returns in percent. min_count=1 leaves days with no
    # observations as NaN; the default sum() would return 0.0 for them, so they
    # would enter the statistics as zero returns and the dropna() would be a no-op.
    logret_resampled = logret.resample('1D').sum(min_count=1) * 100
    daily_logret = logret_resampled.dropna()

    # describe() statistics plus skewness and kurtosis, as one column named after the contract.
    stats = daily_logret.describe().rename(LABELS[i])
    stats['skew'] = skew(daily_logret)
    # scipy's kurtosis defaults to Fisher's definition (excess kurtosis, normal = 0).
    stats['kurtosis'] = kurtosis(daily_logret)
    df_list.append(stats)

    axs[i].plot(logret * 100, lw=0.8, color=PALETTE[i], label=LABELS[i])
    # Raw string: the label text is unchanged ('\%'), but '\%' is no longer parsed
    # as an invalid escape sequence.
    axs[i].set_ylabel(r'Log Return (\%)', fontsize=16)
    axs[i].legend(frameon=False, loc='upper left', fontsize=15)

axs[-1].set_xlabel('Date', fontsize=16)
fig.tight_layout(pad=-0.5)

# Side-by-side summary table: statistics as rows, contracts as columns.
df = pd.concat(df_list, axis=1)

display(df)

# savefig needs the target directory to exist; create it so a fresh checkout can run this cell.
os.makedirs('images', exist_ok=True)
fig.savefig('images/crude_oil_log_returns.png', bbox_inches='tight')

### Volume plot

In [None]:

# Weekly traded volume per contract. 'W' is the canonical weekly alias; the
# lowercase 'w' spelling is deprecated in recent pandas releases.
volume_df = pd.concat(
    [
        df_LCOc1['VOLUME'].resample('W').sum(),
        df_CLc1['VOLUME'].resample('W').sum(),
    ],
    axis=1,
)
volume_df.columns = LABELS

fig, ax = plt.subplots(figsize=(10, 5), dpi=200)

volume_df.plot(kind='bar', stacked=True, ax=ax, color=PALETTE, width=0.8, alpha=1)

ax.set_ylabel('Volume', fontsize=14)
ax.set_xlabel('Date', fontsize=14)
ax.legend(frameon=False, loc='upper left', fontsize=13)
ax.tick_params(axis='x', labelsize=14)

# Label only every 4th bar, formatted as year-month, so the x-axis stays readable.
# Ticks are set by bar position (0..n-1) on this figure's own axes rather than
# through pyplot's "current axes" state.
tick_step = 4
tick_positions = np.arange(0, len(volume_df), tick_step)
ax.set_xticks(tick_positions)
ax.set_xticklabels(volume_df.index[tick_positions].strftime('%Y-%m'), rotation=45)

# savefig needs the target directory to exist; create it so a fresh checkout can run this cell.
os.makedirs('images', exist_ok=True)
fig.savefig('images/crude_oil_volume_stacked.png', bbox_inches='tight')

### Price over the last 20 years

In [None]:
# Daily series for both contracts. Paths are built with os.path.join so they
# resolve on any OS, not only with Windows backslashes.
df_CLc1_daily = pd.read_csv(os.path.join(REPO_PATH, 'data', 'time_series', 'CLc1.csv'))
df_LCOc1_daily = pd.read_csv(os.path.join(REPO_PATH, 'data', 'time_series', 'LCOc1.csv'))

# Use the parsed 'Date' column as a datetime index (the column itself is kept).
df_CLc1_daily.index = pd.to_datetime(df_CLc1_daily['Date'])
df_LCOc1_daily.index = pd.to_datetime(df_LCOc1_daily['Date'])

# Overlay both close-price series on a single axis.
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
df_LCOc1_daily['CLOSE'].plot(label=LABELS[0], ax=ax, lw=0.6, color=PALETTE[0])
df_CLc1_daily['CLOSE'].plot(label=LABELS[1], ax=ax, lw=0.6, color=PALETTE[1])

ax.set_ylabel('Price (USD)', fontsize=14)
ax.set_xlabel('Date', fontsize=14)
ax.legend(frameon=False, loc='lower left', fontsize=13)
ax.tick_params(axis='x', labelsize=12)
ax.grid(alpha=0.2)
# Draw the grid underneath the price lines.
ax.set_axisbelow(True)

# savefig needs the target directory to exist; create it so a fresh checkout can run this cell.
os.makedirs('images', exist_ok=True)
fig.savefig('images/crude_oil_price_daily.png', bbox_inches='tight')
