# MFHT Rolling Windows Plot

In [None]:
import sqlite3

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm
import datetime

from stabilvol.utility import functions as f

# Path is relative to the notebook's working directory.
DATABASE = '../data/processed/trapezoidal_selection/stabilvol.sqlite'
# Connect to the SQLite database
# NOTE: sqlite3.connect does not fail when the file is missing but its directory
# exists (it creates an empty database), so a wrong path only shows up later as
# failing queries.
conn = sqlite3.connect(DATABASE)
# `cur` is not used by any other cell visible in this notebook.
cur = conn.cursor()

In [None]:
import os
# Sanity check: DATABASE is a relative path, so show the working directory and
# whether the database file is actually reachable from it.
print(os.getcwd())
os.path.exists(DATABASE)

In [None]:
MARKETS = ["UN", "UW", "LN", "JT"]
START_DATE = "1980-01-01"
END_DATE = "2022-07-01"

START_LEVELS = [-2.0, -1.0, -0.5, -0.2, -0.1, 0.1, 0.2, 0.5, 1.0, 2.0]
DELTAS = [2.0, 1.0, 0.5, 0.2, 0.1, -0.1, -0.2, -0.5, -1.0, -2.0]
# Every distinct (start, start + delta) pair, sorted; the end level is rounded
# to two decimals to get rid of floating point noise.
LEVELS = sorted({
    (start, round(start + delta, 2))
    for start in START_LEVELS
    for delta in DELTAS
})

# VOL_LIMIT is part of every pickle/npy file name: after changing it, remember
# to re-generate those files.
VOL_LIMIT = 0.5

In [None]:
def select_bins(df, max_n=1000):
    """Bin ``df`` by 'Volatility' quantiles and aggregate 'FHT' inside each bin.

    The binning starts with 25 quantiles and is refined in steps of 20 until
    the least populated bin holds fewer than ``max_n`` rows, or more than
    1000 quantiles have been requested.

    Side effect: the 'Bins' column of ``df`` is created/overwritten in place.

    Returns
    -------
    (grouped, nbins)
        ``grouped`` is indexed by 'Bins' and has one column per aggregator
        (mean, ``error_on_the_mean`` and size); ``nbins`` is the number of
        quantiles requested on the last pass (``duplicates='drop'`` may have
        produced fewer actual bins).
    """
    nbins = 25
    while True:
        # Quantile binning; identical bin edges are merged instead of raising
        quantile_bins = pd.qcut(df['Volatility'], nbins, duplicates='drop')
        df['Bins'] = quantile_bins

        # Mean, standard error and population of 'FHT' in every bin
        fht_per_bin = df.groupby('Bins')['FHT']
        stats = fht_per_bin.agg(['mean', error_on_the_mean, 'size'])

        smallest_bin = stats['size'].min()
        if smallest_bin < max_n or nbins > 1000:
            return stats, nbins
        nbins += 20

def error_on_the_mean(values):
    """Return the standard error of the mean of ``values``.

    Computed as ``np.std(values)`` (numpy's default ``ddof=0``) divided by the
    square root of the number of values.
    """
    spread = np.std(values)
    n_values = len(values)
    return spread / np.sqrt(n_values)

In [None]:
def query_binned_data(market: str, start_date:str, end_date:str = None, vol_limit:float = 0.5, t1_string:str = "m0p5", t2_string:str = "m1p5"):
    """Load the FHT rows of one market/time window and bin them by volatility.

    Rows are read from table ``stabilvol_{t1_string}_{t2_string}`` through the
    module-level connection ``conn``, keeping only rows with
    ``Volatility < vol_limit``, the requested ``Market`` and
    ``start >= start_date`` / ``end <= end_date``.

    Parameters
    ----------
    market : market code compared against the ``Market`` column.
    start_date, end_date : window bounds; converted with ``str()`` before being
        bound, which is the same text the previous f-string interpolation
        produced. ``end_date`` defaults to '2023-01-01'.
    vol_limit : exclusive upper bound on ``Volatility``.
    t1_string, t2_string : threshold labels that select the table.

    Returns
    -------
    The ``(grouped, nbins)`` tuple produced by ``select_bins``.

    Raises
    ------
    ValueError
        If the query fails (e.g. the table does not exist) or if it returns 50
        rows or fewer. Callers already treat ``ValueError`` as "no usable data
        for this window"; previously a failed query returned ``None`` and made
        the caller's tuple unpacking fail with ``TypeError``.
    """
    end_date = '2023-01-01' if end_date is None else end_date
    # Table names cannot be bound as parameters, so only the table name is
    # interpolated; every value goes through a placeholder. This also avoids
    # double-quoted values, which SQLite tries to resolve as identifiers first.
    query = f'''
        SELECT *
        FROM stabilvol_{t1_string}_{t2_string}
        WHERE Volatility < ?
        AND Market = ?
        AND start >= ?
        AND end <= ?
        '''
    # sqlite3 cannot bind pandas Timestamps directly, hence the str() calls
    params = (vol_limit, market, str(start_date), str(end_date))
    try:
        # Load the FHT data from the database
        df = pd.read_sql_query(query, conn, params=params)
    except pd.errors.DatabaseError as err:
        raise ValueError(f'No data for market {market} with thresholds {t1_string}-{t2_string}') from err

    if len(df) > 50:
        return select_bins(df)
    raise ValueError(f'Not enough data for market {market} with thresholds {t1_string}-{t2_string} from {start_date} to {end_date}')

In [None]:
def create_dataset(markets, windows, t1_string, t2_string):
    """Build the binned MFHT table for every market and rolling window.

    For each ``(start_date, end_date)`` in ``windows`` the binned data returned
    by ``query_binned_data`` (called with the global ``VOL_LIMIT``) is tagged
    with 'start', 'end' and 'market' columns.

    Returns
    -------
    (df, outcasts)
        ``df`` is the concatenation of all tagged tables (an empty DataFrame
        when no window produced data); ``outcasts`` maps each market to the
        list of windows for which ``query_binned_data`` raised ``ValueError``.
    """
    outcasts = {market: [] for market in markets}
    # Collect the per-window tables and concatenate once at the end: growing a
    # DataFrame with pd.concat inside the loop copies all previous rows on
    # every iteration.
    frames = []
    for market in markets:
        for start_date, end_date in tqdm(windows, desc=market):
            try:
                mfht, _ = query_binned_data(market, start_date, end_date, VOL_LIMIT, t1_string=t1_string, t2_string=t2_string)
            except ValueError:
                outcasts[market].append((start_date, end_date))
                continue
            mfht['start'] = start_date
            mfht['end'] = end_date
            mfht['market'] = market
            frames.append(mfht.reset_index())

    # pd.concat raises on an empty list, so keep the empty-DataFrame result
    df = pd.concat(frames) if frames else pd.DataFrame()
    return df, outcasts

In [None]:
def take_maxs(market, windows, coefficients, regenerate=False):
    """Return the per-window maximum of the binned mean FHT for each threshold pair.

    Parameters
    ----------
    market : market code used in the file names.
    windows : sequence of ``(start_date, end_date)`` pairs.
    coefficients : sequence of ``(t1_string, t2_string)`` threshold labels.
    regenerate : when True, recompute the maxima from the per-threshold pickle
        files and overwrite the ``.npy`` cache; when False, just load the cache.

    Returns
    -------
    Array of shape ``(len(coefficients), len(windows))`` when regenerated
    (entries stay 0 for windows with no rows in the pickle); otherwise whatever
    array is stored in the cache file.
    """
    cache_path = f'../data/processed/dynamics/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.npy'
    if not regenerate:
        return np.load(cache_path)

    max_values = np.zeros((len(coefficients), len(windows)))
    for i, (t1_string, t2_string) in enumerate(coefficients):
        # NOTE: unpickling can execute arbitrary code; only load files written by this notebook
        df = pd.read_pickle(f'../data/processed/dynamics/{market}_rolling_MFHT_peaks_{t1_string}_{t2_string}_{VOL_LIMIT}.pickle')
        # Wrap the sequence itself (not the enumerate object) so that tqdm
        # knows the total and can show progress and ETA.
        for j, (start_date, end_date) in enumerate(tqdm(windows, desc=market)):
            mfht = df[(df['start'] == start_date) & (df['end'] == end_date)]
            if not mfht.empty:
                max_values[i, j] = mfht['mean'].max()

    np.save(cache_path, max_values)
    return max_values

In [None]:
def generate_quarters(start_date, end_date, freq='Q'):
    """Return the period boundaries between two dates, including both ends.

    Parameters
    ----------
    start_date, end_date : anything ``pd.to_datetime`` accepts (Timestamp,
        datetime, date, ISO string).
    freq : pandas frequency alias passed to ``pd.date_range``. The default 'Q'
        is kept for compatibility; NOTE(review): recent pandas releases
        deprecate 'Q'/'Y' in favour of 'QE'/'YE' - confirm against the
        installed version.

    Returns
    -------
    DatetimeIndex with the dates generated by ``pd.date_range``, preceded by
    ``start_date`` and followed by ``end_date`` when they are not already the
    first/last generated date. If the range contains no boundary at all, only
    the two end points are returned.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Generate all boundaries between start and end date
    quarters = list(pd.date_range(start_date, end_date, freq=freq))
    # `not quarters` covers a range too short to contain any boundary
    if not quarters or quarters[0].date() > start_date.date():
        quarters.insert(0, start_date)
    if quarters[-1].date() < end_date.date():
        quarters.append(end_date)

    return pd.to_datetime(quarters)


def add_ticks(ax, windows, coeff, outcasts, highlights=True):
    """Decorate one heatmap row: x ticks, title, outcast bands and highlight lines.

    Parameters
    ----------
    ax : matplotlib Axes holding a one-row image with one column per window.
    windows : sequence of ``(start_date, end_date)`` pairs.
    coeff : ``(t1, t2)`` threshold labels, rendered in the title through
        ``f.numerify_threshold``.
    outcasts : iterable of ``(start, end)`` windows to black out.
    highlights : when True, draw dashed vertical lines at the labels
        bracketing 2006-12-31 and 2008-12-31 (each line is skipped if the
        windows do not reach that date).
    """
    # One label per window start, plus the end of the last window
    label_dates = [start_date for start_date, end_date in windows]
    label_dates.append(windows[-1][1])
    label_dates = pd.to_datetime(label_dates)
    quarters = generate_quarters(label_dates[0], label_dates[-1], 'Y')
    tick_coords = []
    for qdate in quarters:
        # Only boundaries that coincide with a label get a tick
        matches = np.where(label_dates == qdate)[0]
        if matches.size > 0:
            tick_coords.append(matches[0])
    ax.set_xticks(tick_coords, labels=[label_dates[j].strftime('%Y-%m-%d') for j in tick_coords], minor=False, 
                  fontsize=11, rotation=90, rotation_mode='anchor', ha='right', va='center_baseline')
    ax.set_title(' '.join([r'$\theta_i$=', f.numerify_threshold(coeff[0]), r'/ $\theta_f$=', f.numerify_threshold(coeff[1])]), fontsize=12)
    # Remove yticks
    ax.yaxis.set_ticks([])

    last_index = len(label_dates) - 1
    outcast_dates = [(pd.to_datetime(start), pd.to_datetime(end)) for start, end in outcasts]
    for outcast_start, outcast_end in outcast_dates:
        # Find the indices of the labels surrounding the outcast window
        before = np.where(label_dates <= outcast_start)[0]
        after = np.where(label_dates >= outcast_end)[0]
        # Clamp to the plotted range when the outcast starts before the first
        # label or ends after the last one
        start_index = before[-1] if before.size > 0 else 0
        end_index = after[0] if after.size > 0 else last_index
        ax.axvspan(start_index-0.5, end_index-0.5, color='black')

    if highlights:
        # Last label on/before 2006-12-31 and first label on/after 2008-12-31
        before = np.where(label_dates <= pd.to_datetime('2006-12-31'))[0]
        after = np.where(label_dates >= pd.to_datetime('2008-12-31'))[0]

        # Add vertical lines at the start and end of the region; a line is
        # skipped when the windows do not cover its date
        if before.size > 0:
            ax.axvline(before[-1], color='k', linestyle='--', linewidth=1.5)
        if after.size > 0:
            ax.axvline(after[0], color='k', linestyle='--', linewidth=1.5)


def plot_rolling_heatmap(coefficients, windows, maxs=None, outcasts=None, **kwargs):
    """Plot one heatmap row of MFHT maxima per threshold pair.

    Parameters
    ----------
    coefficients : sequence of ``(t1, t2)`` threshold labels, one row each.
    windows : sequence of ``(start_date, end_date)`` pairs, one column each.
    maxs : optional array of shape ``(len(coefficients), len(windows))``. When
        omitted, the maxima are computed from the per-threshold pickle files;
        that path reads the notebook globals ``market`` and ``VOL_LIMIT``.
    outcasts : optional dict mapping each threshold pair to a list of windows
        to black out. When omitted it is built here: windows missing from the
        pickle (``maxs`` omitted) or with a maximum equal to 0 (``maxs`` given).
    **kwargs : ``latex`` (bool) switches matplotlib to LaTeX text rendering and
        a serif font by changing ``plt.rcParams`` globally; ``suptitle`` sets
        the figure title.

    Returns
    -------
    (fig, outcasts)
    """
    if outcasts is None:
        outcasts = {(t1, t2): [] for t1, t2 in coefficients}
        search_outcasts = True
    else:
        search_outcasts = False
    if kwargs.get('latex', False):    
        # Use LaTeX for text rendering
        plt.rcParams['text.usetex'] = True
        plt.rcParams['font.family'] = 'serif'
    
    # squeeze=False always returns an array of Axes, so a single threshold
    # pair works as well
    fig, axs = plt.subplots(len(coefficients), figsize=(12, 4.5), sharex=True, squeeze=False)
    suptitle = kwargs.get('suptitle', None)
    if suptitle is not None:
        fig.suptitle(suptitle, fontsize=16)

    max_values = np.zeros((len(coefficients), len(windows))) if maxs is None else maxs

    for i, (coeff, ax) in enumerate(zip(coefficients, axs.flatten())):
        if maxs is None:
            # The pickle depends only on the threshold pair: load it once per
            # row instead of once per window
            data = pd.read_pickle(f'../data/processed/dynamics/{market}_rolling_MFHT_peaks_{coeff[0]}_{coeff[1]}_{VOL_LIMIT}.pickle')
            for j, (start_date, end_date) in enumerate(windows):
                # Calculate the peaks
                mfht = data[(data['start'] == start_date) & (data['end'] == end_date)]
                if mfht.empty:
                    outcasts[coeff].append((start_date, end_date))
                else:
                    max_values[i, j] = mfht['mean'].max()
        elif search_outcasts:
            for j, (start_date, end_date) in enumerate(windows):
                # See where the max is zero and label it as outcast
                if max_values[i, j] == 0:
                    outcasts[coeff].append((start_date, end_date))

        im = ax.imshow(max_values[i].reshape(1, -1), cmap='coolwarm', aspect='auto', vmin=max_values[i].min(), vmax=max_values[i].max())
        # Add ticks to the plot
        add_ticks(ax, windows, coeff, outcasts[coeff])
        # Each row gets its own colorbar, ticked at its minimum, mean and maximum
        cbar = fig.colorbar(im, ax=ax, orientation='vertical', pad=0.01, aspect=8)
        cbar.set_ticks([max_values[i].min(), max_values[i].mean(), max_values[i].max()])
    
    # axs[0].text(0.57, 1.1, '2006-12-31', fontsize=11, transform=axs[0].transAxes, horizontalalignment='left')
    # axs[0].text(0.73, 1.1, '2008-12-31', fontsize=11, transform=axs[0].transAxes, horizontalalignment='right')
    
    fig.tight_layout()
    
    # Add a colorbar
    # cbar = fig.colorbar(im, ax=axs.ravel().tolist(), pad=0.01)
    # cbar.set_label('Maximum MFHT', rotation=270, labelpad=15)

    plt.show()
    return fig, outcasts

## Rolling Windows

In [None]:
def roll_windows(duration=90,  start_date=None, end_date=None):
    """Return daily rolling windows of about ``duration`` days as (start, end) pairs.

    Window centres run day by day from ``start_date + duration/2`` to
    ``end_date - duration/2``; every window spans ``duration // 2`` days on
    each side of its centre. Defaults cover 1980-01-01 to 2022-07-01.
    """
    if start_date is None:
        start_date = datetime.date(1980, 1, 1)
    if end_date is None:
        end_date = datetime.date(2022, 7, 1)

    # Distance from a centre to either edge of its window
    half_width = pd.to_timedelta(duration//2, 'D')
    first_centre = start_date + pd.to_timedelta(duration/2, 'D')
    last_centre = end_date - pd.to_timedelta(duration/2, 'D')

    windows = []
    for centre in pd.date_range(first_centre, last_centre, freq='D'):
        windows.append((centre - half_width, centre + half_width))
    return windows

In [None]:
# 90-day windows rolled day by day over 1980-01-01 .. 2022-12-31.
# NOTE: this end date differs from the END_DATE constant ("2022-07-01").
windows = roll_windows(90, start_date=datetime.date(1980, 1, 1), end_date=datetime.date(2022, 12, 31))
# Displayed as the cell output: number of windows
len(windows)

In [None]:
# If you change this list, remember to re-generate all the max_values (regenerate=True)
# Each entry is a (t1_string, t2_string) pair selecting table stabilvol_{t1}_{t2}.
# Presumably 'm' stands for minus and 'p' for the decimal point (e.g. "m0p5" -> -0.5);
# TODO confirm against f.numerify_threshold.
coefficients = [
    # ("m1p0", "m3p0"), 
    ("m0p5", "m1p5"), 
    ("0p5", "m1p5"), 
    ("0p5", "1p0"),
    ("1p0", "3p0"),
]

## UN

In [None]:
# Market used by the following cells (file names, plot title); also read as a
# global inside plot_rolling_heatmap when it has to load the pickles itself.
market = "UN"

In [None]:
# Set to True to rebuild the per-threshold datasets even if they are on disk
regenerate = False
for t1_string, t2_string in coefficients:
    pickle_path = f'../data/processed/dynamics/{market}_rolling_MFHT_peaks_{t1_string}_{t2_string}_{VOL_LIMIT}.pickle'
    if os.path.exists(pickle_path) and not regenerate:
        # Dataset already cached on disk: nothing to do
        continue
    print(f"Generating {market} with thresholds {t1_string}-{t2_string}")
    # The data must be (re)generated from the database
    df, outcasts = create_dataset([market], windows, t1_string, t2_string)
    print(f"There are {len(outcasts[market])} outcasts")
    df.to_pickle(pickle_path)
# Later cells always see regenerate == False
regenerate = False

In [None]:
# regenerate=True: the maxima are recomputed from the pickles and the .npy
# cache is overwritten on every run, independently of the `regenerate` flag above.
max_values = take_maxs(market, windows, coefficients, regenerate=True)
print(max_values.shape)

In [None]:
# The last 95 windows are left out of the plot. When the cells are run in order
# `regenerate` is False here, so None is passed and the outcasts are inferred
# from the zero entries of max_values.
fig, errors = plot_rolling_heatmap(coefficients, windows[:-95], max_values[:, :-95], outcasts if regenerate else None, latex=True, suptitle=market)

In [None]:
# Save the figure as PNG, EPS and PDF
fig.savefig(f'../visualization/dynamics/rolling_windows/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.png', bbox_inches='tight')
fig.savefig(f'../visualization/dynamics/rolling_windows/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.eps', bbox_inches='tight')
fig.savefig(f'../visualization/dynamics/rolling_windows/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.pdf', bbox_inches='tight')

## UW

In [None]:
# Market used by the following cells (file names, plot title); also read as a
# global inside plot_rolling_heatmap when it has to load the pickles itself.
market = "UW"

In [None]:
# Set to True to rebuild the per-threshold datasets even if they are on disk
regenerate = False
for t1_string, t2_string in coefficients:
    pickle_path = f'../data/processed/dynamics/{market}_rolling_MFHT_peaks_{t1_string}_{t2_string}_{VOL_LIMIT}.pickle'
    if os.path.exists(pickle_path) and not regenerate:
        # Dataset already cached on disk: nothing to do
        continue
    print(f"Generating {market} with thresholds {t1_string}-{t2_string}")
    # The data must be (re)generated from the database
    df, outcasts = create_dataset([market], windows, t1_string, t2_string)
    print(f"There are {len(outcasts[market])} outcasts")
    df.to_pickle(pickle_path)
# Later cells always see regenerate == False
regenerate = False

In [None]:
# regenerate=True: the maxima are recomputed from the pickles and the .npy
# cache is overwritten on every run, independently of the `regenerate` flag above.
max_values = take_maxs(market, windows, coefficients, regenerate=True)
print(max_values.shape)

In [None]:
# The last 95 windows are left out of the plot. When the cells are run in order
# `regenerate` is False here, so None is passed and the outcasts are inferred
# from the zero entries of max_values.
fig, errors = plot_rolling_heatmap(coefficients, windows[:-95], max_values[:, :-95], outcasts if regenerate else None, latex=True, suptitle=market)

In [None]:
# Save the figure as PNG, EPS and PDF
fig.savefig(f'../visualization/dynamics/rolling_windows/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.png', bbox_inches='tight')
fig.savefig(f'../visualization/dynamics/rolling_windows/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.eps', bbox_inches='tight')
fig.savefig(f'../visualization/dynamics/rolling_windows/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.pdf', bbox_inches='tight')

## LN

In [None]:
# Market used by the following cells (file names, plot title); also read as a
# global inside plot_rolling_heatmap when it has to load the pickles itself.
market = "LN"

In [None]:
# Set to True to rebuild the per-threshold datasets even if they are on disk
regenerate = False
for t1_string, t2_string in coefficients:
    # Reset `df` on every iteration, dropping the reference to the previous dataset
    df = pd.DataFrame()
    pickle_path = f'../data/processed/dynamics/{market}_rolling_MFHT_peaks_{t1_string}_{t2_string}_{VOL_LIMIT}.pickle'
    if os.path.exists(pickle_path) and not regenerate:
        # Dataset already cached on disk: nothing to do
        continue
    print(f"Generating {market} with thresholds {t1_string}-{t2_string}")
    # The data must be (re)generated from the database
    df, outcasts = create_dataset([market], windows, t1_string, t2_string)
    print(f"There are {len(outcasts[market])} outcasts")
    df.to_pickle(pickle_path)
# Later cells always see regenerate == False
regenerate = False

In [None]:
# regenerate=True: the maxima are recomputed from the pickles and the .npy
# cache is overwritten on every run, independently of the `regenerate` flag above.
max_values = take_maxs(market, windows, coefficients, regenerate=True)

In [None]:
# The last 95 windows are left out of the plot. When the cells are run in order
# `regenerate` is False here, so None is passed and the outcasts are inferred
# from the zero entries of max_values.
fig, errors = plot_rolling_heatmap(coefficients, windows[:-95], max_values[:, :-95], outcasts if regenerate else None, latex=True, suptitle=market)

In [None]:
# Save the figure as PNG, EPS and PDF
fig.savefig(f'../visualization/dynamics/rolling_windows/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.png', bbox_inches='tight')
fig.savefig(f'../visualization/dynamics/rolling_windows/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.eps', bbox_inches='tight')
fig.savefig(f'../visualization/dynamics/rolling_windows/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.pdf', bbox_inches='tight')

## JT

In [None]:
# Market used by the following cells (file names, plot title); also read as a
# global inside plot_rolling_heatmap when it has to load the pickles itself.
market = "JT"

In [None]:
# Set to True to rebuild the per-threshold datasets even if they are on disk
regenerate = False
for t1_string, t2_string in coefficients:
    # Reset `df` on every iteration, dropping the reference to the previous dataset
    df = pd.DataFrame()
    pickle_path = f'../data/processed/dynamics/{market}_rolling_MFHT_peaks_{t1_string}_{t2_string}_{VOL_LIMIT}.pickle'
    if os.path.exists(pickle_path) and not regenerate:
        # Dataset already cached on disk: nothing to do
        continue
    print(f"Generating {market} with thresholds {t1_string}-{t2_string}")
    # The data must be (re)generated from the database
    df, outcasts = create_dataset([market], windows, t1_string, t2_string)
    print(f"There are {len(outcasts[market])} outcasts")
    df.to_pickle(pickle_path)
# Later cells always see regenerate == False
regenerate = False

In [None]:
# regenerate=True: the maxima are recomputed from the pickles and the .npy
# cache is overwritten on every run, independently of the `regenerate` flag above.
max_values = take_maxs(market, windows, coefficients, regenerate=True)

In [None]:
# The last 95 windows are left out of the plot. When the cells are run in order
# `regenerate` is False here, so None is passed and the outcasts are inferred
# from the zero entries of max_values.
fig, errors = plot_rolling_heatmap(coefficients, windows[:-95], max_values[:, :-95], outcasts if regenerate else None, latex=True, suptitle=market)

In [None]:
# Save the figure as PNG, EPS and PDF
fig.savefig(f'../visualization/dynamics/rolling_windows/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.png', bbox_inches='tight')
fig.savefig(f'../visualization/dynamics/rolling_windows/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.eps', bbox_inches='tight') 
fig.savefig(f'../visualization/dynamics/rolling_windows/{market}_rolling_MFHT_peaks_variousthresholds_{VOL_LIMIT}.pdf', bbox_inches='tight')