# MFHT Grid Plot

In [None]:
import sqlite3

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit

from stabilvol.utility import functions as f

# Path of the SQLite database queried by this notebook.
# It is a relative path, so it is resolved against the current working directory.
DATABASE = 'data/processed/trapezoidal_selection/stabilvol.sqlite'
# Connect to the SQLite database
conn = sqlite3.connect(DATABASE)
# Cursor on the same connection.
# NOTE(review): the visible cells query through `conn` (pandas) and never use `cur` — confirm it is still needed.
cur = conn.cursor()

In [None]:
import os
# DATABASE is a relative path: print the working directory it is resolved against
print(os.getcwd())
# Last expression of the cell: shows whether the database file exists at that path
os.path.exists(DATABASE)

In [None]:
def select_bins(df, max_n=1000, start_bins=50, step=50, max_bins=1000):
    """Bin ``df`` by volatility quantiles and summarise 'FHT' in each bin.

    The number of quantile bins starts at ``start_bins`` and grows by
    ``step`` until the least populated bin holds fewer than ``max_n`` rows,
    or until the requested number of bins exceeds ``max_bins``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Volatility' and 'FHT'. A 'Bins' column
        holding the final binning is written to it (the frame is modified
        in place).
    max_n : int
        Refinement stops once the smallest bin has fewer rows than this.
    start_bins : int
        Number of quantile bins requested on the first attempt.
    step : int
        Increment of the requested number of bins between attempts.
        Must be positive.
    max_bins : int
        Refinement also stops once the requested number of bins is
        greater than this value.

    Returns
    -------
    pandas.DataFrame
        Indexed by the volatility bins, with the columns 'mean',
        'error_on_the_mean' and 'size' computed on 'FHT'.

    Raises
    ------
    ValueError
        If ``step`` is not positive (the refinement could never terminate).
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")

    nbins = start_bins

    while True:
        # Quantile bins of 'Volatility'; identical edges are merged, so the
        # effective number of bins can be lower than `nbins`
        df['Bins'] = pd.qcut(df['Volatility'], nbins, duplicates='drop')

        # Mean, error on the mean and number of rows of 'FHT' in each bin.
        # `observed=False` is passed explicitly so that every bin category
        # stays in the result regardless of the pandas default.
        grouped = df.groupby('Bins', observed=False)['FHT'].agg(
            ['mean', error_on_the_mean, 'size']
        )
        count = grouped['size'].min()

        if count < max_n or nbins > max_bins:
            break
        nbins += step
    return grouped

def error_on_the_mean(values):
    """Return the standard deviation of ``values`` divided by sqrt(len(values)).

    The standard deviation is the one computed by ``np.std`` with its
    default arguments.
    """
    n_obs = len(values)
    spread = np.std(values)
    return spread / np.sqrt(n_obs)

In [None]:
# Starting thresholds
START_LEVELS = [-2.0, -1.0, -0.5, -0.2, -0.1, 0.1, 0.2, 0.5, 1.0, 2.0]
# Offsets added to each starting threshold to obtain the end threshold
DELTAS = [2.0, 1.0, 0.5, 0.2, 0.1, -0.1, -0.2, -0.5, -1.0, -2.0]
# Every (start, end) pair, with end = start + offset rounded to two decimals.
# This is a set: its iteration order is not defined.
LEVELS = {
    (t_start, round(t_start + offset, 2))
    for t_start in START_LEVELS
    for offset in DELTAS
}

# Upper bound on 'Volatility' applied when loading data from the database
VOL_LIMIT = 0.05
MARKET = 'UW'
# Last expression of the cell: displays the set of threshold pairs
LEVELS

In [None]:
def make_megadataframe(market):
    """Load and bin the data of one market for every threshold pair.

    For each ``(t1, t2)`` in ``LEVELS`` the table
    ``stabilvol_<t1>_<t2>`` is queried for the rows of ``market`` whose
    'Volatility' is below ``VOL_LIMIT``; the result is summarised with
    ``select_bins``.

    Parameters
    ----------
    market : str
        Value matched against the 'Market' column.

    Returns
    -------
    dict
        Maps ``(t1, t2)`` to the frame returned by ``select_bins``.
        Threshold pairs whose query fails (e.g. missing table) are
        reported on stdout and left out.
    """
    df_dict = {}
    for t1, t2 in LEVELS:
        # Create the strings for the threshold values
        t1_string = f.stringify_threshold(t1)
        t2_string = f.stringify_threshold(t2)
        # A table name cannot be a bound parameter, so it is the only part
        # that is interpolated. The values are bound with `?` placeholders:
        # a double-quoted token such as "UN" is an identifier in SQL and is
        # only read as a string by SQLite through a legacy fallback.
        query = f'''
            SELECT *
            FROM stabilvol_{t1_string}_{t2_string}
            WHERE Volatility < ? AND Market = ?
            '''
        # Load the dataframe from the database if it exists
        try:
            df = pd.read_sql_query(query, conn, params=(VOL_LIMIT, market))
        except pd.errors.DatabaseError:
            print(f'No data for market {market} with thresholds {t1_string}-{t2_string}')
        else:
            grouped_data = select_bins(df)
            df_dict[(t1, t2)] = grouped_data
    return df_dict

In [None]:
# Bin the data of the "UN" market for every threshold pair and record
# which pairs actually returned data.
market = "UN"
megaframe = make_megadataframe(market)

thresholds_table = []

# Bound up front so that `del data` below cannot raise NameError when
# `megaframe` is empty (the loop would then never bind it).
data = None
for (t1, t2), data in megaframe.items():
    t1_string = f.stringify_threshold(t1)
    t2_string = f.stringify_threshold(t2)
    # Uncomment to (re)write the pickles that the grid-plot cell reads:
    # data.to_pickle(f'data/processed/trapezoidal_selection/mfht_{market}_{t1_string}_{t2_string}.pkl')
    thresholds_table.append([t1, t2])

# Drop the references to the binned frames
del megaframe
del data

In [None]:
# Bin the data of the "UW" market for every threshold pair and record
# which pairs actually returned data.
market = "UW"
megaframe = make_megadataframe(market)

thresholds_table = []

# Bound up front so that `del data` below cannot raise NameError when
# `megaframe` is empty (the loop would then never bind it).
data = None
for (t1, t2), data in megaframe.items():
    t1_string = f.stringify_threshold(t1)
    t2_string = f.stringify_threshold(t2)
    # Uncomment to (re)write the pickles that the grid-plot cell reads:
    # data.to_pickle(f'data/processed/trapezoidal_selection/mfht_{market}_{t1_string}_{t2_string}.pkl')
    thresholds_table.append([t1, t2])

# Drop the references to the binned frames
del megaframe
del data

In [None]:
# Bin the data of the "LN" market for every threshold pair and record
# which pairs actually returned data.
market = "LN"
megaframe = make_megadataframe(market)

thresholds_table = []

# Bound up front so that `del data` below cannot raise NameError when
# `megaframe` is empty (the loop would then never bind it).
data = None
for (t1, t2), data in megaframe.items():
    t1_string = f.stringify_threshold(t1)
    t2_string = f.stringify_threshold(t2)
    # Uncomment to (re)write the pickles that the grid-plot cell reads:
    # data.to_pickle(f'data/processed/trapezoidal_selection/mfht_{market}_{t1_string}_{t2_string}.pkl')
    thresholds_table.append([t1, t2])

# Drop the references to the binned frames
del megaframe
del data

In [None]:
from IPython.display import display, Markdown

# Render the threshold pairs collected in `thresholds_table` as a markdown
# table: one row per start threshold, one cell per end threshold.

# Order the pairs by start threshold
thresholds_table = [[t1, t2] for t1, t2 in sorted(thresholds_table, key=lambda x: float(x[0]))]

# Map each start threshold to the list of its end thresholds
table_dict = {}
for t1, t2 in thresholds_table:
    table_dict.setdefault(t1, []).append(t2)

# Rows may hold different numbers of end thresholds: size the header and
# the separator on the widest one and pad the shorter rows.
n_end_cols = max((len(ends) for ends in table_dict.values()), default=1)

header = "| Start Threshold | " + " | ".join(["End"] * n_end_cols) + " |"
separator = "|" + ":-------:|" * (n_end_cols + 1)

rows = []
for start, ends in table_dict.items():
    # The thresholds are numbers: they must be converted before joining
    cells = [str(end) for end in sorted(ends, key=float)]
    cells += [""] * (n_end_cols - len(cells))
    rows.append(f"| {start} | {' | '.join(cells)} |")

markdown_table = "\n".join([header, separator, *rows]) + "\n"

# Display the markdown table
display(Markdown(markdown_table))

In [None]:
# Plot, for one market, the binned mean of 'FHT' (with its error band)
# for every threshold pair, on a grid with one panel per pair.
market = "UN"
n_rows = 10
n_cols = 10

# Create a grid of subplots
fig, axs = plt.subplots(n_rows, n_cols, layout='constrained', figsize=(16, 16))

# Flatten the array of axes
axs = axs.flatten()

# LEVELS is a set, so its iteration order is arbitrary. Sorting the pairs
# orders them by start threshold and then by end threshold; since every
# start threshold has exactly `n_cols` end thresholds, each grid row then
# holds a single start threshold, which is what the row label assumes.
for i, ((t1, t2), ax) in enumerate(zip(sorted(LEVELS), axs)):
    # Create the strings for the threshold values
    t1_string = f.stringify_threshold(t1)
    t2_string = f.stringify_threshold(t2)

    # If this is the first column, set the y-label. Done before loading so
    # the row keeps its label even when this panel has no data.
    if i % n_cols == 0:
        ax.set_ylabel(f"$\\theta_i = {t1}$")

    # Load the binned data from its pickle if it exists
    try:
        df = pd.read_pickle(f'data/processed/trapezoidal_selection/mfht_{market}_{t1_string}_{t2_string}.pkl')
    except FileNotFoundError:
        # No data saved for this pair: leave the panel empty
        continue

    # Left edge of each bin on the x axis
    x = df.index.categories.left.values
    y = df['mean'].values
    y_err = df['error_on_the_mean'].values

    ax.plot(x, y)
    ax.fill_between(x, y - y_err, y + y_err, alpha=0.2)

    # Always set the title with the final threshold value
    ax.set_title(f"$\\theta_f = {t2}$")

    # ax.set_xlim(0, 1)

plt.show()
fig.savefig('data/processed/trapezoidal_selection/grid.png')