# MFHT Grid Plot

In [None]:
import sqlite3

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm

from scipy.optimize import curve_fit

from stabilvol.utility import functions as f

# Path of the SQLite database, relative to the notebook's working directory.
DATABASE = '../data/processed/trapezoidal_selection/stabilvol.sqlite'
# Connect to the SQLite database; `conn` is the module-level connection that
# the query helper further down reads from.
# NOTE(review): sqlite3.connect() creates a new empty database file when the
# path does not exist (as long as its directory does), so a wrong working
# directory shows up later as missing tables rather than as an error here.
conn = sqlite3.connect(DATABASE)
# Cursor on the same connection (not used by the code visible in this notebook).
cur = conn.cursor()

In [None]:
import os
# Sanity check: print the directory the relative paths are resolved against.
print(os.getcwd())
# Last expression of the cell: displays whether the database file is present.
os.path.exists(DATABASE)

In [None]:
def select_bins(df, max_n=1000):
    """Bin the FHT samples by volatility quantile and summarise every bin.

    The number of quantile bins starts at 50 and grows in steps of 50 until
    the least populated bin holds fewer than ``max_n`` samples, or the bin
    count exceeds 1000. The summary of the last binning tried is returned.

    Args:
        df: DataFrame with at least the columns ``'Volatility'`` and ``'FHT'``.
            It is left unmodified.
        max_n: Sample count below which the smallest bin stops the refinement.

    Returns:
        DataFrame indexed by the volatility bins (index name ``'Bins'``) with
        the columns ``'mean'``, ``'error_on_the_mean'`` and ``'size'``.
    """
    nbins = 50

    while True:
        # Quantile bins of 'Volatility', kept in a standalone Series so the
        # caller's frame does not silently gain a 'Bins' column. The Series is
        # named 'Bins' so the result index keeps that name.
        bins = pd.qcut(df['Volatility'], nbins, duplicates='drop').rename('Bins')

        # Mean, standard error and sample count of 'FHT' in every bin
        grouped = df.groupby(bins)['FHT'].agg(['mean', error_on_the_mean, 'size'])
        count = grouped['size'].min()

        if count < max_n or nbins > 1000:
            break
        nbins += 50
    return grouped

def error_on_the_mean(values):
    """Return the standard error of the mean of *values*.

    This is the standard deviation, as computed by ``np.std`` with its
    default arguments, divided by the square root of the number of samples.
    """
    n_samples = len(values)
    spread = np.std(values)
    return spread / np.sqrt(n_samples)

In [None]:
# Start thresholds, and the offsets added to each of them to get the end threshold.
START_LEVELS = [-2.0, -1.0, -0.5, -0.2, -0.1, 0.1, 0.2, 0.5, 1.0, 2.0]
DELTAS = [2.0, 1.0, 0.5, 0.2, 0.1, -0.1, -0.2, -0.5, -1.0, -2.0]
# Every (start, end) pair with end = start + delta, rounded to two decimals.
# NOTE(review): this is a set, so iterating it does not follow the nested
# START_LEVELS/DELTAS order; code that needs a fixed order has to sort it.
LEVELS = {
    (start, round(start+delta, 2)) for start in START_LEVELS for delta in DELTAS
}

# Upper bound on 'Volatility' used when the binned data is queried from the database.
VOL_LIMIT=0.05  # Changing this invalidates all the cached pickle files; remember to re-generate them

In [None]:
def query_binned_data(market: str, t1_string: str, t2_string: str, vol_limit: float):
    """Load the FHT samples of one market and threshold pair, binned by volatility.

    Reads the table ``stabilvol_<t1_string>_<t2_string>`` through the
    module-level connection ``conn``, keeping the rows of ``market`` whose
    ``Volatility`` is below ``vol_limit``, and passes them to ``select_bins``.

    Args:
        market: Value matched against the ``Market`` column.
        t1_string: Start-threshold part of the table name.
        t2_string: End-threshold part of the table name.
        vol_limit: Exclusive upper bound on the ``Volatility`` column.

    Returns:
        The frame returned by ``select_bins``, or ``None`` when the query
        fails (a message is printed in that case).
    """
    # Table names cannot be bound as SQL parameters, so the threshold strings
    # are still interpolated. The two values are bound with placeholders: this
    # avoids quoting problems, in particular SQLite reading a double-quoted
    # "market" as a column name whenever such a column exists.
    query = f'''
        SELECT *
        FROM stabilvol_{t1_string}_{t2_string}
        WHERE Volatility < ? AND Market = ?
        '''
    try:
        # Load the FHT data from the database
        df = pd.read_sql_query(query, conn, params=(vol_limit, market))
    except pd.errors.DatabaseError:
        print(f'No data for market {market} with thresholds {t1_string}-{t2_string}')
        return None
    return select_bins(df)

def make_megadataframe(market, save=True):
    """Collect the binned MFHT data of ``market`` for every pair in ``LEVELS``.

    For each (start, end) threshold pair the binned data is read from its
    pickle file when that file already exists; otherwise it is queried from
    the database and, when ``save`` is true, written to the pickle file.

    Args:
        market: Market code used in the query and in the pickle file names.
        save: Whether freshly queried data is written to disk.

    Returns:
        Dict mapping ``(t1, t2)`` to the binned DataFrame of that pair, or to
        ``None`` when the database holds no table for it.
    """
    df_dict = {}
    for t1, t2 in tqdm(LEVELS):
        # Create the strings for the threshold values
        t1_string = f.stringify_threshold(t1)
        t2_string = f.stringify_threshold(t2)
        # Filename for the MFHT data
        filename = f'../data/processed/trapezoidal_selection/mfht_{market}_{t1_string}_{t2_string}.pkl'
        if os.path.exists(filename):
            print(f"File '{filename}' already exists")
            # Reuse the cached result so the returned dict is complete
            grouped_data = pd.read_pickle(filename)
        else:
            grouped_data = query_binned_data(market, t1_string, t2_string, VOL_LIMIT)
            # The query returns None for a missing table: there is nothing to
            # pickle then, and calling .to_pickle on None would raise.
            if save and grouped_data is not None:
                grouped_data.to_pickle(filename)
        df_dict[(t1, t2)] = grouped_data

    return df_dict

In [None]:
# Build the binned MFHT data for market "UN" (with the default save=True the
# results are cached as pickle files).
market = "UN"
megaframe = make_megadataframe(market)

In [None]:
# Same as above for market "UW"; `market` and `megaframe` are overwritten.
market = "UW"
megaframe = make_megadataframe(market)

In [None]:
# Same as above for market "LN"; `market` and `megaframe` are overwritten.
market = "LN"
megaframe = make_megadataframe(market)

In [None]:
# Same as above for market "JT"; `market` and `megaframe` are overwritten.
market = "JT"
megaframe = make_megadataframe(market)

In [None]:
def get_thresholds(market):
    """Yield the (start, end) threshold pairs that have an MFHT file for *market*.

    Scans the processed-data directory for entries whose name starts with
    ``mfht_<market>_`` and converts the two threshold fields of each name
    with ``f.numerify_threshold``.
    """
    prefix = f'mfht_{market}_'
    for entry in os.listdir('../data/processed/trapezoidal_selection/'):
        if not entry.startswith(prefix):
            continue
        # Drop the extension, then take the third and fourth
        # underscore-separated fields of the name.
        fields = entry.replace(".pkl", "").split('_')
        start_str, end_str = fields[2:4]
        yield (f.numerify_threshold(start_str), f.numerify_threshold(end_str))

In [None]:
from IPython.display import display, Markdown

# Map every start threshold to the list of end thresholds available for it
table_dict = {}

thresholds_table = [[t1, t2] for t1, t2 in sorted(get_thresholds("UN"), key=lambda x: float(x[0]), reverse=False)]
for start, end in thresholds_table:
    table_dict.setdefault(start, []).append(end)

# Create the markdown table. A Markdown table is only rendered when the header
# row and the delimiter row have the same number of cells, so both are sized
# for the widest row (one start column plus one column per end threshold).
n_end_cols = max((len(ends) for ends in table_dict.values()), default=1)
table_rows = [
    "| Start Threshold |" + " End |" * n_end_cols,
    "|" + ":-------:|" * (n_end_cols + 1),
]
for key, values in table_dict.items():
    values = sorted(values, key=float)
    table_rows.append("| " + " | ".join(str(cell) for cell in [key, *values]) + " |")
markdown_table = "\n".join(table_rows) + "\n"

# Display the markdown table
display(Markdown(markdown_table))

In [None]:
def plot_mfht_grid(markets, plotsscale='', show=False):
    """Plot the binned MFHT curves of one or more markets on a 10x10 grid.

    One subplot is drawn per (start, end) threshold pair of ``LEVELS``, taken
    in sorted order so that every grid row shares one start threshold. For
    each market the cached pickle of the pair is read and its per-bin mean is
    plotted against the left edge of the volatility bins, with a shaded band
    of one ``error_on_the_mean`` around it. The figure is saved as PNG and
    EPS under ``../visualization/mfhts/``.

    Args:
        markets: A market code, or a list/tuple of market codes.
        plotsscale: ``''`` for linear y-axes, ``'log'`` for logarithmic ones.
        show: Whether to also call ``plt.show()``.

    Raises:
        ValueError: If ``plotsscale`` is neither ``''`` nor ``'log'``.
    """
    if isinstance(markets, (list, tuple)):
        markets = list(markets)
    else:
        markets = [markets]
    if plotsscale not in ['', 'log']:
        raise ValueError("plotsscale must be either '' or 'log'")

    n_rows = 10
    n_cols = 10

    # Create a grid of subplots
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(16, 16))

    # Flatten the array of axes
    axs = axs.flatten()

    # One fixed colour per market, so a curve keeps its colour in subplots
    # where the file of another market is missing.
    colors = {market: f'C{k}' for k, market in enumerate(markets)}
    # Markets that already own a legend entry (each one is labelled only once)
    labelled = set()

    # LEVELS is a set and has no defined iteration order: sort the pairs so
    # the row-wise y-label below really describes the whole row.
    for i, ((t1, t2), ax) in enumerate(zip(sorted(LEVELS), axs)):
        # Create the strings for the threshold values
        t1_string = f.stringify_threshold(t1)
        t2_string = f.stringify_threshold(t2)

        for market in markets:
            # Load the cached dataframe if it exists
            try:
                df = pd.read_pickle(f'../data/processed/trapezoidal_selection/mfht_{market}_{t1_string}_{t2_string}.pkl')
            except FileNotFoundError:
                print(f"File 'mfht_{market}_{t1_string}_{t2_string}.pkl' not found")
                continue

            x = df.index.categories.left.values
            y = df['mean'].values
            y_err = df['error_on_the_mean'].values

            # Label the first curve drawn for each market, wherever it is, so
            # the legend does not depend on the first subplot having data.
            label = "" if market in labelled else market
            labelled.add(market)

            ax.plot(x, y, color=colors[market], label=label)
            ax.fill_between(x, y - y_err, y + y_err, color=colors[market], alpha=0.2)

        # Axis decoration is done once per subplot, also when no file was found
        if plotsscale == 'log':
            ax.set_yscale('log')

        # If this is the first column, set the y-label
        if i % n_cols == 0:
            ax.set_ylabel(f"$\\theta_i = {t1}$")
        # Always set the title with the final threshold value
        ax.set_title(f"$\\theta_f = {t2}$")

    # Place a shared legend just below the subplots
    fig.legend(loc='upper center', bbox_to_anchor=(0.5, -0.008), ncol=4)

    plt.tight_layout()

    # The legend lies outside the figure canvas: a tight bounding box keeps it
    # in the saved files. Save before showing, while the figure is still open.
    marketsname = ''.join(markets)
    fig.savefig(f'../visualization/mfhts/{marketsname}_FHT_threshold_{plotsscale}grid.png', bbox_inches='tight')
    fig.savefig(f'../visualization/mfhts/{marketsname}_FHT_threshold_{plotsscale}grid.eps', bbox_inches='tight')

    if show:
        plt.show()

In [None]:
# Close any open figures, then draw the combined grid of the four markets,
# once with linear and once with logarithmic y-axes.
plt.close('all')
plot_mfht_grid(["UN", "UW", "LN", "JT"])
plot_mfht_grid(["UN", "UW", "LN", "JT"], plotsscale='log')


In [None]:
# Close any open figures, then draw one separate grid per market.
plt.close('all')
for market in ["UN", "UW", "LN", "JT"]:
    plot_mfht_grid(market)

In [None]:
# Binned data of market "UN" from table stabilvol_0p1_0p0, with the volatility
# cut set to 10 instead of VOL_LIMIT.
un_01_00 = query_binned_data("UN", "0p1", "0p0", 10)

In [None]:
# Plot the per-bin mean with its error bars on a logarithmic y-axis.
un_01_00.plot(y='mean', yerr='error_on_the_mean', figsize=(16, 9), logy=True)

In [None]:
# Scratch cell: try out a shared figure legend on a 10x10 grid, using four
# random datasets as stand-ins for the four markets.
A = np.random.rand(10, 100)
B = np.random.rand(10, 100)
C = np.random.rand(10, 100)
D = np.random.rand(10, 100)
datasets = {'A': A, 'B': B, 'C': C, 'D': D}

fig, axs = plt.subplots(10, 10, figsize=(15, 15))

for i in range(10):
    for j in range(10):
        for name, data in datasets.items():
            # fig.legend() ignores artists with an empty label, so the curves
            # of the first subplot carry the names and all others stay
            # unlabelled; this gives exactly one legend entry per dataset.
            axs[i, j].plot(data[i], label=name if (i, j) == (0, 0) else '')

# Place a legend above the subplots
fig.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=4)

plt.tight_layout()
plt.show()