# MFHT Grid Plot
plt.close('all')

In [None]:
import sqlite3

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm
import datetime

import sys
sys.path.append('../stabilvol')
from utility.functions import stringify_threshold

# Path to the SQLite file, relative to the notebook's working directory
DATABASE = '../data/processed/trapezoidal_selection/stabilvol.sqlite'
# Connect to the SQLite database; the connection stays open for the whole session
conn = sqlite3.connect(DATABASE)
# Cursor for ad-hoc SQL statements issued directly from the notebook
cur = conn.cursor()

ImportError: cannot import name 'stringify_thresholds' from 'utility.functions' (g:\UNIPA\ECONOFISICA\stabilizing_volatility\notebooks\../stabilvol\utility\functions.py)

In [None]:
import os
# DATABASE is a relative path: show the working directory it is resolved against
print(os.getcwd())
# Displayed as the cell output: True when the database file is reachable from here
os.path.exists(DATABASE)

In [None]:
def select_bins(df, max_n=1000):
    """Bin FHT samples by volatility quantiles and summarise every bin.

    The search starts from 50 quantile bins and requests 20 more at each
    step until the least populated bin holds fewer than ``max_n`` samples,
    or until more than 1000 bins have been requested.

    Args:
        df: DataFrame with the columns ``'Volatility'`` and ``'FHT'``.
            It is left untouched: the bin labels live in a separate Series
            instead of being written back as a ``'Bins'`` column.
        max_n: Bin size under which the refinement stops.

    Returns:
        A tuple ``(grouped, nbins)``. ``grouped`` is indexed by the
        volatility intervals (index name ``'Bins'``) and has the columns
        ``'mean'``, ``'error_on_the_mean'`` and ``'size'``. ``nbins`` is the
        number of quantiles requested in the last iteration; fewer bins may
        actually exist because duplicated edges are dropped.
    """
    nbins = 50

    while True:
        # Quantile-based bins; the Series keeps df's index so it aligns with 'FHT'
        bins = pd.qcut(df['Volatility'], nbins, duplicates='drop').rename('Bins')

        # observed=False pins the historical pandas default for categorical keys
        grouped = (
            df['FHT']
            .groupby(bins, observed=False)
            .agg(['mean', error_on_the_mean, 'size'])
        )

        if grouped['size'].min() < max_n or nbins > 1000:
            break
        nbins += 20
    return grouped, nbins

def error_on_the_mean(values):
    """Return the standard error of the mean of ``values``.

    This is ``np.std(values)`` (NumPy's default, i.e. ``ddof=0``) divided by
    the square root of the number of samples.
    """
    n_samples = len(values)
    spread = np.std(values)
    return spread / np.sqrt(n_samples)

In [None]:
# Market codes looped over by the plotting cells
MARKETS = ["UN", "UW", "LN", "JT"]
# Overall date range, as ISO strings
START_DATE = "1980-01-01"
END_DATE = "2022-07-01"
# Starting thresholds and the offsets added to them to get the second threshold
START_LEVELS = [-2.0, -1.0, -0.5, -0.2, -0.1, 0.1, 0.2, 0.5, 1.0, 2.0]
DELTAS = [2.0, 1.0, 0.5, 0.2, 0.1, -0.1, -0.2, -0.5, -1.0, -2.0]
# Every distinct (start, start + delta) pair, rounded to 2 decimals, in sorted order
LEVELS = sorted(
    {
        (start, round(start + delta, 2))
        for start in START_LEVELS
        for delta in DELTAS
    }
)

VOL_LIMIT = 0.5  # Changing this changes all the pickle files: remember to re-generate them
LEVELS

In [None]:
def query_binned_data(market: str, start_date: str, end_date: str = None, vol_limit: float = 0.5, t1_string: str = "m0p5", t2_string: str = "m1p5"):
    """Load the FHT rows of one market and time window and bin them by volatility.

    Rows are read from the table ``stabilvol_{t1_string}_{t2_string}`` through
    the module-level connection ``conn`` and handed to ``select_bins``.

    Args:
        market: Value matched against the ``Market`` column.
        start_date: Only rows with ``start`` strictly after this date are kept.
            Anything whose ``str()`` is the stored date format is accepted
            (e.g. an ISO string or a ``datetime.date``).
        end_date: Only rows with ``end`` strictly before this date are kept.
            ``None`` means ``'2022-07-01'``.
        vol_limit: Exclusive upper bound on ``Volatility``.
        t1_string: First threshold label used in the table name.
        t2_string: Second threshold label used in the table name.

    Returns:
        ``(grouped_data, nbins)`` as returned by ``select_bins``, or
        ``(None, 0)`` when the query fails (e.g. the table does not exist)
        or matches no row.
    """
    end_date = '2022-07-01' if end_date is None else end_date

    # A table name cannot be bound as a parameter, so only the threshold labels
    # are interpolated; every value goes through a placeholder.
    query = f'''
        SELECT *
        FROM stabilvol_{t1_string}_{t2_string}
        WHERE Volatility < ?
        AND Market = ?
        AND start > ?
        AND end < ?
        '''
    # str() turns date objects into the same text the columns are compared with
    params = (vol_limit, market, str(start_date), str(end_date))

    try:
        # Load the FHT data from the database
        df = pd.read_sql_query(query, conn, params=params)
    except pd.errors.DatabaseError:
        print(f'No data for market {market} with thresholds {t1_string}-{t2_string}')
        return None, 0

    if df.empty:
        # Nothing to bin: report it the same way as a missing table
        print(f'No rows for market {market} between {start_date} and {end_date}')
        return None, 0

    return select_bins(df)

def save_all_mfhts(market, save=True):
    """Compute and pickle the binned MFHT of ``market`` for every pair in ``LEVELS``.

    For each ``(t1, t2)`` pair the binned data are queried over the whole
    ``START_DATE``-``END_DATE`` range with ``VOL_LIMIT`` and written to
    ``mfht_{market}_{t1}_{t2}.pkl``. Existing files are never overwritten.

    Args:
        market: Market code forwarded to ``query_binned_data``.
        save: When False nothing is queried or written.

    Returns:
        Dict mapping each ``(t1, t2)`` pair to the number of bins used, or 0
        when the pair was skipped (``save`` is False, the file already
        exists, or no data was available).
    """
    bins_dict = {}
    for t1, t2 in tqdm(LEVELS):
        # Create the strings for the threshold values
        t1_string = stringify_threshold(t1)
        t2_string = stringify_threshold(t2)
        # Filename for the MFHT data
        filename = f'../data/processed/trapezoidal_selection/mfht_{market}_{t1_string}_{t2_string}.pkl'

        nbins = 0
        if save:
            if os.path.exists(filename):
                print(f"File '{filename}' already exists")
            else:
                # Dates come first positionally, the threshold strings last
                grouped_data, nbins = query_binned_data(
                    market, START_DATE, END_DATE, VOL_LIMIT, t1_string, t2_string
                )
                # None means the table is missing or empty: nothing to pickle
                if grouped_data is not None:
                    grouped_data.to_pickle(filename)
        bins_dict[(t1, t2)] = nbins

    return bins_dict

In [None]:
# Quick check on one market and window, with the default threshold strings; the bin count is discarded
df, _ = query_binned_data("UN", "2000-10-01", "2022-01-01", 0.5)
# Displayed as the cell output: first rows of the binned statistics
df.head()

## Shrinking Window
We move the start date closer to the end date to see whether the non-monotonic behaviour vanishes.

In [None]:
def create_dates():
    """Return window start dates that approach 2022-06-25 logarithmically.

    21 offsets are log-spaced between 1 day and the number of days separating
    1980-01-01 from 2022-06-25. They are truncated to whole days,
    de-duplicated and subtracted from the end date, so the returned list
    goes from the earliest start date to the latest one.
    """
    first_day = datetime.date(1980, 1, 1)
    last_day = datetime.date(2022, 6, 25)
    span_in_days = (last_day - first_day).days

    # Log-spaced offsets (in days) counted backwards from the last day
    offsets = np.logspace(0, np.log10(span_in_days), num=21, base=10.0)
    # Truncating to integers can create duplicates, hence the set
    whole_offsets = {int(offset) for offset in offsets}

    starts = []
    for offset in sorted(whole_offsets, reverse=True):
        starts.append(last_day - datetime.timedelta(days=offset))
    return starts
    
# Start dates of the shrinking windows, ordered from the longest window to the shortest
dates = create_dates()
print(f"There are {len(dates)} dates starting from {dates[0]} to {dates[-1]}")


In [None]:
import matplotlib.colors as mcolors

def desaturate_color(color):
    """Return a paler RGB version of ``color``.

    The colour is converted to HSV, its saturation is divided by four (hue
    and value are kept) and the result is converted back to RGB.
    """
    hue, saturation, value = mcolors.rgb_to_hsv(mcolors.to_rgb(color))
    paler_hsv = (hue, saturation / 4, value)
    return mcolors.hsv_to_rgb(paler_hsv)


def plot_mfhts(market, mfht, ax, **kwargs):
    """Draw one MFHT-versus-volatility curve on ``ax``.

    Args:
        market: Market code, used as the axes title.
        mfht: Binned data indexed by volatility intervals, with a ``'mean'``
            column and, when the error band is drawn, an
            ``'error_on_the_mean'`` column. ``None`` or an empty frame is
            skipped and ``ax`` is returned untouched.
        ax: Matplotlib axes to draw on.
        **kwargs: Optional settings:
            ``label`` (legend label, default ``None``),
            ``error`` (draw the error band, default ``True``),
            ``xlim`` (default ``(-0.001, 0.08)``),
            ``yscale`` (default ``'linear'``).

    Returns:
        The axes that was passed in.
    """
    if mfht is None or len(mfht) == 0:
        # Nothing to draw for this market/window
        return ax

    # Left edge of every bin, taken row by row into a private array: writing
    # into ``index.categories.left.values`` would touch the index's own data.
    x = np.array([interval.left for interval in mfht.index], dtype=float)
    # Renormalize the first value
    x[0] = 0
    y = mfht['mean'].values

    line, = ax.plot(x, y, label=kwargs.get('label', None))

    if kwargs.get('error', True):
        y_err = mfht['error_on_the_mean'].values
        # Band in a paler shade of the line colour
        ax.fill_between(x, y - y_err, y + y_err, color=desaturate_color(line.get_color()))

    ax.set_title(r"\emph{" + market + r"}", fontsize=18)

    ax.set_xlim(kwargs.get('xlim', (-0.001, 0.08)))

    ax.set_yscale(kwargs.get('yscale', 'linear'))

    ax.grid(True)
    return ax

from mpl_toolkits.axes_grid1.inset_locator import inset_axes

def plot_inset(mfht, inset_ax):
    """Plot the mean FHT against the left edge of each volatility bin.

    Rows containing NaN are left out of the plot. The caller's frame is not
    modified: the rows are dropped on a copy.

    Args:
        mfht: Binned data indexed by intervals, with a ``'mean'`` column.
            ``None`` is skipped.
        inset_ax: Axes (or any object with a ``plot(x, y)`` method) to draw on.

    Returns:
        ``inset_ax``.
    """
    if mfht is None:
        return inset_ax

    complete = mfht.dropna()
    x = [interval.left for interval in complete.index]
    y = complete['mean'].values

    inset_ax.plot(x, y)
    return inset_ax

In [None]:
# Use LaTeX for text rendering
# Use LaTeX for text rendering
plt.rcParams['text.usetex'] = True
plt.rcParams['font.family'] = 'serif'

# One panel per market, sharing both axes
fig, axs = plt.subplots(2, 2, figsize=(12, 8), sharey=True, sharex=True)

# Shared axis labels, shifted so they sit between the panels
axs[0, 0].set_ylabel('MFHT', y=-0.1, fontsize=16)
axs[1, 0].set_xlabel('Volatility', x=1.1, fontsize=16)

for market, ax in zip(MARKETS, axs.flatten()):
    # Place the inset axes
    inset_ax = inset_axes(ax, width="50%", height="60%", loc=1)
    inset_ax.tick_params(axis='both', which='major', labelsize=11)
    for start_date in tqdm(dates, desc=market):
        # vol_limit must go by keyword: the third positional parameter is end_date
        mfht, nbins = query_binned_data(market, start_date, vol_limit=VOL_LIMIT)
        if mfht is None:
            # No data for this window: nothing to draw
            continue

        ax = plot_mfhts(market, mfht, ax)

        # Repeat some of the shortest windows in the inset (the very last one is left out)
        if start_date in dates[-10:-1]:
            inset_ax = plot_inset(mfht, inset_ax)

# fig.tight_layout()

plt.show()

In [None]:
# Export the figure in raster (PNG) and vector (EPS, PDF) formats, cropped to its content
fig.savefig('../visualization/dynamics/shrinking_windows.png', bbox_inches='tight')
fig.savefig('../visualization/dynamics/shrinking_windows.eps', bbox_inches='tight', dpi=300)
fig.savefig('../visualization/dynamics/shrinking_windows.pdf', bbox_inches='tight')

### Focus on the last shorter windows

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))
market = "UN"
# Inset in the upper-right corner (loc=1) of the main axes
inset_ax = inset_axes(ax, width="50%", height="60%", loc=1)
# Smaller tick labels for the inset
inset_ax.tick_params(axis='both', which='major', labelsize=8)
for start_date in dates:
    # vol_limit must go by keyword: the third positional parameter is end_date
    mfht, nbins = query_binned_data(market, start_date, vol_limit=VOL_LIMIT)
    print(nbins, end=', ')
    if mfht is None:
        # No data for this window: nothing to draw
        continue
    ax = plot_mfhts(market, mfht, ax, xlim=(-0.005, 0.08))

    # Repeat some of the shortest windows in the inset (the very last one is left out)
    if start_date in dates[-10:-1]:
        plot_inset(mfht, inset_ax)
        

# 2008 Crisis
Please note that we need to establish clear key dates:
15th September 2008: bankruptcy filing of Lehman Brothers (LB),
\url{https://en.wikipedia.org/wiki/Lehman\_Brothers}

but troubles started much earlier with the subprime mortgage crisis, so we need to look at when housing market prices crashed.

Note: "In August 2007, the firm closed its subprime lender, BNC Mortgage, eliminating 1,200 positions in 23 locations, and took an after-tax charge of \$25 million and a \$27 million reduction in goodwill. Lehman said that poor market conditions in the mortgage space "necessitated a substantial reduction in its resources and capacity in the subprime space."

In September 2007, Joe Gregory appointed Erin Callan as CFO. On March 16, 2008, after rival Bear Stearns was taken over by JP Morgan Chase in a fire sale, market analysts suggested that Lehman would be the next major investment bank to fall. Callan fielded Lehman's first quarter conference call, where the firm posted a profit of \$489 million, compared to Citigroup's \$5.1 billion and Merrill Lynch's \$1.97 billion losses which was Lehman’s 55th consecutive profitable quarter. The firm's stock price leapt 46 percent after that announcement." 

Also note that March 2007 marks the beginning of the subprime crisis with the largest drop in house prices in a decade.

In [None]:
# Reference date used for the start of the subprime crisis (March 2007)
CRISIS2007 = datetime.date(year=2007, month=3, day=15)
# Lehman Brothers bankruptcy filing
CRISIS2008 = datetime.date(year=2008, month=9, day=15)

In [None]:
def create_dates(center_date, nwindows=20):
    """Return shrinking ``(start, end)`` windows centred on ``center_date``.

    Window lengths are log-spaced between ``10**1.4`` days and 28 years,
    truncated to whole days and de-duplicated; the windows are returned from
    the longest to the shortest. Each window spans half of its length on
    either side of ``center_date``. With a ``datetime.date`` centre the
    half-day left by an odd length is ignored by the date arithmetic.

    Args:
        center_date: Date every window is centred on.
        nwindows: Number of log-spaced lengths generated before de-duplication.
    """
    # Longest window: 28 years expressed in days
    longest = 28 * 365

    log_spaced = np.logspace(1.4, np.log10(longest), num=nwindows, base=10.0)
    lengths = sorted({int(length) for length in log_spaced}, reverse=True)

    windows = []
    for length in lengths:
        half_span = datetime.timedelta(days=length) / 2
        windows.append((center_date - half_span, center_date + half_span))
    return windows

In [None]:
def plot_shrinkingmfht(windows, vol_limit = VOL_LIMIT):
    """Plot the MFHT curves of every market over a series of time windows.

    A 2x2 grid is drawn, one panel per entry of ``MARKETS``. Each window gives
    one curve labelled with its length in days; windows shorter than 90 days
    are repeated in the panel's inset. Windows without data are skipped.

    Args:
        windows: Iterable of ``(start_date, end_date)`` pairs.
        vol_limit: Exclusive upper bound on the volatility, forwarded to
            ``query_binned_data``.

    Note:
        The global matplotlib rcParams are switched to LaTeX text rendering
        with a serif font, and the figure is shown with ``plt.show()``.
    """
    # Use LaTeX for text rendering
    plt.rcParams['text.usetex'] = True
    plt.rcParams['font.family'] = 'serif'

    fig, axs = plt.subplots(2, 2, figsize=(12, 8), sharey=True, sharex=True)

    axs[0, 0].set_ylabel('MFHT', y=-0.1, fontsize=16)
    axs[1, 0].set_xlabel('Volatility', x=1.1, fontsize=16)

    for market, ax in zip(MARKETS, axs.flatten()):
        # Place the inset axes
        inset_ax = inset_axes(ax, width="50%", height="60%", loc=1)
        inset_ax.tick_params(axis='both', which='major', labelsize=11)
        for start_date, end_date in tqdm(windows, desc=market):
            duration = end_date - start_date

            mfht, nbins = query_binned_data(market, start_date, end_date, vol_limit)
            if mfht is None:
                # query_binned_data found nothing for this window: skip it
                continue

            ax = plot_mfhts(market, mfht, ax, error=False, label=duration.days)

            if duration < pd.to_timedelta('90d'):
                inset_ax = plot_inset(mfht, inset_ax)

    # fig.tight_layout()
    # Add common legend on top, built from the curves of the first panel
    handles, labels = axs[0, 0].get_legend_handles_labels()
    # Anchored above the figure so it stays outside the plot area
    fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=4)
    plt.show()

### March 2007

In [None]:
# Shrinking windows centred on the March 2007 reference date (16 requested lengths)
windows = create_dates(CRISIS2007, 16)

plot_shrinkingmfht(windows)

### September 2008

In [None]:
# Shrinking windows centred on the September 2008 reference date (16 requested lengths)
windows = create_dates(CRISIS2008, 16)

plot_shrinkingmfht(windows)

### Before the Crisis

In [None]:
nyears = 3 # Number of years before the crisis
# Centre the windows nyears (counted as 365-day years) before the March 2007 date
windows = create_dates(CRISIS2007 - datetime.timedelta(days=nyears*365), 12)

plot_shrinkingmfht(windows)

### After the Crisis

In [None]:
nyears = -3 # Negative: the centre is moved 3 years after the crisis
# Subtracting a negative offset shifts the centre forward in time (365-day years)
windows = create_dates(CRISIS2007 - datetime.timedelta(days=nyears*365), 12)

plot_shrinkingmfht(windows)

Apparently nothing important changes.