In [None]:
import pandas as pd
from itertools import product
import matplotlib.pyplot as plt
from datetime import datetime


# Input price table (read with pd.read_pickle further down) and the prefix
# used for the exported result files.
db_file_path = 'db/ohlcv_ntickers_1254_2000-08-01_to_2023-12-23.pkl'
export_name = 'results'

# Reduced grid for a quick smoke run: uncomment these five lines and comment
# out the full grid below.
# n_days_past_range = [1,2]
# n_days_future_range = [2]
# filter_lower_limit_range = [-100]
# filter_width_range = [50]
# loss_limit_range = [-0.5]

# Full grid.
# Look-back / look-ahead horizons, expressed as row shifts of the price table.
n_days_past_range = [1, 2, 3, 4, 5, 7, 10, 12, 15, 17, 20]
n_days_future_range = [1, 2, 3, 4, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
# Rows are kept when the past variation (in %) lies in
# [filter_lower_limit, filter_lower_limit + filter_width].
filter_lower_limit_range = [
    -125, -100, -80, -60, -50, -40, -35, -30, -25, -20, -15, -10, -5,
    0, 5, 10, 15, 20,
]
filter_width_range = [10, 20, 30, 40, 50, 60, 70]
# Loss limit in %: future variations are replaced by this value when the price
# drops below price * (1 + loss_limit / 100) inside the look-ahead window.
loss_limit_range = [-10, -5, -1, 0]

# Total number of parameter combinations the grid search will evaluate.
num_combinations = (
    len(n_days_past_range)
    * len(n_days_future_range)
    * len(filter_lower_limit_range)
    * len(filter_width_range)
    * len(loss_limit_range)
)
print(num_combinations)

In [None]:
# Load the pickled price table.
# NOTE(review): unpickling can execute arbitrary code, so only load files
# from a trusted source.
data = pd.read_pickle(db_file_path)

# Keep only the "Open" group of columns and drop the outer column level so the
# remaining column labels are the inner-level ones (presumably tickers; confirm).
open_data = data[["Open"]].droplevel(0, axis=1)

In [None]:
def create_floor_mask(df, loss_limit, n_days_future):
    """Flag cells whose forward-looking minimum breaches the loss threshold.

    For every cell, the minimum is taken over a forward window made of the
    current row plus the next ``n_days_future`` rows, and compared with
    ``value * (1 + loss_limit / 100)``.

    Args:
        df: Numeric DataFrame (one column per series).
        loss_limit: Loss limit in percent (e.g. ``-5`` for a 5 % drop).
        n_days_future: Number of rows to look ahead.

    Returns:
        Boolean DataFrame shaped like ``df``; ``True`` where the forward
        minimum is strictly below the threshold. Cells whose rolling minimum
        is NaN compare as ``False``.
    """
    forward_window = pd.api.indexers.FixedForwardWindowIndexer(window_size=n_days_future + 1)
    lowest_ahead = df.rolling(window=forward_window).min()

    return lowest_ahead < df * (1 + (loss_limit / 100))

def create_floored_future_var(df, loss_limit, n_days_future, future_var):
    """Return a copy of ``future_var`` capped at the loss limit where breached.

    The breach mask comes from ``create_floor_mask`` and is stacked (NaN rows
    kept) so that it is indexed like the stacked ``future_var`` series.

    Args:
        df: Price DataFrame the mask is computed from.
        loss_limit: Loss limit in percent; also the replacement value.
        n_days_future: Number of rows to look ahead when building the mask.
        future_var: Stacked series of future variations; left unmodified.

    Returns:
        A new series equal to ``future_var`` except that entries flagged by
        the mask are set to ``loss_limit``.
    """
    breached = create_floor_mask(df, loss_limit, n_days_future).stack(dropna=False)

    capped = future_var.copy()
    capped[breached] = loss_limit
    return capped
    

In [None]:
def get_year_column(df):
    """Return the first four characters of index level 0, as strings.

    Level 0 of ``df``'s index is converted to text and truncated to four
    characters (presumably level 0 holds dates, so this is the year; confirm).
    """
    first_level = df.index.get_level_values(0)
    as_text = first_level.astype(str)
    return as_text.str[:4]

def concat_dfs(dfs, column_names):
    """Join the given objects side by side and drop incomplete rows.

    Args:
        dfs: Sequence of Series/DataFrames to align on their index.
        column_names: Keys used to label each input in the result's columns.

    Returns:
        The column-wise concatenation with every row containing a NaN removed.
    """
    return pd.concat(dfs, axis=1, keys=column_names).dropna()

def add_10_last_years_stats(df, first_year=2013, last_year=2023):
    """Append summary statistics computed across a window of yearly columns.

    For each of the ``median_<year>`` and ``mean_<year>`` column families, the
    row-wise median, mean and minimum over the selected years are added as
    ``{median,mean,min}_{median,mean}_10_last_years``.

    The year columns are selected explicitly by name rather than by label
    slicing, so a missing year column is simply skipped instead of raising.

    NOTE(review): the default window 2013..2023 is inclusive and therefore
    spans 11 calendar years, even though the output columns are named
    "10_last_years". The names are kept because other code sorts on them.

    Args:
        df: Results frame; not modified (a column-sorted copy is returned).
        first_year: First year (inclusive) of the window.
        last_year: Last year (inclusive) of the window.

    Returns:
        A new DataFrame with columns sorted by name and the six summary
        columns appended at the end.
    """
    df = df.sort_index(axis=1)

    # Outer loop order matters only for the order of the appended columns:
    # the three "median" summaries first, then the three "mean" summaries.
    for stat in ('median', 'mean'):
        wanted = [f'{stat}_{year}' for year in range(first_year, last_year + 1)]
        year_columns = [name for name in wanted if name in df.columns]
        yearly = df[year_columns]

        df[f'median_{stat}_10_last_years'] = yearly.median(axis=1)
        df[f'mean_{stat}_10_last_years'] = yearly.mean(axis=1)
        df[f'min_{stat}_10_last_years'] = yearly.min(axis=1)

    return df

def replace_by_zeros(df, col_name_beginning):
    """Replace NaN and empty-string cells by 0 in columns sharing a prefix.

    Args:
        df: DataFrame to clean; it is modified in place.
        col_name_beginning: Prefix selecting the columns to clean.

    Returns:
        The same ``df`` object, for convenience.
    """
    targets = list(filter(lambda name: name.startswith(col_name_beginning), df.columns))
    without_nan = df[targets].fillna(0)
    df[targets] = without_nan.replace('', 0)

    return df

In [None]:
def calculate_var(df, start_day, end_day):
    """Percentage change between two row offsets, returned as a stacked series.

    For every row, the value ``start_day`` rows ahead is the base and the
    value ``end_day`` rows ahead is the target (negative offsets look back).
    The result is ``(target - base) / base * 100``.

    Returns:
        The variation stacked into a series (one entry per row/column pair),
        with NaN entries kept.

    NOTE(review): the ``dropna`` argument of ``DataFrame.stack`` is deprecated
    in recent pandas releases; confirm against the pandas version in use.
    """
    base = df.shift(-start_day)
    target = df.shift(-end_day)
    pct_change = (target - base) / base * 100

    return pct_change.stack(dropna=False)

def calculate_filtered_var(df, col, lower_limit, width):
    """Keep the rows whose ``col`` value lies in ``[lower_limit, lower_limit + width]``.

    Both bounds are inclusive; rows where ``col`` is NaN are dropped.
    """
    upper_limit = lower_limit + width
    in_band = df[col].between(lower_limit, upper_limit)
    return df[in_band]

def calculate_overall_results(df_col):
    """Summarise a series as a one-row frame: row count, median and mean.

    Returns:
        DataFrame with a single row and the columns ``n_results``,
        ``median`` and ``mean`` (in that order).
    """
    summary = {}
    summary['n_results'] = [len(df_col)]
    summary['median'] = [df_col.median()]
    summary['mean'] = [df_col.mean()]
    return pd.DataFrame(summary)

def calculate_yearly_results(df, column):
    """Per-year statistics of ``column`` flattened into a single-row frame.

    ``df`` must contain a ``year`` column. Median, mean, min, max and count of
    ``column`` are computed per year, then reshaped so that each
    (statistic, year) pair becomes one column named ``<statistic>_<year>``.

    Returns:
        One-row DataFrame whose columns are sorted by name.
    """
    statistics = ['median', 'mean', 'min', 'max', 'count']
    per_year = pd.pivot_table(
        df,
        values=[column],
        index=['year'],
        aggfunc={column: statistics})

    # Collapse the year-by-statistic table into a single row with
    # multi-level column labels.
    single_row = per_year.unstack().to_frame().sort_index(level=1).T

    # Flatten the labels, then strip the leading "<column>_" part.
    single_row.columns = ['_'.join(parts) for parts in single_row.columns]
    single_row.columns = single_row.columns.str.replace(f'{column}_', '')

    ordered_names = sorted(single_row.columns)
    return single_row.reindex(ordered_names, axis=1)

In [None]:
def save_results(df, export_name, export_folder_path='./outputs/future vs past graphs/'):
    """Write ``df`` to a dated CSV file and a dated Excel file.

    Files are named ``<export_name>_<dd-mm-YYYY>.csv`` / ``.xlsx`` inside
    ``export_folder_path``. The folder is created when it does not exist, so
    a long computation is not lost to a missing output directory.

    Args:
        df: DataFrame to export.
        export_name: File-name prefix.
        export_folder_path: Destination folder; defaults to the notebook's
            original output location.
    """
    import os  # local import so the function does not depend on another cell's imports

    if export_folder_path:
        os.makedirs(export_folder_path, exist_ok=True)

    date = datetime.today().strftime('%d-%m-%Y')
    export_file_path = os.path.join(export_folder_path, f'{export_name}_{date}')

    df.to_csv(f'{export_file_path}.csv')
    df.to_excel(f'{export_file_path}.xlsx')

In [None]:
# Grid search over every parameter combination. Each combination yields one
# result row (parameters + overall statistics + per-year statistics).
# Rows are collected in a list and concatenated once at the end; growing a
# DataFrame with pd.concat inside the loop copies all previous rows on every
# iteration and becomes quadratic over the full grid.
result_rows = []
i = 0

for n_days_past in n_days_past_range:
    # The past variation depends only on n_days_past, so compute it once here
    # rather than once per (n_days_past, n_days_future) pair.
    past_var = calculate_var(df=open_data, start_day=-n_days_past, end_day=0)

    for n_days_future in n_days_future_range:
        future_var = calculate_var(df=open_data, start_day=0, end_day=n_days_future)

        for loss_limit in loss_limit_range:
            floored_future_var = create_floored_future_var(open_data, loss_limit, n_days_future, future_var)

            var = concat_dfs(
                dfs=[past_var, future_var, floored_future_var],
                column_names=['past_var', 'future_var', 'floored_future_var'])

            var['year'] = get_year_column(var)

            for filter_lower_limit, filter_width in product(filter_lower_limit_range, filter_width_range):
                var_filtered = calculate_filtered_var(
                    df=var, col='past_var',
                    lower_limit=filter_lower_limit, width=filter_width)

                params = pd.DataFrame({'n_days_past': [n_days_past], 'n_days_future': [n_days_future],
                          'filter_lower_limit': [filter_lower_limit], 'filter_width': [filter_width],
                          'loss_limit': [loss_limit]})
                overall_results = calculate_overall_results(df_col=var_filtered['floored_future_var'])
                yearly_results = calculate_yearly_results(df=var_filtered, column='floored_future_var')
                result_rows.append(pd.concat([params, overall_results, yearly_results], axis=1))

                i += 1
                print(f'\r step: {i}/{num_combinations}', end='')

# pd.concat raises on an empty list, so fall back to an empty frame when the
# grid produced no rows.
results = pd.concat(result_rows, ignore_index=True) if result_rows else pd.DataFrame()

results = add_10_last_years_stats(results)
results = replace_by_zeros(df=results, col_name_beginning='count')
# Best combinations first, ranked by the median of the yearly medians.
results = results.sort_values('median_median_10_last_years', ascending=False)

save_results(results, export_name)