In [13]:
import boto3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
import math
import random
from decimal import Decimal, getcontext
import pandas_market_calendars as mcal
import ast
from sklearn.preprocessing import StandardScaler
import pytz

In [14]:
s3 = boto3.client('s3')

training_bucket = "icarus-research-data"
training_prefix = 'training_datasets/expanded_1d_datasets/'

# Create a calendar
nyse = mcal.get_calendar('NYSE')
holidays = nyse.regular_holidays
market_holidays = holidays.holidays()

weekly_expiries = ['SPY', 'IVV', 'QQQ', 'GLD', 'IWM', 'EFA', 'XLK', 'XLV', 'TLT', 'LQD', 'XLE', 'TQQQ', 'SQQQ', 'SPXS', 'SPXL', 'SOXL', 'SOXS', 'MMM', 'ABT', 'ABBV', 'ACN', 'ATVI', 'ADM', 'ADBE', 'ADP', 
                   'AAP', 'AFL', 'ALB', 'ALGN', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMD', 'AAL', 'AXP', 'AIG', 'ABC', 'AMGN', 'ADI', 'APA', 'AAPL', 'AMAT', 'ANET', 'T', 'ADSK', 'BAC', 'BBWI', 'BAX', 'BBY', 'BIIB', 
                   'BLK', 'BA', 'BKNG', 'BMY', 'AVGO', 'CZR', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CAT', 'CBOE', 'CNC', 'CF', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CI', 'CSCO', 'C', 'CLX', 'CME', 'KO', 'CMCSA', 'CMA', 'CAG', 
                   'COP', 'STZ', 'GLW', 'COST', 'CTRA', 'CSX', 'CVS', 'DHI', 'DHR', 'DE', 'DAL', 'DVN', 'DLR', 'DFS', 'DISH', 'DIS', 'DG', 'DLTR', 'DPZ', 'DOW', 'DD', 'EBAY', 'EA', 'ELV', 'LLY', 'EMR', 'ENPH', 'EOG', 'EQT', 
                   'ETSY', 'EXPE', 'XOM', 'FDX', 'FITB', 'FSLR', 'FI', 'F', 'FTNT', 'FOXA', 'FCX', 'GEHC', 'GNRC', 'GD', 'GE', 'GM', 'GILD', 'GS', 'HAL', 'HSY', 'HES', 'HD', 'HON', 'HRL', 'HPQ', 'HUM', 'HBAN', 'IBM', 'ILMN', 
                   'INTC', 'IP', 'INTU', 'ISRG', 'JNJ', 'JPM', 'JNPR', 'KEY', 'KMB', 'KMI', 'KLAC', 'KHC', 'KR', 'LRCX', 'LVS', 'LEN', 'LMT', 'LOW', 'MRO', 'MPC', 'MAR', 'MA', 'MTCH', 'MCD', 'MCK', 'MDT', 'MRK', 'META', 'MET', 
                   'MGM', 'MU', 'MSFT', 'MRNA', 'MDLZ', 'MS', 'MOS', 'NTAP', 'NFLX', 'NEM', 'NKE', 'NSC', 'NOC', 'NCLH', 'NUE', 'NVDA', 'NXPI', 'OXY', 'ON', 'ORCL', 'PARA', 'PYPL', 'PEP', 'PFE', 'PCG', 'PM', 'PSX', 'PXD', 'PNC', 
                   'PPG', 'PG', 'PHM', 'QCOM', 'RTX', 'REGN', 'ROST', 'RCL', 'SPGI', 'CRM', 'SLB', 'STX', 'NOW', 'SWKS', 'SEDG', 'SO', 'LUV', 'SBUX', 'TMUS', 'TROW', 'TTWO', 'TPR', 'TGT', 'TSLA', 'TXN', 'TMO', 'TJX', 'TSCO', 'TFC', 
                   'TSN', 'USB', 'ULTA', 'UNP', 'UAL', 'UPS', 'URI', 'UNH', 'VLO', 'VZ', 'VRTX', 'VFC', 'V', 'WBA', 'WMT', 'WBD', 'WM', 'WFC', 'WDC', 'WHR', 'WMB', 'WYNN', 'ZION']


index = ["QQQ","SPY","IWM","TLT"]
test_lag = ["DAL","UAL","VZ","T","AAL","AMC"]
train_lag = ["AMC"]


big_fish =  [
            "AMD","NVDA","PYPL","GOOG","GOOGL","AMZN","PLTR","BAC","AAPL","NFLX","ABNB","CRWD","SHOP","FB","CRM",
            "MSFT","F","V","MA","JNJ","DIS","JPM","INTC","ADBE","BA","CVX","MRNA","PFE","SNOW","SOFI",'META',
            ]


bf_plus = ["AMD","NVDA","PYPL","GOOG","GOOGL","AMZN","PLTR","BAC","AAPL","NFLX","ABNB","CRWD","SHOP","FB","CRM",
            "MSFT","F","V","MA","JNJ","DIS","JPM","INTC","ADBE","BA","CVX","MRNA","PFE","SNOW","NKE",'META',
            'C','TGT','MMM','SQ','PANW','DAL','CSCO','UBER','SBUX']
bfpidx = ["AMD","NVDA","PYPL","GOOG","GOOGL","AMZN","PLTR","BAC","AAPL","NFLX","ABNB","CRWD","SHOP","FB","CRM",
            "MSFT","F","V","MA","JNJ","DIS","JPM","INTC","ADBE","BA","CVX","MRNA","PFE","SNOW","NKE",'META',
            'C','TGT','MMM','SQ','PANW','DAL','CSCO','UBER','SBUX','TSM',"QQQ","SPY","IWM","TLT"]

high_vol = ['ZM', 'UBER', 'TDOC', 'UAL', 'DAL', 'RCL', 'AMZN', 'ABNB', 'CRM',
       'F', 'ADBE', 'BA', 'META', 'TSLA', 'LCID', 'NIO', 'RIVN', 'COIN',
       'SQ', 'SHOP', 'DOCU', 'ROKU', 'TWLO', 'DDOG', 'ZS', 'NET', 'OKTA',
       'UPST', 'ETSY', 'PINS', 'FUTU', 'SE', 'BIDU', 'JD', 'BABA', 'RBLX',
       'AMD', 'NVDA', 'PYPL', 'PLTR', 'NFLX', 'CRWD', 'INTC', 'MRNA',
       'SNOW', 'SOFI', 'PANW']

low_vol = ['CMG', 'AXP', 'MMM', 'PEP', 'GE', 'MRK', 'HD', 'LOW', 'VZ', 'PG',
            'TSM', 'GOOG', 'GOOGL', 'BAC', 'AAPL', 'MSFT', 'V', 'MA', 'JNJ',
            'DIS', 'JPM', 'CVX', 'PFE', 'C', 'CAT', 'KO', 'MS', 'GS', 'IBM',
            'CSCO', 'WMT', 'WFC', 'TGT', 'COST', 'ORCL', 'SBUX', 'NKE', 'QQQ',
            'SPY', 'TLT', 'IWM']

In [15]:
def create_training_data_local(key_list, prefix, bucket_name,title, start_date, end_date):
    df_list = []
    hours = [10,11,12,13,14,15]
    start = start_date.split(' ')[0]
    end = end_date.split(' ')[0]
    # print(file_key)
    for key in key_list:
        for hour in hours:
            try:
                data = s3.get_object(Bucket=bucket_name, Key=f'{prefix}{key}/{hour}.csv')
                df = pd.read_csv(data.get("Body")) 
                df_list.append(df)
                df['hour'] = hour
            except:
                continue

    data = pd.concat(df_list)
    data.reset_index(drop=True, inplace=True)
    data['dt'] = pd.to_datetime(data['date'])
    data['day_of_week'] = data['dt'].apply(lambda x: x.dayofweek)
    data = data.loc[~data['symbol'].isin(leveraged_etfs)]
    data = data.round(3)
    data.replace([np.inf, -np.inf], 0, inplace=True)

    data.to_csv(f'/Users/charlesmiller/Documents/BF/{start}_{end}.csv', index=False)

    return data

def pull_training_data_local(end_date,start_date):
    data = pd.read_csv(f'/Users/charlesmiller/Documents/ALL_SYM/2018-01-01_2023-10-28.csv')
    data['dt'] = pd.to_datetime(data['date'])
    data = data.loc[data['dt'] <= end_date]
    data = data.loc[data['symbol'].isin(high_vol)]
    data.replace([np.inf, -np.inf], 0, inplace=True)

    return data


def pull_validation_data_local(end_date,start_date):
    data = pd.read_csv(f'/Users/charlesmiller/Documents/ALL_SYM/2018-01-01_2023-10-28.csv')
    data['dt'] = pd.to_datetime(data['date'])
    data = data.loc[data['dt'] <= end_date]
    data = data.loc[data['dt'] >= start_date]
    data = data.loc[data['symbol'].isin(high_vol)]
    data.replace([np.inf, -np.inf], 0, inplace=True)

    return data

In [16]:
def build_date_list(start_date, end_date):
    date_diff = end_date - start_date
    numdays = date_diff.days 
    dateList = []
    for x in range (0, numdays):
        temp_date = start_date + timedelta(days = x)
        if temp_date.weekday() > 4:
            continue
        else:
            dateList.append(temp_date)
    return dateList

def build_query_keys_hist():
    start_date = datetime(2021,1,5)
    date_diff = datetime(2022,7,29) - start_date
    numdays = date_diff.days 
    key_list = []
    for x in range (0, numdays):
        temp_date = start_date + timedelta(days = x)
        if temp_date.weekday() > 4:
            continue
        else:
            date_str = temp_date.strftime('%Y-%m-%d')
            if date_str in market_holidays:
                continue
            else:
                date_str = date_str.replace("-","/")
                key_list.append(date_str)
        
    return key_list
    
def build_query_keys(dates):
    key_list = []
    for date in dates:
        date_str = date.strftime('%Y-%m-%d')
        if date_str in market_holidays:
            continue
        else:
            year, month, day = date_str.split('-')
            temp = f'{year}/{month}/{day}'
            key_list.append(temp)

    return key_list

def build_query_keys_validation(end_date):
    validation_end_date = end_date + timedelta(days=7)
    dates = build_date_list(end_date, validation_end_date)
    key_list = []
    for date in dates:
        date_str = date.strftime('%Y-%m-%d')
        if date_str in market_holidays:
            continue
        else:
            year, month, day = date_str.split('-')
            temp = f'{year}/{month}/{day}'
            key_list.append(temp)

    return key_list

def build_validation_dates_local(deployment_date):
    end_date = deployment_date + timedelta(days=5)
    return end_date

In [17]:
def model_results_analyzer(predictions, y_validate):
    mse = mean_squared_error(y_validate, predictions)
    rmse = math.sqrt(mse)
    mae = mean_absolute_error(y_validate, predictions)
    r2 = r2_score(y_validate, predictions)
    print("MAE: ", mae)
    print("MSE: ", mse)
    print("RMSE: ", rmse)
    print("R2: ", r2)
    return mse, rmse, mae, r2

In [18]:
def create_dynamo_record(tp, tp_scr, fp, fp_scr, tn, tn_scr, fn, fn_scr, model_name, deployment_date, dataset_name, hyperparam_str, feature_str, target_str, fi_list):    
    ddb = boto3.resource('dynamodb','us-east-1')
    table = ddb.Table('icarus-models-results-table')
    eval_start = deployment_date
    eval_end = deployment_date + timedelta(days=4)
    try:
        precision = (tp/fp)
    except:
        precision = 0

    ## FILL IN
    item={
        'model_name': model_name,
        'deployment_date': deployment_date.strftime("%Y-%m-%d"),
        'algorithm_type': 'xgboost',
        'dataset': dataset_name,
        'TP': tp,
        'TPpct': Decimal("0"),
        'FP': fp,
        'FPpct': Decimal("0"),
        'TN': tn,
        'TNpct': Decimal("0"),
        'FN': fn,
        'FNpct': Decimal("0"),
        'precision_ratio': Decimal(str(precision)),
        'evaluation_timeperiod': f'{eval_start.strftime("%Y-%m-%d")}_{eval_end.strftime("%Y-%m-%d")}',
        'live': False,
        'hyperparameters': {hyperparam_str},
        'features' : {feature_str},
        'target' : target_str,
        'feature_importances': fi_list

    }

    print(item)
    response = table.put_item(
            Item=item
        )

    return response

In [19]:
def train_model(features, dataset, validation_dataset, target_label, target_value, hyperparams):
    X = dataset[features].astype(float)
    y = dataset[target_label]

    X_validate = validation_dataset[features].astype(float)
    y_validate = validation_dataset[target_label]


    xgb_model = xgb.XGBRegressor(subsample=hyperparams['subsample'],num_round=hyperparams['num_round'],min_child_weight=hyperparams['min_child_weight'],max_depth=hyperparams['max_depth'],learning_rate=hyperparams['learning_rate'],gamma=hyperparams['gamma'],colsample_bytree=hyperparams['colsample_bytree'],verbosity=0,objective='reg:squarederror',random_state=42)
    xgb_model.fit(X,y)

    predictions = xgb_model.predict(X_validate)
    mse, rmse, mae, r2 = model_results_analyzer(predictions, y_validate)
    validation_dataset['forecast'] = predictions


    fi = xgb_model.feature_importances_
    fi_list = []
    counter = 0
    for x in features:
        fi_list.append({x:fi[counter]})
        counter += 1
    return mse, rmse, mae, r2, fi_list, validation_dataset


def train_model_TSSim(features, dataset, validation_dataset, target_label, target_value, hyperparams):
    dataset = dataset.round(3)
    validation_dataset = validation_dataset.round(3)


    X = dataset[features].astype(float)
    y = dataset[target_label]

    X_validate = validation_dataset[features].astype(float)
    y_validate = validation_dataset[target_label]
    

    xgb_model = xgb.XGBRegressor(subsample=hyperparams['subsample'],num_round=hyperparams['num_round'],min_child_weight=hyperparams['min_child_weight'],max_depth=hyperparams['max_depth'],learning_rate=hyperparams['learning_rate'],gamma=hyperparams['gamma'],colsample_bytree=hyperparams['colsample_bytree'],verbosity=0,objective='reg:squarederror',random_state=42)
    xgb_model.fit(X,y)

    predictions = xgb_model.predict(X_validate)
    mse, rmse, mae, r2 = model_results_analyzer(predictions, y_validate)
    validation_dataset['forecast'] = predictions

    return mse, rmse, mae, r2, "fi_list", validation_dataset

In [20]:
def model_runner_v2(model_name, dataset_name, title, features, target_label, target_value,start_date, end_date,deployment_date, feature_str, hyperparams_str, hyperparams,local_data, dataset_start_date):
    validation_end_date = build_validation_dates_local(deployment_date)
    validation_dataset = pull_validation_data_local(validation_end_date,deployment_date)
    dataset = pull_training_data_local(end_date, dataset_start_date)
    dataset.dropna(subset=["close_diff_deviation3"],inplace=True)
    validation_dataset.dropna(subset=["close_diff_deviation3"],inplace=True)
    mse, rmse, mae, r2, fi_list, validation_dataset = train_model(features, dataset, validation_dataset, target_label, target_value, hyperparams)
    put_response = s3.put_object(Bucket="icarus-research-data", Key=f"backtesting_data/inv_alerts/{dataset_name}/{title}/{deployment_date.strftime('%Y-%m-%d')}.csv", Body=validation_dataset.to_csv())
    return put_response

def model_runner_data(model_name, dataset_name, title, features, target_label, target_value,start_date, end_date,deployment_date, feature_str, hyperparams_str, hyperparams):
    dates = build_date_list(start_date, end_date)
    key_list = build_query_keys(dates)
    dataset = create_training_data_local(key_list, 'fixed_alerts_full/new_features/big_fish_stable/', 'inv-alerts', title, start_date.strftime('%Y-%m-%d %H:%M:%S'),end_date.strftime('%Y-%m-%d %H:%M:%S'))
    return dataset

def model_runner_temporal_simulation(features, target_label, target_value,dataset_start_date,end_date,deployment_date,hyperparams):
    validation_end_date = build_validation_dates_local(deployment_date)
    dataset = pull_training_data_local(end_date, dataset_start_date)
    validation_dataset = pull_validation_data_local(validation_end_date,deployment_date)
    dataset.dropna(subset=["close_diff_deviation3"],inplace=True)
    validation_dataset.dropna(subset=["close_diff_deviation3"],inplace=True)
    dataset.dropna(subset=["close_diff_deviation"],inplace=True)
    validation_dataset.dropna(subset=["close_diff_deviation"],inplace=True)
    mse, rmse, mae, r2, fi_list, validation_dataset = train_model_TSSim(features, dataset, validation_dataset, target_label, target_value, hyperparams)
    return mse

In [21]:
def build_evaluation_period(eval_start, eval_end):
    dates_list = []
    while eval_start <= eval_end:
        date_object = {
            "deployment_date": eval_start,
            "dataset_end": eval_start - timedelta(days=10),
            "dataset_start": datetime(2020,1,1)
        }
        dates_list.append(date_object)
        eval_start += timedelta(days=7)
    return dates_list

In [22]:
#ma_cont_classifier_EXPvDiff_HistData
#ma_cont_classifier_EXPvDiff_HistData_NOVAL
#ma_cont_classifier_EXPvDiffDYNTUN_HistData_NOVAL
#'ma_cont_classifier_EXPvDiffDYNTUN_HistData_NOVAL
model_name = 'BFP:1D_TSSIM1_HIGHVOL_custHyp_2018'
dataset_name = '1D_TSSIM1_HIGHVOL_custHyp_2018'
title = 'BFP_1D'
hyperparams = {'subsample': 0.6, 'num_round': 1000, 'min_child_weight': 10, 'max_depth': 10, 'learning_rate': 0.1, 'gamma': 2, 'colsample_bytree': 1}
features = features = ['roc5', 'close_diff5', 'range_vol', 'SPY_1D', 'SPY_diff3', 'volume_vol_60M', 'daily_vol_diff30', 'return_vol_5D', '15min_vol_diff_pct', 'volume_vol_5D', 
             'v_diff_pct', 'adx', 'hour_vol_diff_pct', 'hour', 'return_vol_30D', 'price_25DDiff', 'min_vol_diff', 'volume_vol_450M', 'daily_volume_vol_diff', 'daily_volume_vol_diff_pct30', 
             'return_vol_30M', 'return_vol_240M', 'roc', 'hour_vol_diff', 'return_vol_4H']

target_label = 'one_min'
target_value = -.015
dates_list = build_evaluation_period(datetime(2022,10,3,tzinfo=pytz.timezone('US/Eastern')), datetime(2023,10,14,tzinfo=pytz.timezone('US/Eastern')))
dataset_start_date = datetime(2021,1,1) 

# date = dates_list[-1]
# response = model_runner_data(model_name, dataset_name, title, features, target_label, target_value,dataset_start_date,date['dataset_end'],date['deployment_date'], str(features),str(hyperparams), hyperparams)
for date in dates_list:
    response = model_runner_v2(model_name, dataset_name, title, features, target_label, target_value,dataset_start_date,date['dataset_end'],date['deployment_date'], str(features),str(hyperparams), hyperparams, local_data=True, dataset_start_date=dataset_start_date)
    
    

MAE:  0.032031457082641886
MSE:  0.0018227509880239543
RMSE:  0.04269368791781701
R2:  -0.12798432470913035
MAE:  0.022381703095338868
MSE:  0.0008442001752715792
RMSE:  0.02905512304691858
R2:  -0.007423349185796457
MAE:  0.026226374629456945
MSE:  0.001786258447601439
RMSE:  0.04226415085626871
R2:  0.029020787978374596
MAE:  0.02343382428016857
MSE:  0.0011548836766529182
RMSE:  0.03398357951500869
R2:  -0.04239844096433765
MAE:  0.03886976727752398
MSE:  0.0033013030419024297
RMSE:  0.05745696687001873
R2:  -0.18930299105566584
MAE:  0.027714793975947175
MSE:  0.0014734526921408627
RMSE:  0.03838557922111978
R2:  0.02352467500630917
MAE:  0.0259818343514352
MSE:  0.001165996057682053
RMSE:  0.034146684431757834
R2:  -0.00246826836920766
MAE:  0.01962901337072598
MSE:  0.0005023444141840028
RMSE:  0.02241304116321573
R2:  -0.8734727482436457
MAE:  0.02352502860295857
MSE:  0.0010474703784490618
RMSE:  0.03236464704657015
R2:  0.03225433303278247
MAE:  0.02130516852208063
MSE:  0.000

In [23]:
# def run_temporal_simulation(features, target_label, target_value, dataset_start_date, evaluation_start_date, evaluation_end_date):
#     mse_list = []
    
#     hyperparams = {'subsample': 0.6, 'num_round': 1000, 'min_child_weight': 10, 'max_depth': 10, 'learning_rate': 0.1, 'gamma': 2, 'colsample_bytree': 1}
#     dates_list = build_evaluation_period(evaluation_start_date, evaluation_end_date)
#     # date = dates_list[-1]
#     # response = model_runner_data(model_name, dataset_name, title, features, target_label, target_value,dataset_start_date,date['dataset_end'],date['deployment_date'], str(features),str(hyperparams), hyperparams)
#     for date in dates_list:
#         mse = model_runner_temporal_simulation(features, target_label, target_value,dataset_start_date,date['dataset_end'],date['deployment_date'], hyperparams)
#         mse_list.append(mse)

#     return mse_list

In [24]:
# number_of_simulations = 10
# now = datetime.now()
# title = 'BFP1d_reg'
# # total_feature_list = ['hour','year','month','day_of_month',
# #        'price7', 'price14', 'adjusted_volume', 'vol7', 'vol14',
# #        'rsi', 'rsi3', 'rsi5', 'roc', 'roc3', 'roc5', 'threeD_returns_close',
# #        'oneD_returns_close', 'range_vol', 'range_vol5MA', 'range_vol10MA',
# #        'range_vol25MA', 'oneD_stddev50', 'threeD_stddev50', 'cmf',
# #        'close_diff', 'close_diff3', 'close_diff5', 'v_diff_pct', 'adx',
# #        'volume_10MA', 'volume_25MA', 'price_10MA', 'price_25MA',
# #        'volume_10DDiff', 'volume_25DDiff', 'price_10DDiff', 'price_25DDiff',
# #        'SPY_diff', 'SPY_diff3', 'SPY_diff5', 'SPY_1D', 'SPY_3D', 'SPY_5D',
# #        'day_of_week','close_diff_deviation3','close_diff_deviation','roc_diff','range_vol_diff5']

# total_feature_list = ['hour','year','month','day_of_month',
#        'price7', 'price14', 'adjusted_volume', 'vol7', 'vol14',
#        'rsi', 'rsi3', 'rsi5', 'roc', 'roc3', 'roc5', 'threeD_returns_close',
#        'oneD_returns_close', 'range_vol', 'range_vol5MA', 'range_vol10MA',
#        'range_vol25MA', 'oneD_stddev50', 'threeD_stddev50', 'cmf',
#        'close_diff', 'close_diff3', 'close_diff5', 'v_diff_pct', 'adx',
#        'volume_10MA', 'volume_25MA', 'price_10MA', 'price_25MA',
#        'volume_10DDiff', 'volume_25DDiff', 'price_10DDiff', 'price_25DDiff',
#        'day_of_week','close_diff_deviation3','close_diff_deviation','roc_diff','range_vol_diff5',
#        'return_vol_240M', 'volume_vol_240M', 'return_vol_450M', 'volume_vol_450M', 'min_vol_diff', 'min_vol_diff_pct', 'min_volume_vol_diff', 
#        'min_volume_vol_diff_pct', 'return_vol_8H', 'return_vol_16H', 'volume_vol_8H', 'volume_vol_16H', 'hour_vol_diff', 'hour_vol_diff_pct', 'hour_volume_vol_diff', 
#        'hour_volume_vol_diff_pct', 'return_vol_5D', 'return_vol_10D', 'return_vol_30D', 'volume_vol_5D', 'volume_vol_10D', 'volume_vol_30D', 'daily_vol_diff', 'daily_vol_diff_pct',
#        'daily_vol_diff30', 'daily_vol_diff_pct30', 'daily_volume_vol_diff', 'daily_volume_vol_diff_pct', 'daily_volume_vol_diff30', 'daily_volume_vol_diff_pct30']
# testing_features = ['hour','month','day_of_month','price7', 
#        'rsi', 'rsi3', 'rsi5', 'roc', 'roc3', 'roc5', 'threeD_returns_close',
#        'oneD_returns_close', 'range_vol', 'oneD_stddev50', 'threeD_stddev50', 'cmf',
#        'close_diff', 'close_diff3', 'close_diff5', 'v_diff_pct', 'adx',
#        'price_10DDiff', 'price_25DDiff',
#        'day_of_week','close_diff_deviation3','close_diff_deviation','roc_diff','range_vol_diff5',
#         'return_vol_240M', 'volume_vol_240M', 'return_vol_450M', 'volume_vol_450M', 'min_vol_diff', 'min_vol_diff_pct', 'min_volume_vol_diff', 
#        'min_volume_vol_diff_pct', 'return_vol_8H', 'return_vol_16H', 'volume_vol_8H', 'volume_vol_16H', 'hour_vol_diff', 'hour_vol_diff_pct', 'hour_volume_vol_diff', 
#        'hour_volume_vol_diff_pct', 'return_vol_5D', 'return_vol_10D', 'return_vol_30D', 'volume_vol_5D', 'volume_vol_10D', 'volume_vol_30D', 'daily_vol_diff', 'daily_vol_diff_pct',
#        'daily_vol_diff30', 'daily_vol_diff_pct30', 'daily_volume_vol_diff', 'daily_volume_vol_diff_pct', 'daily_volume_vol_diff30', 'daily_volume_vol_diff_pct30', 'return_vol_15M',
#        'volume_vol_15M','return_vol_30M','volume_vol_30M','return_vol_60M','volume_vol_60M','15min_vol_diff_pct','return_vol_3D','volume_vol_3D','return_vol_4H','volume_vol_4H', 'SPY_diff',
#        'SPY_diff3', 'SPY_diff5', 'SPY_1D', 'SPY_3D', 'SPY_5D',]

# target_label = 'one_min'
# target_value = -.015
# results_array = []

# i = 0
# while i < number_of_simulations:
#        model_name = f"{title}_temporal_simulation_{i}"
#        print(model_name)
#        random_int = random.randint(24, 40)
#        features = random.sample(total_feature_list, random_int)
#        print(features)
#        mse_list = run_temporal_simulation(features, target_label, "target_value", dataset_start_date=datetime(2018,1,1,tzinfo=pytz.timezone('US/Eastern')), evaluation_start_date=datetime(2022,10,24,tzinfo=pytz.timezone('US/Eastern')), evaluation_end_date=datetime(2023,10,2,tzinfo=pytz.timezone('US/Eastern')))        
#        mse = sum(mse_list)/len(mse_list)
#        print(mse)
#        print()
#        results_array.append({"model_name": model_name, "features": features, "mse_avg": mse, "num_features": random_int})
#        i += 1

# results_df = pd.DataFrame(results_array)
# results_df.to_csv(f'/Users/charlesmiller/Documents/temporal_simulation_results/{title}/{now.year}_{now.month}_{now.day}.csv', index=False)