In [1]:
import boto3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
import math
import random
from decimal import Decimal, getcontext
import pandas_market_calendars as mcal
import ast
from sklearn.preprocessing import StandardScaler
import pytz

In [2]:
# Module-level S3 client; create_training_data_local and model_runner_v2 read this global.
s3 = boto3.client('s3')

# Bucket / prefix constants. NOTE(review): neither name is referenced by the visible cells
# (the helpers receive bucket and prefix as arguments or hard-code them) -- confirm they are still needed.
training_bucket = "icarus-research-data"
training_prefix = 'training_datasets/expanded_1d_datasets/'

# Create a calendar
# `market_holidays` is the collection the build_query_keys* helpers test 'YYYY-MM-DD'
# strings against in order to skip dates that are NYSE holidays.
nyse = mcal.get_calendar('NYSE')
holidays = nyse.regular_holidays
market_holidays = holidays.holidays()

In [3]:
def create_training_data_local(key_list, prefix, bucket_name, start_date, end_date):
    """Download the hourly "losers" CSVs from S3, merge them and cache the result on disk.

    For every key in ``key_list`` and every hour from 10 to 15 the object
    ``{prefix}{key}/losers/{hour}.csv`` is fetched from ``bucket_name`` with the
    module-level ``s3`` client. Objects that cannot be fetched or parsed are
    skipped (best effort), every loaded frame gets an ``hour`` column, and the
    frames are concatenated.

    The merged frame gets a ``dt`` column (parsed from ``date``) and a
    ``day_of_week`` column, is rounded to 3 decimals, has +/-inf replaced by 0,
    and is written to ``/Users/charlesmiller/Documents/LOSERS/{start}_{end}.csv``.

    Args:
        key_list: iterable of key fragments (the caller passes 'YYYY/MM/DD' strings).
        prefix: S3 key prefix placed in front of each key.
        bucket_name: S3 bucket to read from.
        start_date: string whose part before the first space names the output file.
        end_date: string whose part before the first space names the output file.

    Returns:
        The merged pandas DataFrame.

    Raises:
        ValueError: if no CSV could be loaded for any key/hour combination.
    """
    hours = [10, 11, 12, 13, 14, 15]
    start = start_date.split(' ')[0]
    end = end_date.split(' ')[0]

    df_list = []
    for key in key_list:
        for hour in hours:
            try:
                s3_object = s3.get_object(Bucket=bucket_name, Key=f'{prefix}{key}/losers/{hour}.csv')
                df = pd.read_csv(s3_object.get("Body"))
            except Exception:
                # Best effort: a missing or unreadable hour file is skipped. Catching
                # Exception (not a bare except) keeps Ctrl-C / kernel interrupt working
                # during this potentially very long download loop.
                continue
            df['hour'] = hour
            df_list.append(df)

    if not df_list:
        # pd.concat([]) would raise a ValueError with a message that hides the real cause.
        raise ValueError(
            f"No training data could be loaded from bucket '{bucket_name}' under prefix '{prefix}'"
        )

    data = pd.concat(df_list)
    data.reset_index(drop=True, inplace=True)
    data['dt'] = pd.to_datetime(data['date'])
    data['day_of_week'] = data['dt'].apply(lambda x: x.dayofweek)
    data = data.round(3)
    data.replace([np.inf, -np.inf], 0, inplace=True)

    data.to_csv(f'/Users/charlesmiller/Documents/LOSERS/{start}_{end}.csv', index=False)

    return data

def pull_training_data_local(end_date,start_date):
    """Load the cached LOSERS CSV and keep the rows dated on or before ``end_date``.

    A ``dt`` column is parsed from ``date`` and +/-inf values are replaced by 0.
    ``start_date`` is accepted but not applied: no lower date bound is enforced here.

    Returns:
        The filtered pandas DataFrame.
    """
    frame = pd.read_csv('/Users/charlesmiller/Documents/LOSERS/2018-01-01_2023-10-19.csv')
    frame['dt'] = pd.to_datetime(frame['date'])

    on_or_before_end = frame['dt'] <= end_date
    selected = frame.loc[on_or_before_end]

    return selected.replace([np.inf, -np.inf], 0)


def pull_validation_data_local(end_date,start_date):
    """Load the cached LOSERS CSV and keep rows with ``start_date <= dt <= end_date``.

    A ``dt`` column is parsed from ``date`` and +/-inf values are replaced by 0.
    Note the argument order: the upper bound comes first.

    Returns:
        The filtered pandas DataFrame.
    """
    frame = pd.read_csv('/Users/charlesmiller/Documents/LOSERS/2018-01-01_2023-10-19.csv')
    frame['dt'] = pd.to_datetime(frame['date'])

    upper_bounded = frame.loc[frame['dt'] <= end_date]
    windowed = upper_bounded.loc[upper_bounded['dt'] >= start_date]

    return windowed.replace([np.inf, -np.inf], 0)

In [4]:
def build_date_list(start_date, end_date):
    """Return the Monday-Friday dates in the half-open range [start_date, end_date).

    The range is walked one calendar day at a time starting from ``start_date``;
    ``end_date`` itself is never included. Holidays are not filtered here.
    """
    span_days = (end_date - start_date).days
    candidates = (start_date + timedelta(days=offset) for offset in range(span_days))
    # weekday(): Monday is 0 ... Sunday is 6, so anything above 4 is a weekend day.
    return [day for day in candidates if day.weekday() <= 4]

def build_query_keys_hist():
    """Build 'YYYY/MM/DD' keys for the fixed range 2021-01-05 (inclusive) to 2022-07-29 (exclusive).

    Weekend days and dates found in the module-level ``market_holidays`` are left out.
    """
    first_day = datetime(2021,1,5)
    last_day = datetime(2022,7,29)

    key_list = []
    for offset in range((last_day - first_day).days):
        day = first_day + timedelta(days=offset)
        if day.weekday() > 4:
            # Saturday / Sunday
            continue
        iso_day = day.strftime('%Y-%m-%d')
        if iso_day in market_holidays:
            continue
        key_list.append(iso_day.replace("-","/"))

    return key_list
    
def build_query_keys(dates):
    """Turn date objects into 'YYYY/MM/DD' key fragments, skipping market holidays.

    Each date is formatted as 'YYYY-MM-DD'; dates found in the module-level
    ``market_holidays`` are dropped and the rest are returned with '/' separators,
    in input order.
    """
    iso_days = (day.strftime('%Y-%m-%d') for day in dates)
    return ["/".join(iso_day.split('-')) for iso_day in iso_days if iso_day not in market_holidays]

def build_query_keys_validation(end_date):
    """Build 'YYYY/MM/DD' keys for the validation window that starts at ``end_date``.

    The window covers the 7 calendar days beginning with ``end_date`` itself
    (``end_date + 7 days`` is excluded); weekends are removed by build_date_list
    and market holidays by build_query_keys.
    """
    window_close = end_date + timedelta(days=7)
    return build_query_keys(build_date_list(end_date, window_close))

def build_validation_dates_local(deployment_date):
    """Return the end of the validation window: ``deployment_date`` plus 5 days."""
    return deployment_date + timedelta(days=5)

In [5]:
def model_results_analyzer(predictions, y_validate, target_value):
    """Count confusion-matrix cells for binary predictions.

    Predictions and labels are compared position by position (the index of
    ``y_validate`` is ignored):

    * prediction 1, label 1      -> true positive
    * prediction 1, any other    -> false positive
    * prediction 0, label 0      -> true negative
    * prediction 0, any other    -> false negative

    Predictions that are neither 0 nor 1 are not counted in any cell.

    Args:
        predictions: array-like of predicted classes.
        y_validate: array-like (e.g. pandas Series) of true labels, same length.
        target_value: unused; kept so existing callers keep working.

    Returns:
        ``(tp, "0", fp, "0", tn, "0", fn, "0")`` -- the four counts as ints, each
        followed by the placeholder string "0" that sits in the score slot.

    Raises:
        ValueError: if ``predictions`` and ``y_validate`` differ in length.
    """
    predicted = np.asarray(predictions).ravel()
    actual = np.asarray(y_validate).ravel()
    if predicted.shape[0] != actual.shape[0]:
        raise ValueError(
            f"predictions has {predicted.shape[0]} entries but y_validate has {actual.shape[0]}"
        )

    predicted_positive = predicted == 1
    predicted_negative = predicted == 0
    actual_positive = actual == 1
    actual_negative = actual == 0

    # int(...) so callers receive plain Python ints rather than numpy integer scalars.
    tp = int(np.count_nonzero(predicted_positive & actual_positive))
    fp = int(np.count_nonzero(predicted_positive & ~actual_positive))
    tn = int(np.count_nonzero(predicted_negative & actual_negative))
    fn = int(np.count_nonzero(predicted_negative & ~actual_negative))

    return tp, "0", fp, "0", tn, "0", fn, "0"

In [6]:
def create_dynamo_record(tp, tp_scr, fp, fp_scr, tn, tn_scr, fn, fn_scr, model_name, deployment_date, dataset_name, hyperparam_str, feature_str, target_str, fi_list):
    """Write one model-evaluation record to the 'icarus-models-results-table' DynamoDB table.

    Args:
        tp, fp, tn, fn: confusion-matrix counts stored as 'TP', 'FP', 'TN', 'FN'.
        tp_scr, fp_scr, tn_scr, fn_scr: accepted but not used; the '*pct' fields
            are always written as Decimal("0").
        model_name: stored as 'model_name'.
        deployment_date: datetime; stored as 'YYYY-MM-DD' and used as the start of
            the evaluation period (the period ends 4 days later).
        dataset_name: stored as 'dataset'.
        hyperparam_str: stored as the only member of a set under 'hyperparameters'.
        feature_str: stored as the only member of a set under 'features'.
        target_str: stored as 'target'.
        fi_list: stored as 'feature_importances'.

    Returns:
        The response of ``table.put_item``.
    """
    ddb = boto3.resource('dynamodb','us-east-1')
    table = ddb.Table('icarus-models-results-table')
    eval_start = deployment_date
    eval_end = deployment_date + timedelta(days=4)

    # 'precision_ratio' is the TP-to-FP ratio (tp / fp), not the conventional
    # precision tp / (tp + fp). With no false positives the ratio is undefined and
    # 0 is stored. Only the division-by-zero case is handled, so any other problem
    # (e.g. a non-numeric count) surfaces instead of silently becoming 0.
    try:
        precision = (tp/fp)
    except ZeroDivisionError:
        precision = 0

    ## FILL IN
    item={
        'model_name': model_name,
        'deployment_date': deployment_date.strftime("%Y-%m-%d"),
        'algorithm_type': 'xgboost',
        'dataset': dataset_name,
        'TP': tp,
        'TPpct': Decimal("0"),
        'FP': fp,
        'FPpct': Decimal("0"),
        'TN': tn,
        'TNpct': Decimal("0"),
        'FN': fn,
        'FNpct': Decimal("0"),
        # Decimal(str(...)) rather than Decimal(float): the item stores numbers as Decimal.
        'precision_ratio': Decimal(str(precision)),
        'evaluation_timeperiod': f'{eval_start.strftime("%Y-%m-%d")}_{eval_end.strftime("%Y-%m-%d")}',
        'live': False,
        'hyperparameters': {hyperparam_str},
        'features' : {feature_str},
        'target' : target_str,
        'feature_importances': fi_list

    }

    print(item)
    response = table.put_item(
            Item=item
        )

    return response

In [7]:
def train_model(features, dataset, validation_dataset, target_label, target_value, hyperparams):
    """Fit an XGBoost binary classifier and evaluate it on the validation frame.

    A row is labelled 1 when its ``target_label`` column is below ``target_value``,
    otherwise 0. The ``label`` column is written into the frames that were passed
    in, so callers see it afterwards. Both frames are rounded to 3 decimals
    (on local copies) before the feature matrices are taken.

    Args:
        features: list of column names used as model inputs.
        dataset: training DataFrame.
        validation_dataset: validation DataFrame.
        target_label: name of the column the label is derived from.
        target_value: threshold for the label.
        hyperparams: dict with keys 'subsample', 'num_round', 'min_child_weight',
            'max_depth', 'learning_rate', 'gamma' and 'colsample_bytree'.

    Returns:
        ``(tp, tp_scr, fp, fp_scr, tn, tn_scr, fn, fn_scr, fi_str, predictions,
        probabilities)`` where ``fi_str`` is ``str()`` of a list of
        ``{feature: importance}`` dicts and ``probabilities`` is the
        ``predict_proba`` output for the validation rows.
    """
    dataset.loc[:, 'label'] = (dataset[target_label] < target_value).astype(int)
    validation_dataset.loc[:, 'label'] = (validation_dataset[target_label] < target_value).astype(int)

    dataset = dataset.round(3)
    validation_dataset = validation_dataset.round(3)

    X = dataset[features]
    y = dataset['label']

    X_validate = validation_dataset[features]
    y_validate = validation_dataset['label']

    # The scikit-learn wrapper takes the number of boosting rounds as `n_estimators`.
    # A `num_round` keyword is not a constructor parameter: it is forwarded as an
    # unknown booster parameter and ignored (verbosity=0 hides the warning), so the
    # configured round count never took effect.
    xgb_model = xgb.XGBClassifier(
        subsample=hyperparams['subsample'],
        n_estimators=hyperparams['num_round'],
        min_child_weight=hyperparams['min_child_weight'],
        max_depth=hyperparams['max_depth'],
        learning_rate=hyperparams['learning_rate'],
        gamma=hyperparams['gamma'],
        colsample_bytree=hyperparams['colsample_bytree'],
        verbosity=0,
        objective='binary:logistic',
        random_state=42,
    )
    xgb_model.fit(X,y)

    print(y.value_counts())
    predictions = xgb_model.predict(X_validate)
    probabilities = xgb_model.predict_proba(X_validate)
    tp, tp_scr, fp, fp_scr, tn, tn_scr, fn, fn_scr = model_results_analyzer(predictions, y_validate, target_value)

    # Pair every feature name with its importance, in feature order.
    fi = xgb_model.feature_importances_
    fi_list = [{name: importance} for name, importance in zip(features, fi)]
    print(tp,fp,tn,fn)
    return tp, tp_scr, fp, fp_scr, tn, tn_scr, fn, fn_scr,str(fi_list), predictions, probabilities


def train_model_TSSim(features, dataset, validation_dataset, target_label, target_value, hyperparams):
    """Fit an XGBoost binary classifier for the temporal simulation and count its outcomes.

    A row is labelled 1 when its ``target_label`` column is below ``target_value``,
    otherwise 0. The ``label`` column is written into the frames that were passed
    in. Both frames are rounded to 3 decimals (on local copies) before the feature
    matrices are taken. Unlike train_model, no feature importances or probabilities
    are computed.

    Args:
        features: list of column names used as model inputs.
        dataset: training DataFrame.
        validation_dataset: validation DataFrame.
        target_label: name of the column the label is derived from.
        target_value: threshold for the label.
        hyperparams: dict with keys 'subsample', 'num_round', 'min_child_weight',
            'max_depth', 'learning_rate', 'gamma' and 'colsample_bytree'.

    Returns:
        ``(tp, tp_scr, fp, fp_scr, tn, tn_scr, fn, fn_scr, "str(fi_list)",
        predictions, "probabilities")`` -- the two quoted entries are literal
        placeholder strings that keep the tuple shape of train_model.
    """
    dataset.loc[:, 'label'] = (dataset[target_label] < target_value).astype(int)
    validation_dataset.loc[:, 'label'] = (validation_dataset[target_label] < target_value).astype(int)

    dataset = dataset.round(3)
    validation_dataset = validation_dataset.round(3)

    X = dataset[features]
    y = dataset['label']

    X_validate = validation_dataset[features]
    y_validate = validation_dataset['label']

    # The scikit-learn wrapper takes the number of boosting rounds as `n_estimators`.
    # A `num_round` keyword is not a constructor parameter: it is forwarded as an
    # unknown booster parameter and ignored (verbosity=0 hides the warning), so the
    # configured round count never took effect.
    xgb_model = xgb.XGBClassifier(
        subsample=hyperparams['subsample'],
        n_estimators=hyperparams['num_round'],
        min_child_weight=hyperparams['min_child_weight'],
        max_depth=hyperparams['max_depth'],
        learning_rate=hyperparams['learning_rate'],
        gamma=hyperparams['gamma'],
        colsample_bytree=hyperparams['colsample_bytree'],
        verbosity=0,
        objective='binary:logistic',
        random_state=42,
    )
    xgb_model.fit(X,y)

    predictions = xgb_model.predict(X_validate)
    tp, tp_scr, fp, fp_scr, tn, tn_scr, fn, fn_scr = model_results_analyzer(predictions, y_validate, target_value)

    return tp, tp_scr, fp, fp_scr, tn, tn_scr, fn, fn_scr,"str(fi_list)", predictions, "probabilities"

In [8]:
def model_runner_v2(model_name, dataset_name, title, features, target_label, target_value,start_date, end_date,deployment_date, feature_str, hyperparams_str, hyperparams,local_data, dataset_start_date):
    """Train and evaluate one model, record the result in DynamoDB and upload the scored validation set.

    Steps:
      1. Load the validation rows (deployment_date .. deployment_date + 5 days) and
         the training rows (up to ``end_date``) from the local CSV cache.
      2. Drop rows with a missing "close_diff_deviation3" value from both frames.
      3. Train with train_model and store the outcome with create_dynamo_record.
      4. Add ``probabilities`` (second predict_proba column) and ``predictions`` to
         the validation frame and upload it as CSV to the "icarus-research-data" bucket.

    ``start_date`` and ``local_data`` are accepted but not used.

    Returns:
        The response of create_dynamo_record.
    """
    required_column = "close_diff_deviation3"

    validation_window_end = build_validation_dates_local(deployment_date)
    validation_dataset = pull_validation_data_local(validation_window_end, deployment_date)
    dataset = pull_training_data_local(end_date, dataset_start_date)

    dataset = dataset.dropna(subset=[required_column])
    validation_dataset = validation_dataset.dropna(subset=[required_column])

    training_outcome = train_model(features, dataset, validation_dataset, target_label, target_value, hyperparams)
    tp, tp_scr, fp, fp_scr, tn, tn_scr, fn, fn_scr, fi_list, predictions, probabilities = training_outcome

    response = create_dynamo_record(
        tp, tp_scr, fp, fp_scr, tn, tn_scr, fn, fn_scr,
        model_name, deployment_date, dataset_name,
        hyperparams_str, feature_str,
        f"{target_value}+{target_label}",
        fi_list,
    )

    # Keep only the positive-class column of the probability matrix.
    validation_dataset['probabilities'] = probabilities[:,1]
    validation_dataset['predictions'] = predictions

    upload_key = f"backtesting_data/inv_alerts/{dataset_name}/{title}/{deployment_date.strftime('%Y-%m-%d')}.csv"
    s3.put_object(Bucket="icarus-research-data", Key=upload_key, Body=validation_dataset.to_csv())
    return response

def model_runner_data(start_date,end_date):
    """Build the training dataset for [start_date, end_date) from S3 and cache it locally.

    Weekday, non-holiday keys are generated for the range, the last key is printed,
    and create_training_data_local downloads the 'bf_alerts/data/' files from the
    'inv-alerts' bucket.

    Returns:
        The DataFrame produced by create_training_data_local.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    key_list = build_query_keys(build_date_list(start_date, end_date))
    print(key_list[-1])
    return create_training_data_local(
        key_list,
        'bf_alerts/data/',
        'inv-alerts',
        start_date.strftime(timestamp_format),
        end_date.strftime(timestamp_format),
    )


def model_runner_temporal_simulation(features, target_label, target_value,dataset_start_date,end_date,deployment_date,hyperparams):
    """Run one train/validate cycle of the temporal simulation from the local CSV cache.

    Training rows run up to ``end_date``; validation rows cover ``deployment_date``
    to ``deployment_date + 5 days``. Rows missing "close_diff_deviation3" or
    "close_diff_deviation" are dropped from both frames before training.

    Returns:
        ``(tp, fp, fn, tn)`` -- note the order: false negatives come before true negatives.
    """
    required_columns = ["close_diff_deviation3", "close_diff_deviation"]

    validation_window_end = build_validation_dates_local(deployment_date)
    dataset = pull_training_data_local(end_date, dataset_start_date)
    validation_dataset = pull_validation_data_local(validation_window_end, deployment_date)

    dataset = dataset.dropna(subset=required_columns)
    validation_dataset = validation_dataset.dropna(subset=required_columns)

    outcome = train_model_TSSim(features, dataset, validation_dataset, target_label, target_value, hyperparams)
    # Counts sit at the even positions of the 11-item result; the rest are placeholders.
    tp, fp, tn, fn = outcome[0], outcome[2], outcome[4], outcome[6]
    return tp, fp, fn, tn

In [9]:
def build_evaluation_period(eval_start, eval_end):
    """List weekly evaluation periods from ``eval_start`` through ``eval_end`` (inclusive).

    One dict is produced per 7-day step with the keys:
      * "deployment_date": the step date,
      * "dataset_end": the step date minus 10 days,
      * "dataset_start": always datetime(2020, 1, 1).
    """
    step = timedelta(days=7)
    training_gap = timedelta(days=10)

    periods = []
    deployment = eval_start
    while not deployment > eval_end:
        periods.append(dict(
            deployment_date=deployment,
            dataset_end=deployment - training_gap,
            dataset_start=datetime(2020,1,1),
        ))
        deployment = deployment + step
    return periods

In [10]:
# #ma_cont_classifier_EXPvDiff_HistData
# #ma_cont_classifier_EXPvDiff_HistData_NOVAL
# #ma_cont_classifier_EXPvDiffDYNTUN_HistData_NOVAL
# #ma_cont_classifier_EXPvDiffDYNTUN_HistData_NOVAL
# model_name = 'LOSERS:1D_TSSIM1_expVolSPYboth++_custHyP15_2018'
# dataset_name = '1D_TSSIM1_expVolSPYboth++_custHypP15_2018'
# title = 'LOSERS_1D'
# hyperparams = {'subsample': 0.6, 'num_round': 1000, 'min_child_weight': 10, 'max_depth': 10, 'learning_rate': 0.1, 'gamma': 2, 'colsample_bytree': 1}
# features = ['close_diff3', 'daily_vol_diff', 'return_vol_16H', 'return_vol_10D', 'hour', 'close_diff_deviation3', 'return_vol_450M', 'roc', 'rsi3', 'range_vol', 
#             'hour_vol_diff_pct', 'return_vol_5D', 'volume_vol_10D', 'day_of_week', 'volume_vol_5D', 'volume_vol_240M', 'hour_volume_vol_diff_pct', 'adx', 'daily_volume_vol_diff_pct30', 
#             'return_vol_30D', 'range_vol_diff5', 'daily_volume_vol_diff30', 'threeD_returns_close', 'close_diff', 'cmf', 'threeD_stddev50', 'daily_vol_diff_pct', 'day_of_month', 'hour_vol_diff', 
#             'min_volume_vol_diff_pct', 'volume_vol_8H', 'roc3', 'month', 'volume_vol_450M', 'close_diff_deviation', 'min_vol_diff_pct', 'volume_vol_16H', 'close_diff5', 'return_vol_240M',"SPY_diff","SPY_3D"]

# target_label = 'one_min'
# target_value = -.025
# dataset_start_date = datetime(2018,1,1,tzinfo=pytz.timezone('US/Eastern')) 

# dates_list = build_evaluation_period(datetime(2022,10,24,tzinfo=pytz.timezone('US/Eastern')), datetime(2023,10,7,tzinfo=pytz.timezone('US/Eastern')))
# # date = dates_list[-1]
# response = model_runner_data(start_date=dataset_start_date,end_date=datetime(2023,10,28,tzinfo=pytz.timezone('US/Eastern')))
# # for date in dates_list:
# #     print(date)
# #     response = model_runner_v2(model_name, dataset_name, title, features, target_label, target_value,"dataset_start_date",date['dataset_end'],date['deployment_date'], str(features),str(hyperparams), hyperparams, local_data=True, dataset_start_date=date['dataset_start'])
    
    

2023/10/27


In [17]:
# data = pd.read_csv(f'/Users/charlesmiller/Documents/LOSERS/2018-01-01_2023-10-28.csv')
# # data['date'] = data['date_x'].astype(str)
# # data['date'] = data['date_x'].apply(lambda x: x.split(" ")[0])
# data['date'] = pd.to_datetime(data['date'])
# # data.drop(['one_pct','three_pct','Unnamed: 0.2', "Unnamed: 0","Unnamed: 0.1", "date_x","date_y",'vw_x', 'n_x', 'return_vol_240M_x', 'volume_vol_240M_x', 'return_vol_450M_x', 'volume_vol_450M_x', 'min_vol_diff_x', 'min_vol_diff_pct_x', 'min_volume_vol_diff_x', 'min_volume_vol_diff_pct_x', 'return_vol_8H_x', 'return_vol_16H_x', 'volume_vol_8H_x', 'volume_vol_16H_x', 'hour_vol_diff_x', 'hour_vol_diff_pct_x', 'hour_volume_vol_diff_x', 'hour_volume_vol_diff_pct_x', 'return_vol_5D_x', 'return_vol_10D_x', 'return_vol_30D_x', 'volume_vol_5D_x', 'volume_vol_10D_x', 'volume_vol_30D_x', 'daily_vol_diff_x', 'daily_vol_diff_pct_x', 'daily_vol_diff30_x', 'daily_vol_diff_pct30_x', 'daily_volume_vol_diff_x', 'daily_volume_vol_diff_pct_x', 'daily_volume_vol_diff30_x', 'daily_volume_vol_diff_pct30_x', 'vw_y', 'n_y', 'return_vol_240M_y', 'volume_vol_240M_y', 'return_vol_450M_y', 'volume_vol_450M_y', 'min_vol_diff_y', 'min_vol_diff_pct_y', 'min_volume_vol_diff_y', 'min_volume_vol_diff_pct_y', 'return_vol_8H_y', 'return_vol_16H_y', 'volume_vol_8H_y', 'volume_vol_16H_y', 'hour_vol_diff_y', 'hour_vol_diff_pct_y', 'hour_volume_vol_diff_y', 'hour_volume_vol_diff_pct_y', 'return_vol_5D_y', 'return_vol_10D_y', 'return_vol_30D_y', 'volume_vol_5D_y', 'volume_vol_10D_y', 'volume_vol_30D_y', 'daily_vol_diff_y', 'daily_vol_diff_pct_y', 'daily_vol_diff30_y', 'daily_vol_diff_pct30_y', 'daily_volume_vol_diff_y', 'daily_volume_vol_diff_pct_y', 'daily_volume_vol_diff30_y', 'daily_volume_vol_diff_pct30_y'], axis=1, inplace=True)
# data['roc_diff'] = data['roc'] - data['roc5']
# data['range_vol_diff5'] = (data['range_vol'] - data['range_vol5MA'])
# data['close_diff_deviation3'] = abs(data['close_diff3'])/(data['threeD_stddev50']*100)
# data['close_diff_deviation'] = abs(data['close_diff'])/(data['oneD_stddev50']*100)
# data.dropna(subset=['date'], inplace=True)
# data['day_of_week'] = data['date'].apply(lambda x: x.dayofweek).astype(int)
# data['day_of_month'] = data['date'].apply(lambda x: x.day).astype(int)
# data['month'] = data['date'].apply(lambda x: x.month).astype(int)
# data['year'] = data['date'].apply(lambda x: x.year).astype(int)
# data.replace([np.inf, -np.inf], 0, inplace=True)
# NOTE(review): hidden state -- within the visible cells `data` is assigned only by the
# commented-out lines above, so on a fresh kernel (Restart & Run All) this line raises
# NameError. The commented-out read loads ..._2023-10-28.csv while this line writes
# ..._2023-10-19.csv, the file that pull_training_data_local / pull_validation_data_local
# read -- confirm the intended file name before re-enabling the preprocessing above.
data.to_csv(f'/Users/charlesmiller/Documents/LOSERS/2018-01-01_2023-10-19.csv', index=False)

In [12]:
def run_temporal_simulation(features, target_label, target_value, dataset_start_date, evaluation_start_date, evaluation_end_date):
    """Run the weekly walk-forward simulation for one feature set.

    For every period from build_evaluation_period a model is trained and validated
    with a fixed hyperparameter set via model_runner_temporal_simulation.

    Returns:
        ``(tp_avg_list, gross_accuracy_list)``: per period, the true-positive count
        and the difference ``tp - fp``.
    """
    hyperparams = {'subsample': 0.6, 'num_round': 1000, 'min_child_weight': 10, 'max_depth': 10, 'learning_rate': 0.1, 'gamma': 2, 'colsample_bytree': 1}

    period_outcomes = [
        model_runner_temporal_simulation(
            features, target_label, target_value,
            dataset_start_date, period['dataset_end'], period['deployment_date'],
            hyperparams,
        )
        for period in build_evaluation_period(evaluation_start_date, evaluation_end_date)
    ]

    # Each outcome is (tp, fp, fn, tn); only the first two entries are used.
    tp_avg_list = [outcome[0] for outcome in period_outcomes]
    gross_accuracy_list = [outcome[0] - outcome[1] for outcome in period_outcomes]

    return tp_avg_list, gross_accuracy_list


In [13]:
# --- Temporal-simulation configuration ---
# Number of random feature subsets to evaluate in the loop below.
number_of_simulations = 20
# Timestamp used only to name the results file (year_month_day).
now = datetime.now()
# Prefix for model names and sub-directory name of the results file.
title = 'losers_1d'
# NOTE(review): total_feature_list is not referenced by the visible cells; the loop
# below samples from testing_features only -- confirm it is still needed.
total_feature_list = ['hour','year','month','day_of_month',
       'price7', 'price14', 'adjusted_volume', 'vol7', 'vol14',
       'rsi', 'rsi3', 'rsi5', 'roc', 'roc3', 'roc5', 'threeD_returns_close',
       'oneD_returns_close', 'range_vol', 'range_vol5MA', 'range_vol10MA',
       'range_vol25MA', 'oneD_stddev50', 'threeD_stddev50', 'cmf',
       'close_diff', 'close_diff3', 'close_diff5', 'v_diff_pct', 'adx',
       'volume_10MA', 'volume_25MA', 'price_10MA', 'price_25MA',
       'volume_10DDiff', 'volume_25DDiff', 'price_10DDiff', 'price_25DDiff',
       'day_of_week','close_diff_deviation3','close_diff_deviation','roc_diff','range_vol_diff5',
       'return_vol_240M', 'volume_vol_240M', 'return_vol_450M', 'volume_vol_450M', 'min_vol_diff', 'min_vol_diff_pct', 'min_volume_vol_diff', 
       'min_volume_vol_diff_pct', 'return_vol_8H', 'return_vol_16H', 'volume_vol_8H', 'volume_vol_16H', 'hour_vol_diff', 'hour_vol_diff_pct', 'hour_volume_vol_diff', 
       'hour_volume_vol_diff_pct', 'return_vol_5D', 'return_vol_10D', 'return_vol_30D', 'volume_vol_5D', 'volume_vol_10D', 'volume_vol_30D', 'daily_vol_diff', 'daily_vol_diff_pct',
       'daily_vol_diff30', 'daily_vol_diff_pct30', 'daily_volume_vol_diff', 'daily_volume_vol_diff_pct', 'daily_volume_vol_diff30', 'daily_volume_vol_diff_pct30']
# Pool of candidate columns from which each simulation draws its random feature subset.
testing_features = ['hour','month','day_of_month','price7', 
       'rsi', 'rsi3', 'rsi5', 'roc', 'roc3', 'roc5', 'threeD_returns_close',
       'oneD_returns_close', 'range_vol', 'oneD_stddev50', 'threeD_stddev50', 'cmf',
       'close_diff', 'close_diff3', 'close_diff5', 'v_diff_pct', 'adx',
       'price_10DDiff', 'price_25DDiff',
       'day_of_week','close_diff_deviation3','close_diff_deviation','roc_diff','range_vol_diff5',
       'return_vol_240M', 'volume_vol_240M', 'return_vol_450M', 'volume_vol_450M', 'min_vol_diff', 'min_vol_diff_pct', 'min_volume_vol_diff', 
       'min_volume_vol_diff_pct', 'return_vol_8H', 'return_vol_16H', 'volume_vol_8H', 'volume_vol_16H', 'hour_vol_diff', 'hour_vol_diff_pct', 'hour_volume_vol_diff', 
       'hour_volume_vol_diff_pct', 'return_vol_5D', 'return_vol_10D', 'return_vol_30D', 'volume_vol_5D', 'volume_vol_10D', 'volume_vol_30D', 'daily_vol_diff', 'daily_vol_diff_pct',
       'daily_vol_diff30', 'daily_vol_diff_pct30', 'daily_volume_vol_diff', 'daily_volume_vol_diff_pct', 'daily_volume_vol_diff30', 'daily_volume_vol_diff_pct30','SPY_1D','SPY_3D','SPY_diff','SPY_diff3']

# A row is labelled positive when its `target_label` column is below `target_value`
# (see train_model_TSSim).
target_label = 'one_max'
target_value = .015
# One summary dict per simulation is appended here by the loop below.
results_array = []

# Run `number_of_simulations` walk-forward simulations, each on a random subset of
# testing_features, and collect one summary row per simulation.
# NOTE(review): `random` is never seeded in the visible cells, so the sampled feature
# subsets differ on every run; the subset itself is saved in the "features" column.
i = 0
while i < number_of_simulations:
       model_name = f"{title}_temporal_simulation_{i}"
       print(model_name)
       # Draw between 20 and 40 (inclusive) distinct features for this simulation.
       random_int = random.randint(20, 40)
       features = random.sample(testing_features, random_int)
       print(features)
       tp_avg_list, gross_accuracy_list = run_temporal_simulation(features, target_label, target_value, dataset_start_date=datetime(2018,1,1,tzinfo=pytz.timezone('US/Eastern')), evaluation_start_date=datetime(2022,10,24,tzinfo=pytz.timezone('US/Eastern')), evaluation_end_date=datetime(2023,10,2,tzinfo=pytz.timezone('US/Eastern')))        
       # Averages over the weekly evaluation periods: mean true-positive count and mean (tp - fp).
       # These divisions raise ZeroDivisionError if the evaluation period produced no entries.
       tp_avg = sum(tp_avg_list)/len(tp_avg_list)
       gross_accuracy = sum(gross_accuracy_list)/len(gross_accuracy_list)
       print(tp_avg, gross_accuracy)
       print()
       results_array.append({"model_name": model_name, "features": features, "tp_avg": tp_avg, "gross_accuracy": gross_accuracy,"num_features": random_int})
       i += 1

# Persist one row per simulation; the file is named after today's date (`now`).
# NOTE(review): the target directory .../temporal_simulation_results/{title}/ is not
# created by this cell -- confirm it exists before running.
results_df = pd.DataFrame(results_array)
results_df.to_csv(f'/Users/charlesmiller/Documents/temporal_simulation_results/{title}/{now.year}_{now.month}_{now.day}.csv', index=False)