In [None]:
def create_portfolio(portfolio_dict, data_test_temp, y_scores, portfolio_dict_index, tickers, day20, day21, i):
    """
    Build the decile summary for `day21` and store it under "{day21}-portfolio" in portfolio_dict.

    Stocks are ranked by prediction score (ties broken by order of appearance), cut into
    10 equally sized buckets, and each bucket's mean prediction score and mean excess
    return are recorded. A final 'H-L' row holds bucket 10 minus bucket 1.

    Input:
        portfolio_dict: dict that receives the new "{day21}-portfolio" DataFrame (mutated in place)
        data_test_temp: DataFrame with 'datadate' and 'excess_ret_d' columns
        y_scores: one prediction score per ticker
        portfolio_dict_index: row index used for the summary DataFrame
        tickers: index of the per-stock working table
        day20: date whose 'excess_ret_d' values are used
        day21: date used to name the new entry
        i: not used by this function
    Output:
        portfolio_dict, with the new entry added

    Relies on the module-level names `nt` (number of rows expected for day20) and `labels`
    (bucket labels passed to pd.qcut; presumably 1..10, since buckets are looked up by
    the integers 1..10 below -- verify against the notebook).
    """
    entry = f'{day21}-portfolio'
    summary = pd.DataFrame(index=portfolio_dict_index, columns=['pred_score', 'avg_excess_ret'])
    # Register the (still empty) table first; every later write goes through this same object
    portfolio_dict[entry] = summary

    per_stock = pd.DataFrame(index=tickers, columns=['prediction_score', 'predicted_class', 'excess_return'])
    per_stock['prediction_score'] = y_scores
    on_day20 = data_test_temp['datadate'] == day20
    per_stock['excess_return'] = data_test_temp[on_day20]['excess_ret_d'].values.reshape(nt, )

    # Ranking with method='first' makes every rank distinct, so qcut never sees duplicate bin edges
    per_stock['DistinctRank'] = per_stock['prediction_score'].rank(method='first')
    per_stock['predicted_class'] = pd.qcut(per_stock['DistinctRank'], 10, labels=labels)

    for decile in range(1, 11):
        members = per_stock[per_stock['predicted_class'] == decile]
        summary.loc[decile, 'pred_score'] = members['prediction_score'].mean()
        summary.loc[decile, 'avg_excess_ret'] = members['excess_return'].mean()

    # High-minus-low spread between the top and bottom buckets
    summary.loc['H-L', :] = summary.loc[10, :] - summary.loc[1, :]

    return portfolio_dict

In [None]:
def calculate_SR(seq_length, ltd, pred_score_avg_all, excess_return_all, portfolio_dict, SR_dict):
    """
    Summarise one test period per decile and store the resulting table in SR_dict.

    For each of the `seq_length` days following `ltd`, the "{day}-portfolio" table is read
    from portfolio_dict. Per decile (rows 1..10) this produces the mean prediction score,
    the mean and (population) standard deviation of the daily excess return, and
    SR = mean / std * sqrt(252). An 'H-L' row is added from the daily difference between
    decile 10 and decile 1.

    Input:
        seq_length: number of days in the period
        ltd: index into the module-level `all_days`; the period covers all_days[ltd+2 .. ltd+1+seq_length]
        pred_score_avg_all: running per-decile score total, incremented in place
        excess_return_all: array whose columns num_iter*seq_length .. +seq_length-1 are filled in place
        portfolio_dict: dict of per-day decile tables keyed "{day}-portfolio"
        SR_dict: dict that receives the new table under "{all_days[ltd+1]}-{all_days[ltd+seq_length]}"
    Output:
        SR_dict, pred_score_avg_all, excess_return_all

    Relies on the module-level names `all_days` and `num_iter`.
    """
    deciles = range(1, 11)
    pred_score_avg = np.zeros((10, ))
    excess_return = np.zeros((10, seq_length))

    for offset in range(seq_length):
        daily = portfolio_dict[f'{all_days[ltd + 2 + offset]}-portfolio']

        scores = daily.loc[deciles, 'pred_score'].values.astype(float)
        pred_score_avg += scores
        pred_score_avg_all += scores

        returns = daily.loc[deciles, 'avg_excess_ret'].values.astype(float)
        excess_return[:, offset] = returns
        excess_return_all[:, num_iter * seq_length + offset] = returns

    pred_score_avg /= seq_length
    excess_return_avg = excess_return.mean(axis=1)
    excess_return_std = excess_return.std(axis=1)

    period = f'{all_days[ltd+1]}-{all_days[ltd+seq_length]}'
    table = pd.DataFrame(index=list(range(1, 11)),
                         columns=['pred_score', 'avg_excess_ret', 'std_excess_ret', 'SR'])
    # All later writes go through `table`, which is the very object stored in SR_dict
    SR_dict[period] = table
    table.loc[:, 'pred_score'] = pred_score_avg
    table.loc[:, 'avg_excess_ret'] = excess_return_avg
    table.loc[:, 'std_excess_ret'] = excess_return_std
    table.loc[:, 'SR'] = excess_return_avg / excess_return_std * np.sqrt(252)

    # High-minus-low: statistics of the daily (decile 10 - decile 1) return series
    spread = excess_return[-1, :] - excess_return[0, :]
    table.loc['H-L', 'pred_score'] = table.loc[10, 'pred_score'] - table.loc[1, 'pred_score']
    table.loc['H-L', 'avg_excess_ret'] = np.mean(spread)
    table.loc['H-L', 'std_excess_ret'] = np.std(spread)
    table.loc['H-L', 'SR'] = table.loc['H-L', 'avg_excess_ret'] / table.loc['H-L', 'std_excess_ret'] * np.sqrt(252)
    print(f"H-L SR is {table.loc['H-L', 'SR']}")

    return SR_dict, pred_score_avg_all, excess_return_all

In [None]:
def retrain_model(data, seq_length, ftd, ltd, retrain_seed, warm_up, num_stocks, model_params,
                  portfolio_dict, tickers, i, SR_dict, pred_score_avg_dict, excess_return_dict):
    """
    Train fresh models for model slot `i` over the last `warm_up` + 1 windows, replay the
    investment simulation for each window, and decide whether the new model should
    replace the original one.

    Steps:
        1. The train window [ftd, ltd] is moved back by (warm_up + 1) * seq_length days and
           then advanced by seq_length at the start of each of the warm_up + 1 iterations,
           so the last iteration runs on the ftd/ltd values passed in.
        2. Every iteration builds a brand-new CNN, compiles it, fits it with
           `model.retrain_model`, and simulates `seq_length` trading days with `invest`
           on the `retr_{i}_retro` track.
        3. The SR of each window is written into a deep copy of SR_dict (SR_dict itself is
           never modified here).
        4. In the last iteration the copied SRs of the `warm_up` previous windows are compared
           with the ones in SR_dict; the new model is accepted when it is strictly better
           in at least warm_up - 1 of them.

    Input:
        data: passed through to prep_train_test_data
        seq_length: number of days per window
        ftd, ltd: indices into the module-level `all_days` delimiting the current train window
        retrain_seed: seed handed to the CNN constructor
        warm_up: number of earlier windows replayed and compared
        num_stocks: number of top-scored stocks held each day
        model_params: keyword arguments for the CNN constructor
        i: model number; selects the `retr_{i}` / `retr_{i}_retro` entries
        SR_dict: dict whose `retr_{i}` entry holds the original SR per window
        portfolio_dict, tickers, pred_score_avg_dict, excess_return_dict: not used in this function
    Output:
        model: the model trained in the last iteration
        replace: True if the retrained model should be used, False otherwise

    Side effects: overwrites the `retr_{i}_retro` entries of the module-level
    total_asset_dict, total_dict and position_dict_all, and (when replace is True) the last
    element of return_dict[f'retr_{i}_retro'].

    Also relies on the module-level names all_days, factors, nt, num_to_tic_dict,
    date_to_rfr_dict, prep_train_test_data, CNN, custom_loss_np and invest.
    """

    # Work on a copy so the original SR values stay available for the comparison below
    SR_dict_copy = copy.deepcopy(SR_dict)

    # Move the window back; the loop below advances it by seq_length before each training run
    ftd -= (warm_up+1)*seq_length
    ltd -= (warm_up+1)*seq_length

    # Roll the retro asset history back by the last 20*warm_up entries.
    # NOTE(review): the hard-coded 20 presumably equals seq_length -- confirm.
    # Since the portfolio of a previous date is not saved, we are assuming the next day positions are equal weighted
    total_asset_dict[f'retr_{i}_retro'] = total_asset_dict[f'retr_{i}_retro'][:-20*warm_up]
    total_dict[f'retr_{i}_retro'] = total_asset_dict[f'retr_{i}_retro'][-1]
    position_dict_all[f'retr_{i}_retro'] = {}
    # Placeholder positions keyed by the integers 0..num_stocks-1, each holding an equal share of the total.
    # Presumably these keys never match a ticker, so the first `invest` call sells them all and
    # redistributes the full amount -- verify against num_to_tic_dict.
    for integer in range(num_stocks):
        position_dict_all[f'retr_{i}_retro'][integer] = total_dict[f'retr_{i}_retro'] / num_stocks

    for j in range(warm_up+1):

        ftd += seq_length
        ltd += seq_length
        print(f'Training the model from {all_days[ftd]} to {all_days[ltd]}')
        data_train, x_train, y_train, data_test, x_test, y_test, ret_d_train, ret_d_test, sic_train, sic_test = prep_train_test_data(data, seq_length, ftd, ltd, all_days)

        # Initialize a new model (weights are not carried over from the previous iteration)
        model = CNN(x_train.shape[1:], retrain_seed, **model_params)
        model.compile_model()

        model.retrain_model(x_train, y_train, ret_d_train, sic_train)
        # The labels and returns are passed as model inputs alongside the features
        y_pred_train = model.model.predict([y_train, x_train, ret_d_train, sic_train], batch_size=4096, verbose=0)
        print(f'Train loss is {custom_loss_np(y_train, y_pred_train, ret_d_train)}')
        y_pred_test = model.model.predict([y_test, x_test, ret_d_test, sic_test], batch_size=4096, verbose=0)
        print(f'Test loss is {custom_loss_np(y_test, y_pred_test, ret_d_test)}')

        pred_score_avg = 0
        excess_return = np.zeros((1, seq_length))
        for day in range(seq_length):

            # Rolling window of dates: features span day1..day20, the trade is labelled day21
            day1 = all_days[ltd - seq_length + 2 + day]
            day20 = all_days[ltd + 1 + day]
            day21 = all_days[ltd + 2 + day]
            day22 = all_days[ltd + 3 + day]

            # Rebuild the model inputs for this window (overwrites x_test/y_test/... from above)
            data_test_temp = data_test[(data_test['datadate'] >= day1) & (data_test['datadate'] <= day20)]
            pivot_data = data_test_temp[factors+['datadate', 'tic']].pivot_table(index='tic', columns='datadate')
            x_test = pivot_data.values.reshape(nt, len(factors), seq_length)
            y_test = data_test_temp[data_test_temp['datadate'] == day20]['rank'].values.reshape(nt, )
            ret_d_test = data_test_temp[data_test_temp['datadate'] == day20]['ret_d'].values.reshape(nt, )
            sic_test = data_test_temp[data_test_temp['datadate'] == day20]['sic'].values.reshape(nt, )
            sic_test = np.nan_to_num(sic_test)
            # Reorder axes from (stock, factor, day) to (stock, day, factor)
            x_test = np.transpose(x_test, (0, 2, 1))
            # Shift the labels by 2; presumably maps ranks -2..2 onto class indices 0..4 -- confirm
            y_test[:] = y_test[:] + 2

            y_pred_test = model.model.predict([y_test, x_test, ret_d_test, sic_test], batch_size=4096, verbose=0)
            # Weighted sum of the 5 model outputs with weights -2..2 gives one score per stock
            y_scores = np.dot(y_pred_test, np.array([-2, -1, 0, 1, 2]))
            # Indices of the num_stocks highest scores
            top_indices = np.argsort(y_scores)[-num_stocks:]
            top_stocks = [num_to_tic_dict[num] for num in top_indices]

            # Calculate SR PT1: simulate one trading day on the retro track
            total_asset, total, position_dict = invest(day, day1, day20, day21, day22, num_stocks, total_dict[f'retr_{i}_retro'], total_asset_dict[f'retr_{i}_retro'],
                                                       position_dict_all[f'retr_{i}_retro'], top_stocks, data_test_temp)
            total_dict[f'retr_{i}_retro'] = total
            total_asset_dict[f'retr_{i}_retro'] = total_asset
            position_dict_all[f'retr_{i}_retro'] = position_dict

            pred_score_avg += sum([y_scores[ind] for ind in top_indices]) / num_stocks
            # Daily portfolio return minus the risk-free rate looked up for day21
            excess_return[0, day] = (total_asset[-1] - total_asset[-2]) / total_asset[-2] - date_to_rfr_dict[day21]

        # Calculate SR PT2: aggregate the window and store it in the copied SR table
        pred_score_avg /= seq_length
        excess_return_avg = np.mean(excess_return)
        excess_return_std = np.std(excess_return)

        key = f'{all_days[ltd+1]}-{all_days[ltd+seq_length]}'
        SR_dict_copy[f'retr_{i}'].loc[key, 'pred_score'] = pred_score_avg
        SR_dict_copy[f'retr_{i}'].loc[key, 'avg_excess_ret'] = excess_return_avg
        SR_dict_copy[f'retr_{i}'].loc[key, 'std_excess_ret'] = excess_return_std
        SR_dict_copy[f'retr_{i}'].loc[key, 'SR'] = excess_return_avg / excess_return_std * np.sqrt(252)
        print(f"Model {i} has SR {SR_dict_copy[f'retr_{i}'].loc[key, 'SR']}")

        # Last iteration: ftd/ltd are back at the values passed in, so judge the retrained model
        if j == warm_up:

            # Number of previous windows in which the retrained SR beats the original SR
            retrain_score = 0

            for w in range(warm_up):

                # Key of the window ending w * seq_length days before ltd
                key = f'{all_days[ltd+1-(w+1)*seq_length]}-{all_days[ltd-w*seq_length]}'
                print(f"Model {i} SR {key}: original is {SR_dict[f'retr_{i}'].loc[key, 'SR']}; retrained is {SR_dict_copy[f'retr_{i}'].loc[key, 'SR']}")
                if SR_dict_copy[f'retr_{i}'].loc[key, 'SR'] > SR_dict[f'retr_{i}'].loc[key, 'SR']:
                    retrain_score += 1

            # Accept the retrained model if it lost in at most one of the compared windows
            if retrain_score >= warm_up - 1:
                print(f'Model {i}: retrained model shows improvement; using the new model')
                replace = True
                # Have to recalculate return_dict[f'retr_{i}_retro'] so that the exp weighted ensemble is calculated correctly
                # Return between the entries 121 and 21 positions from the end of the retro history;
                # presumably a 6 x 20-day lookback that excludes the 20 days simulated just above -- confirm
                ldr = total_asset_dict[f'retr_{i}_retro'][-21]
                fdr = total_asset_dict[f'retr_{i}_retro'][-20*6-1]
                ret = (ldr - fdr) / fdr
                return_dict[f'retr_{i}_retro'][-1] = ret
                # NOTE(review): the dates in this message are derived from warm_up-7 / warm_up-1;
                # verify that they match the [-121] and [-21] entries used for `ret`
                print(f"Model {i} with retrain retrospectively changed {ret*100} percent from {all_days[ltd+1-(warm_up-7)*seq_length]} to {all_days[ltd-(warm_up-1)*seq_length]}")
            else:
                print(f'Model {i}: SR did not improve; using original model instead')
                replace = False

    # `replace` is always bound here because the j == warm_up branch runs in the final iteration
    return model, replace

In [None]:
def invest(day, day1, day20, day21, day22, num_stocks, total, total_asset, position_dict, top_stocks, data_test):
    '''
    Rebalance the portfolio into `top_stocks` and apply one day of returns.

    Positions that are no longer recommended are sold, positions that are still
    recommended are kept untouched, and the freed cash is split equally among the newly
    recommended stocks. The 'ret_d' value of `day20` is then applied to every held stock.

    Input:
        day: from 0 to (last_test_day - first_test_day - 1); not used here
        day1: first_test_day; not used here
        day20: last_test_day; the 'ret_d' values of this date are applied
        day21: day of execution; not used here
        day22: day after execution to calculate what the holding amount will be (only printed)
        num_stocks: the number of stocks to be traded daily
        total: total amount of holdings
        total_asset: sequence of total amount of holdings, starting with 1, next day's asset amount, ... etc
        position_dict: `num_stocks` items
            keys: tickers of current holdings
            values: amount of current holdings
        top_stocks: list of top `num_stocks` recommended stocks
        data_test: DataFrame with 'datadate', 'tic' and 'ret_d' columns
    Output:
        total_asset: updated with one more entry (the input list is appended to in place)
        total: updated with the new total, i.e., the last entry of the updated total_asset
        position_dict: positions on day22 (the input dict is modified in place)

    Raises IndexError if a stock in `top_stocks` has no row for `day20` in `data_test`.
    '''
    # With no open positions the whole portfolio value is cash. This used to be a
    # hard-coded 1, which is only right for the very first call (total == 1); for any
    # other total the positions would no longer add up to `total`.
    uninvested = 0 if position_dict else total

    to_hold = [tic for tic in position_dict if tic in top_stocks]
    to_sell = [tic for tic in position_dict if tic not in top_stocks]

    # Sell: move the value of dropped stocks back into cash
    for tic in to_sell:
        uninvested += position_dict.pop(tic)

    # Buy: split the cash equally over the open slots. Order follows top_stocks so the
    # resulting dict is deterministic.
    held = set(to_hold)
    to_buy = [tic for tic in dict.fromkeys(top_stocks) if tic not in held]
    if to_buy:
        allocation = uninvested / (num_stocks - len(to_hold))
        for tic in to_buy:
            position_dict[tic] = allocation

    # Calculate return right away; filter on the date once instead of once per stock
    day_rows = data_test[data_test['datadate'] == day20]
    for tic in top_stocks:
        percent_change = day_rows[day_rows['tic'] == tic]['ret_d'].values[0]
        total += position_dict[tic] * percent_change
        # This calculates the position on the next day
        position_dict[tic] = position_dict[tic] * (1 + percent_change)

    total_asset.append(total)
    print(f'Total asset on {day22} will be {total}')
    print(f'It should be the same as {sum(position_dict.values())}')

    return total_asset, total, position_dict

In [None]:
def simulate_comparison(model_dict, ftd, ltd, num_stocks, total_dict, first_run, total_asset_dict, position_dict_all, graph,
                        data_test, seq_length, nt, num_runs, retrain, return_dict,
                        # SR arguments
                        num_iter, portfolio_dict, pred_score_avg_dict, excess_return_dict, SR_dict):
    '''
    Simulate investment and calculate SR
    model_dict has keys orig_{i}
    total_dict, total_asset_dict, position_dict_all have keys orig_{i}, orig_ensemble
    graph: True or False, determining whether a plot is generated
    '''
    if not first_run:
        y_score_weights_orig = [0]*num_of_models
        y_score_weights_retr = [0]*num_of_models
        for i in range(num_of_models):
            y_score_weights_orig[i] = math.exp(return_dict[f'orig_{i}'][-1])
            y_score_weights_retr[i] = math.exp(return_dict[f'retr_{i}_retro'][-1])
        sum_score_weights_orig = sum(y_score_weights_orig)
        sum_score_weights_retr = sum(y_score_weights_retr)
        for i in range(num_of_models):
            y_score_weights_orig[i] /= sum_score_weights_orig
            y_score_weights_retr[i] /= sum_score_weights_retr
            print(f'Weight of original model {i} is {y_score_weights_orig[i]}')
            print(f'Weight of retrained model {i} is {y_score_weights_retr[i]}')

    for day in range(seq_length):
        
        day1 = all_days[ltd - seq_length + 2 + day]
        day20 = all_days[ltd + 1 + day]
        day21 = all_days[ltd + 2 + day]
        day22 = all_days[ltd + 3 + day]

        data_test_temp = data_test[(data_test['datadate'] >= day1) & (data_test['datadate'] <= day20)]
        pivot_data = data_test_temp[factors+['datadate', 'tic']].pivot_table(index='tic', columns='datadate')
        x_test = pivot_data.values.reshape(nt, len(factors), seq_length)
        y_test = data_test_temp[data_test_temp['datadate'] == day20]['rank'].values.reshape(nt, )
        ret_d_test = data_test_temp[data_test_temp['datadate'] == day20]['ret_d'].values.reshape(nt, )
        sic_test = data_test_temp[data_test_temp['datadate'] == day20]['sic'].values.reshape(nt, )
        sic_test = np.nan_to_num(sic_test)
        x_test = np.transpose(x_test, (0, 2, 1))
        y_test[:] = y_test[:] + 2

        # Find today's risk-free rate
        rfr = date_to_rfr_dict[day21]
        
        y_scores_orig = [0]*num_of_models
        y_scores_retr = [0]*num_of_models
        for i in range(num_of_models):
            
            try:
                y_pred_orig = model_dict[f'orig_{i}'].predict([y_test, x_test, ret_d_test, sic_test], batch_size=4096, verbose=0)
            except:
                y_pred_orig = model_dict[f'orig_{i}'].model.predict([y_test, x_test, ret_d_test, sic_test], batch_size=4096, verbose=0)
            try:
                y_pred_retr = model_dict[f'retr_{i}'].predict([y_test, x_test, ret_d_test, sic_test], batch_size=4096, verbose=0)
            except:
                y_pred_retr = model_dict[f'retr_{i}'].model.predict([y_test, x_test, ret_d_test, sic_test], batch_size=4096, verbose=0)
            y_scores_orig[i] = np.dot(y_pred_orig, np.array([-2, -1, 0, 1, 2]))
            y_scores_retr[i] = np.dot(y_pred_retr, np.array([-2, -1, 0, 1, 2]))
            top_indices_orig = np.argsort(y_scores_orig[i])[-num_stocks:]
            top_stocks_orig = [num_to_tic_dict[num] for num in top_indices_orig]
            print(f'top_stocks by original model {i} to buy on {day21} are {top_stocks_orig}')
            top_indices_retr = np.argsort(y_scores_retr[i])[-num_stocks:]
            top_stocks_retr = [num_to_tic_dict[num] for num in top_indices_retr]
            print(f'top_stocks by retrained model {i} to buy on {day21} are {top_stocks_retr}')
            total_asset, total, position_dict = invest(day, day1, day20, day21, day22, num_stocks, total_dict[f'orig_{i}'],
                                                total_asset_dict[f'orig_{i}'], position_dict_all[f'orig_{i}'], top_stocks_orig, data_test_temp)
            total_dict[f'orig_{i}'] = total
            total_asset_dict[f'orig_{i}'] = total_asset
            position_dict_all[f'orig_{i}'] = position_dict
            total_asset, total, position_dict = invest(day, day1, day20, day21, day22, num_stocks, total_dict[f'retr_{i}'],
                                                total_asset_dict[f'retr_{i}'], position_dict_all[f'retr_{i}'], top_stocks_retr, data_test_temp)

            total_dict[f'retr_{i}'] = total
            total_asset_dict[f'retr_{i}'] = total_asset
            position_dict_all[f'retr_{i}'] = position_dict
            if len(total_asset_dict[f'retr_{i}_retro']) < len(total_asset_dict[f'retr_{i}']):
                total_asset, total, position_dict = invest(day, day1, day20, day21, day22, num_stocks, total_dict[f'retr_{i}_retro'],
                                                    total_asset_dict[f'retr_{i}_retro'], position_dict_all[f'retr_{i}_retro'], top_stocks_retr, data_test_temp)

                total_dict[f'retr_{i}_retro'] = total
                total_asset_dict[f'retr_{i}_retro'] = total_asset
                position_dict_all[f'retr_{i}_retro'] = position_dict
            
            # Calculate daily excess return for top `num_stocks` portfolios
            portfolio_dict[f'orig_{i}'].loc[f'{day21}-portfolio', 'pred_score'] = sum([y_scores_orig[i][ind] for ind in top_indices_orig]) / num_stocks
            prev, today = total_asset_dict[f'orig_{i}'][-2], total_asset_dict[f'orig_{i}'][-1]
            portfolio_dict[f'orig_{i}'].loc[f'{day21}-portfolio', 'avg_excess_ret'] = (today - prev) / prev - rfr
            portfolio_dict[f'retr_{i}'].loc[f'{day21}-portfolio', 'pred_score'] = sum([y_scores_retr[i][ind] for ind in top_indices_retr]) / num_stocks
            prev, today = total_asset_dict[f'retr_{i}'][-2], total_asset_dict[f'retr_{i}'][-1]
            portfolio_dict[f'retr_{i}'].loc[f'{day21}-portfolio', 'avg_excess_ret'] = (today - prev) / prev - rfr

        # Calculate ensembles
        if first_run:
            y_score_ensem_orig = sum(y_scores_orig) / num_of_models
            y_score_ensem_retr = sum(y_scores_retr) / num_of_models
        else:
            y_score_ensem_orig = np.zeros((y_scores_orig[0].shape))
            for i in range(num_of_models):
                y_score_ensem_orig += y_scores_orig[i] * y_score_weights_orig[i]
            y_score_ensem_retr = np.zeros((y_scores_retr[0].shape))
            for i in range(num_of_models):
                y_score_ensem_retr += y_scores_retr[i] * y_score_weights_retr[i]
            
        
        # Original return weighted ensemble
        top_indices = np.argsort(y_score_ensem_orig)[-num_stocks:]
        top_stocks = [num_to_tic_dict[num] for num in top_indices]
        print(f'top_stocks by original return weighted ensemble to buy on {day21} are {top_stocks}')
        total_asset, total, position_dict = invest(day, day1, day20, day21, day22, num_stocks, total_dict['orig_ensemble'], total_asset_dict['orig_ensemble'], 
                                                    position_dict_all['orig_ensemble'], top_stocks, data_test_temp)
        total_dict['orig_ensemble'] = total
        total_asset_dict['orig_ensemble'] = total_asset
        position_dict_all['orig_ensemble'] = position_dict
        portfolio_dict['orig_ensemble'].loc[f'{day21}-portfolio', 'pred_score'] = sum([y_score_ensem_orig[ind] for ind in top_indices]) / num_stocks
        prev, today = total_asset_dict['orig_ensemble'][-2], total_asset_dict['orig_ensemble'][-1]
        portfolio_dict['orig_ensemble'].loc[f'{day21}-portfolio', 'avg_excess_ret'] = (today - prev) / prev - rfr

        # Original equal weighted ensemble
        y_score_ensem = sum(y_scores_orig) / num_of_models
        top_indices = np.argsort(y_score_ensem)[-num_stocks:]
        top_stocks = [num_to_tic_dict[num] for num in top_indices]
        print(f'top_stocks by original equal weighted ensemble to buy on {day21} are {top_stocks}')
        total_asset, total, position_dict = invest(day, day1, day20, day21, day22, num_stocks, total_dict['orig_equal'], total_asset_dict['orig_equal'], 
                                                    position_dict_all['orig_equal'], top_stocks, data_test_temp)
        total_dict['orig_equal'] = total
        total_asset_dict['orig_equal'] = total_asset
        position_dict_all['orig_equal'] = position_dict
        portfolio_dict['orig_equal'].loc[f'{day21}-portfolio', 'pred_score'] = sum([y_score_ensem[ind] for ind in top_indices]) / num_stocks
        prev, today = total_asset_dict['orig_equal'][-2], total_asset_dict['orig_equal'][-1]
        portfolio_dict['orig_equal'].loc[f'{day21}-portfolio', 'avg_excess_ret'] = (today - prev) / prev - rfr

        # With retrain return weighted ensemble
        top_indices = np.argsort(y_score_ensem_retr)[-num_stocks:]
        top_stocks = [num_to_tic_dict[num] for num in top_indices]
        print(f'top_stocks by retrained return weighted ensemble to buy on {day21} are {top_stocks}')
        total_asset, total, position_dict = invest(day, day1, day20, day21, day22, num_stocks, total_dict['retr_ensemble'], total_asset_dict['retr_ensemble'], 
                                                    position_dict_all['retr_ensemble'], top_stocks, data_test_temp)
        total_dict['retr_ensemble'] = total
        total_asset_dict['retr_ensemble'] = total_asset
        position_dict_all['retr_ensemble'] = position_dict
        portfolio_dict['retr_ensemble'].loc[f'{day21}-portfolio', 'pred_score'] = sum([y_score_ensem_retr[ind] for ind in top_indices]) / num_stocks
        prev, today = total_asset_dict['retr_ensemble'][-2], total_asset_dict['retr_ensemble'][-1]
        portfolio_dict['retr_ensemble'].loc[f'{day21}-portfolio', 'avg_excess_ret'] = (today - prev) / prev - rfr

        # With retrain equal weighted ensemble
        y_score_ensem = sum(y_scores_retr) / num_of_models
        top_indices = np.argsort(y_score_ensem)[-num_stocks:]
        top_stocks = [num_to_tic_dict[num] for num in top_indices]
        print(f'top_stocks by retrained equal weighted ensemble to buy on {day21} are {top_stocks}')
        total_asset, total, position_dict = invest(day, day1, day20, day21, day22, num_stocks, total_dict['retr_equal'], total_asset_dict['retr_equal'], 
                                                    position_dict_all['retr_equal'], top_stocks, data_test_temp)
        total_dict['retr_equal'] = total
        total_asset_dict['retr_equal'] = total_asset
        position_dict_all['retr_equal'] = position_dict
        portfolio_dict['retr_equal'].loc[f'{day21}-portfolio', 'pred_score'] = sum([y_score_ensem[ind] for ind in top_indices]) / num_stocks
        prev, today = total_asset_dict['retr_equal'][-2], total_asset_dict['retr_equal'][-1]
        portfolio_dict['retr_equal'].loc[f'{day21}-portfolio', 'avg_excess_ret'] = (today - prev) / prev - rfr
        
    # Calculate SR for this period and save excess return for calculating the SR of the whole period
    for i in range(num_of_models):
        
        pred_score_avg_orig = 0
        excess_return_orig = np.zeros((1, seq_length))
        pred_score_avg_retr = 0
        excess_return_retr = np.zeros((1, seq_length))
        
        for j in range(seq_length):
            
            day21 = all_days[ltd + 2 + j]
            key = f'{day21}-portfolio'
            pred_score_avg_orig += portfolio_dict[f'orig_{i}'].loc[key, 'pred_score']
            pred_score_avg_retr += portfolio_dict[f'retr_{i}'].loc[key, 'pred_score']
            pred_score_avg_dict[f'orig_{i}'] += portfolio_dict[f'orig_{i}'].loc[key, 'pred_score']
            pred_score_avg_dict[f'retr_{i}'] += portfolio_dict[f'retr_{i}'].loc[key, 'pred_score']
            excess_return_orig[0, j] = portfolio_dict[f'orig_{i}'].loc[key, 'avg_excess_ret']
            excess_return_retr[0, j] = portfolio_dict[f'retr_{i}'].loc[key, 'avg_excess_ret']
            excess_return_dict[f'orig_{i}'][0, num_iter * seq_length + j] = portfolio_dict[f'orig_{i}'].loc[key, 'avg_excess_ret']
            excess_return_dict[f'retr_{i}'][0, num_iter * seq_length + j] = portfolio_dict[f'retr_{i}'].loc[key, 'avg_excess_ret']
        pred_score_avg_orig /= seq_length
        pred_score_avg_retr /= seq_length
        excess_return_avg_orig = np.mean(excess_return_orig)
        excess_return_std_orig = np.std(excess_return_orig)
        excess_return_avg_retr = np.mean(excess_return_retr)
        excess_return_std_retr = np.std(excess_return_retr)

        key = f'{all_days[ltd+1]}-{all_days[ltd+seq_length]}'
        SR_dict[f'orig_{i}'].loc[key, 'pred_score'] = pred_score_avg_orig
        SR_dict[f'orig_{i}'].loc[key, 'avg_excess_ret'] = excess_return_avg_orig
        SR_dict[f'orig_{i}'].loc[key, 'std_excess_ret'] = excess_return_std_orig
        SR_dict[f'orig_{i}'].loc[key, 'SR'] = excess_return_avg_orig / excess_return_std_orig * np.sqrt(252)
        print(f"Original model {i} has SR {SR_dict[f'orig_{i}'].loc[key, 'SR']}")
        SR_dict[f'retr_{i}'].loc[key, 'pred_score'] = pred_score_avg_retr
        SR_dict[f'retr_{i}'].loc[key, 'avg_excess_ret'] = excess_return_avg_retr
        SR_dict[f'retr_{i}'].loc[key, 'std_excess_ret'] = excess_return_std_retr
        SR_dict[f'retr_{i}'].loc[key, 'SR'] = excess_return_avg_retr / excess_return_std_retr * np.sqrt(252)
        print(f"Model {i} with retrain has SR {SR_dict[f'retr_{i}'].loc[key, 'SR']}")
    
    # Calculate SR for ensemble
    pred_score_avg_orig = 0
    excess_return_orig = np.zeros((1, seq_length))
    pred_score_avg_retr = 0
    excess_return_retr = np.zeros((1, seq_length))
    for j in range(seq_length):
        day21 = all_days[ltd + 2 + j]
        key = f'{day21}-portfolio'
        pred_score_avg_orig += portfolio_dict['orig_ensemble'].loc[key, 'pred_score']
        pred_score_avg_retr += portfolio_dict['retr_ensemble'].loc[key, 'pred_score']
        pred_score_avg_dict['orig_ensemble'] += portfolio_dict['orig_ensemble'].loc[key, 'pred_score']
        pred_score_avg_dict['retr_ensemble'] += portfolio_dict['retr_ensemble'].loc[key, 'pred_score']
        excess_return_orig[0, j] = portfolio_dict['orig_ensemble'].loc[key, 'avg_excess_ret']
        excess_return_retr[0, j] = portfolio_dict['retr_ensemble'].loc[key, 'avg_excess_ret']
        excess_return_dict['orig_ensemble'][0, num_iter * seq_length + j] = portfolio_dict['orig_ensemble'].loc[key, 'avg_excess_ret']
        excess_return_dict['retr_ensemble'][0, num_iter * seq_length + j] = portfolio_dict['retr_ensemble'].loc[key, 'avg_excess_ret']
        
    pred_score_avg_orig /= seq_length
    pred_score_avg_retr /= seq_length
    excess_return_avg_orig = np.mean(excess_return_orig)
    excess_return_std_orig = np.std(excess_return_orig)
    excess_return_avg_retr = np.mean(excess_return_retr)
    excess_return_std_retr = np.std(excess_return_retr)

    key = f'{all_days[ltd+1]}-{all_days[ltd+seq_length]}'
    SR_dict['orig_ensemble'].loc[key, 'pred_score'] = pred_score_avg_orig
    SR_dict['orig_ensemble'].loc[key, 'avg_excess_ret'] = excess_return_avg_orig
    SR_dict['orig_ensemble'].loc[key, 'std_excess_ret'] = excess_return_std_orig
    SR_dict['orig_ensemble'].loc[key, 'SR'] = excess_return_avg_orig / excess_return_std_orig * np.sqrt(252)
    print(f"Return exponentially weighted original ensemble has SR {SR_dict['orig_ensemble'].loc[key, 'SR']}")
    SR_dict['retr_ensemble'].loc[key, 'pred_score'] = pred_score_avg_retr
    SR_dict['retr_ensemble'].loc[key, 'avg_excess_ret'] = excess_return_avg_retr
    SR_dict['retr_ensemble'].loc[key, 'std_excess_ret'] = excess_return_std_retr
    SR_dict['retr_ensemble'].loc[key, 'SR'] = excess_return_avg_retr / excess_return_std_retr * np.sqrt(252)
    print(f"Return exponentially weighted ensemble with retrain has SR {SR_dict['retr_ensemble'].loc[key, 'SR']}")

    # Calculate the return over the past 6 months
    if first_run:
        return_dict = {}
        for i in range(num_of_models):
            return_dict[f'orig_{i}'] = []
            return_dict[f'retr_{i}'] = []
            return_dict[f'retr_{i}_retro'] = []
    for i in range(num_of_models):
        ldr = total_asset_dict[f'orig_{i}'][-1]
        fdr = total_asset_dict[f'orig_{i}'][max(-len(total_asset_dict[f'orig_{i}']), -20*6-1)]
        ret = (ldr - fdr) / fdr
        return_dict[f'orig_{i}'].append(ret)
        print(f"Original model {i} changed {ret*100} percent from {all_days[ltd+23+max(-len(total_asset_dict[f'orig_{i}']), -20*6-1)]} to {all_days[ltd+22]}")
        # if len(return_dict[f'retr_{i}']) != len(return_dict[f'orig_{i}']):
        ldr = total_asset_dict[f'retr_{i}'][-1]
        fdr = total_asset_dict[f'retr_{i}'][max(-len(total_asset_dict[f'retr_{i}']), -20*6-1)]
        ret = (ldr - fdr) / fdr
        return_dict[f'retr_{i}'].append(ret)
        print(f"Model {i} with retrain changed {ret*100} percent from {all_days[ltd+23+max(-len(total_asset_dict[f'retr_{i}']), -20*6-1)]} to {all_days[ltd+22]}")
        ldr = total_asset_dict[f'retr_{i}_retro'][-1]
        fdr = total_asset_dict[f'retr_{i}_retro'][max(-len(total_asset_dict[f'retr_{i}_retro']), -20*6-1)]
        ret = (ldr - fdr) / fdr
        return_dict[f'retr_{i}_retro'].append(ret)
        print(f"Model {i} with retrain retrospectively changed {ret*100} percent from {all_days[ltd+23+max(-len(total_asset_dict[f'retr_{i}_retro']), -20*6-1)]} to {all_days[ltd+22]}")

    pred_score_avg_orig = 0
    excess_return_orig = np.zeros((1, seq_length))
    pred_score_avg_retr = 0
    excess_return_retr = np.zeros((1, seq_length))
    for j in range(seq_length):
        day21 = all_days[ltd + 2 + j]
        key = f'{day21}-portfolio'
        pred_score_avg_orig += portfolio_dict['orig_equal'].loc[key, 'pred_score']
        pred_score_avg_retr += portfolio_dict['retr_equal'].loc[key, 'pred_score']
        pred_score_avg_dict['orig_equal'] += portfolio_dict['orig_equal'].loc[key, 'pred_score']
        pred_score_avg_dict['retr_equal'] += portfolio_dict['retr_equal'].loc[key, 'pred_score']
        excess_return_orig[0, j] = portfolio_dict['orig_equal'].loc[key, 'avg_excess_ret']
        excess_return_retr[0, j] = portfolio_dict['retr_equal'].loc[key, 'avg_excess_ret']
        excess_return_dict['orig_equal'][0, num_iter * seq_length + j] = portfolio_dict['orig_equal'].loc[key, 'avg_excess_ret']
        excess_return_dict['retr_equal'][0, num_iter * seq_length + j] = portfolio_dict['retr_equal'].loc[key, 'avg_excess_ret']

    pred_score_avg_orig /= seq_length
    pred_score_avg_retr /= seq_length
    excess_return_avg_orig = np.mean(excess_return_orig)
    excess_return_std_orig = np.std(excess_return_orig)
    excess_return_avg_retr = np.mean(excess_return_retr)
    excess_return_std_retr = np.std(excess_return_retr)

    key = f'{all_days[ltd+1]}-{all_days[ltd+seq_length]}'
    SR_dict['orig_equal'].loc[key, 'pred_score'] = pred_score_avg_orig
    SR_dict['orig_equal'].loc[key, 'avg_excess_ret'] = excess_return_avg_orig
    SR_dict['orig_equal'].loc[key, 'std_excess_ret'] = excess_return_std_orig
    SR_dict['orig_equal'].loc[key, 'SR'] = excess_return_avg_orig / excess_return_std_orig * np.sqrt(252)
    print(f"Equal weighted original ensemble has SR {SR_dict['orig_equal'].loc[key, 'SR']}")
    SR_dict['retr_equal'].loc[key, 'pred_score'] = pred_score_avg_retr
    SR_dict['retr_equal'].loc[key, 'avg_excess_ret'] = excess_return_avg_retr
    SR_dict['retr_equal'].loc[key, 'std_excess_ret'] = excess_return_std_retr
    SR_dict['retr_equal'].loc[key, 'SR'] = excess_return_avg_retr / excess_return_std_retr * np.sqrt(252)
    print(f"Equal weighted ensemble with retrain has SR {SR_dict['retr_equal'].loc[key, 'SR']}")
    
    # Check SR for retraining from scratch
    key1 = f'{all_days[ltd+1-seq_length]}-{all_days[ltd]}'
    key2 = f'{all_days[ltd+1]}-{all_days[ltd+seq_length]}'
    
    for i in range(num_of_models):
        
        if num_runs[i] < 6:
            continue
        
        print(f"Model {i} SR from {all_days[ltd+1-seq_length]} to {all_days[ltd]} is {SR_dict[f'retr_{i}'].loc[key1, 'SR']}")
        print(f"Model {i} SR from {all_days[ltd+1]} to {all_days[ltd+seq_length]} is {SR_dict[f'retr_{i}'].loc[key2, 'SR']}")

        if SR_dict[f'retr_{i}'].loc[key1, 'SR'] < -1 and SR_dict[f'retr_{i}'].loc[key2, 'SR'] < -1:
            print(f'Model {i} needs to be retrained')
            if implement_retrain:
                retrain[i] = True

    if graph:
        x_axis = all_days[ltd+23-len(total_asset_dict[f'orig_{i}']):ltd+23]
        # Graph the comparison between original and retrained models
        for i in range(num_of_models):
            plt.figure(figsize=(16, 6))
            plt.plot(x_axis, total_asset_dict[f'orig_{i}'], label='Original Model')
            plt.plot(x_axis, total_asset_dict[f'retr_{i}'], label='Model with Retrain')
            indices_to_display = np.linspace(0, len(x_axis)-1, 15, dtype=int)
            plt.xticks(indices_to_display, [x_axis[i] for i in indices_to_display], rotation=45)
            plt.title(f'Total Asset Simulated by Model {i} ({num_stocks} stocks per day)')
            plt.ylabel('Total Asset')
            plt.xlabel('Date')
            plt.grid()
            plt.legend()
            plt.show()
        # Ensemble
        plt.figure(figsize=(16, 6))
        plt.plot(x_axis, total_asset_dict['orig_ensemble'], label='Original Exponentially Weighted by Return Ensemble')
        plt.plot(x_axis, total_asset_dict['orig_equal'], label='Original Equal Weighted Ensemble')
        plt.plot(x_axis, total_asset_dict['retr_ensemble'], label='Retrained Exponentially Weighted by Return Ensemble')
        plt.plot(x_axis, total_asset_dict['retr_equal'], label='Retrained Equal Weighted Ensemble')
        indices_to_display = np.linspace(0, len(x_axis)-1, 15, dtype=int)
        plt.xticks(indices_to_display, [x_axis[i] for i in indices_to_display], rotation=45)
        plt.title(f'Total Asset Simulated by Ensemble ({num_stocks} stocks per day)')
        plt.ylabel('Total Asset')
        plt.xlabel('Date')
        plt.grid()
        plt.legend()
        plt.show()
        
        # Graph all original (resp. retrained) models together
        plt.figure(figsize=(16, 6))
        for i in range(num_of_models):
            plt.plot(x_axis, total_asset_dict[f'orig_{i}'], label=f'Original Model {i}')
        plt.plot(x_axis, total_asset_dict['orig_ensemble'], label='Original Exponentially Weighted by Return Ensemble')
        plt.plot(x_axis, total_asset_dict['orig_equal'], label='Original Equal Weighted Ensemble')
        indices_to_display = np.linspace(0, len(x_axis)-1, 15, dtype=int)
        plt.xticks(indices_to_display, [x_axis[i] for i in indices_to_display], rotation=45)
        plt.title(f'Total Asset Simulated by Original Models ({num_stocks} stocks per day)')
        plt.ylabel('Total Asset')
        plt.xlabel('Date')
        plt.legend()
        plt.grid()
        plt.show()
        
        plt.figure(figsize=(16, 6))
        for i in range(num_of_models):
            plt.plot(x_axis, total_asset_dict[f'retr_{i}'], label=f'Model {i} with Retrain')
        plt.plot(x_axis, total_asset_dict['retr_ensemble'], label='Retrained Exponentially Weighted by Return Ensemble')
        plt.plot(x_axis, total_asset_dict['retr_equal'], label='Retrained Equal Weighted Ensemble')
        indices_to_display = np.linspace(0, len(x_axis)-1, 15, dtype=int)
        plt.xticks(indices_to_display, [x_axis[i] for i in indices_to_display], rotation=45)
        plt.title(f'Total Asset Simulated by Models with Retrain ({num_stocks} stocks per day)')
        plt.ylabel('Total Asset')
        plt.xlabel('Date')
        plt.legend()
        plt.grid()
        plt.show()

    return total_asset_dict, total_dict, position_dict_all, portfolio_dict, pred_score_avg_dict, excess_return_dict, SR_dict, retrain, return_dict