In [1]:
import sys
print(sys.executable)


/opt/anaconda3/envs/nqc3/bin/python


In [332]:
import warnings
warnings.filterwarnings('ignore')

In [498]:
import pandas as pd
import numpy as np
from functools import reduce
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

In [186]:
def backtester_without_TC(weights_df):
    #Update data file path here
    data = pd.read_csv('crossval_data.csv')
    weights_df = weights_df.fillna(0)
    start_date = 3500
    end_date = 3999

    initial_notional = 1

    df_returns = pd.DataFrame()

    for i in range(0,20):
        data_symbol = data[data['Symbol']==i]
        data_symbol = data_symbol['Close']
        data_symbol = data_symbol.reset_index(drop=True)   
        data_symbol = data_symbol/data_symbol.shift(1) - 1
        df_returns =  pd.concat([df_returns,data_symbol], axis=1, ignore_index=True)
    
    df_returns = df_returns.fillna(0)
    
    df_returns.index = range(start_date, start_date + len(df_returns))
    
    weights_df = weights_df.loc[start_date:end_date]    
    df_returns = df_returns.loc[start_date:end_date]
    
   
    df_returns = weights_df.mul(df_returns)
    returns_matrix = weights_df.values * df_returns.values
    notional = initial_notional
    
    returns = []

    for daily_return in returns_matrix:
        r = np.nansum(daily_return)
        returns.append(r)
        notional *= (1 + r)
    returns = np.array(returns)
    net_return = ((notional - initial_notional)/initial_notional)*100
    
    sharpe_ratio = returns.mean() / returns.std()
    return [net_return, sharpe_ratio]

In [206]:
def task1_Strategy1():
    # Load and merge training + cross-validation data
    train_data = pd.read_csv('train_data.csv')
    crossval_data = pd.read_csv('crossval_data.csv')
    df = pd.concat([train_data, crossval_data], ignore_index=True)

    # Sort by Symbol and Date
    df = df.sort_values(['Symbol', 'Date']).reset_index(drop=True)

    # Assign week numbers (every 5 consecutive trading days = 1 week)
    df['Day_Index'] = df.groupby('Symbol').cumcount()
    df['Week'] = df['Day_Index'] // 5

    # Get closing price at end of each week
    weekly_close = df.groupby(['Symbol', 'Week']).last().reset_index()

    # Initialize week 0 price as 1 to calculate Week 1 returns
    weekly_close['Prev_Close'] = weekly_close.groupby('Symbol')['Close'].shift(1)
    weekly_close.loc[weekly_close['Week'] == 0, 'Prev_Close'] = 1

    # Weekly return: (current_close - prev_close) / prev_close
    weekly_close['Weekly_Return'] = (weekly_close['Close'] - weekly_close['Prev_Close']) / weekly_close['Prev_Close']

    # Remove first row per symbol (Week 0) because it has no valid return
    weekly_close = weekly_close.dropna(subset=['Weekly_Return'])

    # For each stock-week, assign that week's return to all 5 dates in that week
    df = df.merge(weekly_close[['Symbol', 'Week', 'Weekly_Return']], on=['Symbol', 'Week'], how='left')

    # Filter only complete weeks (weeks having 5 trading days)
    week_counts = df.groupby(['Symbol', 'Week']).size().reset_index(name='count')
    complete_weeks = week_counts[week_counts['count'] == 5][['Symbol', 'Week']]
    df = df.merge(complete_weeks, on=['Symbol', 'Week'], how='inner')

    # Get all valid end-of-week dates (5th day of each week)
    df['Week_Day_Index'] = df.groupby(['Symbol', 'Week']).cumcount()
    end_of_week_df = df[df['Week_Day_Index'] == 4]

    # Build output dataframe (rows = dates, columns = symbols, values = weights)
    all_dates = df['Date'].unique()
    all_symbols = sorted(df['Symbol'].unique())
    output_df = pd.DataFrame(0, index=all_dates, columns=all_symbols)

    # Iterate over each valid end-of-week date (i.e., when a new weight decision is made)
    for date in end_of_week_df['Date'].unique():
        # Determine the last 50 full weeks for each symbol before this date
        current_info = end_of_week_df[end_of_week_df['Date'] == date]
        valid_symbols = current_info['Symbol'].values

        mean_returns = []

        for symbol in valid_symbols:
            symbol_week = current_info[current_info['Symbol'] == symbol]['Week'].values[0]
            past_50_weeks = weekly_close[
                (weekly_close['Symbol'] == symbol) &
                (weekly_close['Week'] >= symbol_week - 50) &
                (weekly_close['Week'] < symbol_week)
            ]
            if len(past_50_weeks) == 50:
                avg_return = past_50_weeks['Weekly_Return'].mean()
                mean_returns.append((symbol, avg_return))

        # Skip if not enough data
        if len(mean_returns) < 12:
            continue

        # Sort by average return
        sorted_returns = sorted(mean_returns, key=lambda x: x[1], reverse=True)
        top_6 = [s[0] for s in sorted_returns[:6]]
        bottom_6 = [s[0] for s in sorted_returns[-6:]]

        # Assign weights
        weight_row = {symbol: 0 for symbol in all_symbols}
        for s in top_6:
            weight_row[s] = -1 / 6
        for s in bottom_6:
            weight_row[s] = 1 / 6

        # Apply the same weights to all dates in the current week
        week_df = df[(df['Symbol'].isin(top_6 + bottom_6)) & (df['Week'] == symbol_week)]
        week_dates = week_df['Date'].unique()
        for d in week_dates:
            output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
            
    

    return output_df

In [207]:

def task1_Strategy2():
    train_data = pd.read_csv('train_data.csv')  # Columns: Date, Symbol, Close
    crossval_data = pd.read_csv('crossval_data.csv')
    
    # Combine and sort
    df = pd.concat([train_data, crossval_data], ignore_index=True)
    df = df.sort_values(['Date', 'Symbol']).reset_index(drop=True)
    
    # Convert Date to integer index if needed
    df['Date'] = pd.to_numeric(df['Date'], errors='coerce')
    df = df.dropna(subset=['Date'])

    # Compute SMA and LMA
    df['LMA'] = df.groupby('Symbol')['Close'].transform(lambda x: x.rolling(window=30, min_periods=1).mean())
    df['SMA'] = df.groupby('Symbol')['Close'].transform(lambda x: x.rolling(window=5, min_periods=1).mean())
    df['relative_position'] = (df['SMA'] - df['LMA']) / df['LMA']

    # Drop rows where LMA is 0 or NaN
    df = df[df['LMA'] != 0].dropna(subset=['relative_position'])

    # Initialize output: Date x Symbol matrix
    unique_dates = df['Date'].unique()
    unique_symbols = df['Symbol'].unique()
    weights_df = pd.DataFrame(0, index=unique_dates, columns=unique_symbols)

    for date in unique_dates:
        daily_df = df[df['Date'] == date].copy()
        if len(daily_df) < 10:
            continue  # Not enough stocks to rank top/bottom 5
        
        daily_df = daily_df.sort_values('relative_position', ascending=False)

        top_5 = daily_df.head(5)['Symbol']
        bottom_5 = daily_df.tail(5)['Symbol']

        weights_df.loc[date, top_5] = -1 / 5
        weights_df.loc[date, bottom_5] = 1 / 5

    weights_df = weights_df.sort_index()
    return weights_df

In [208]:
def task1_Strategy3():
    import pandas as pd

    # Load and combine data
    train_data = pd.read_csv('train_data.csv')
    crossval_data = pd.read_csv('crossval_data.csv')
    full_data = pd.concat([train_data, crossval_data], ignore_index=True)

    # Sort by Symbol and Date
    df = full_data.sort_values(['Symbol', 'Date']).reset_index(drop=True)

    # Calculate 7-day lag close
    df['past_close'] = df.groupby('Symbol')['Close'].shift(7)

    # Calculate Rate of Change (ROC)
    df['roc'] = 100 * (df['Close'] - df['past_close']) / df['past_close']

    # Drop rows where ROC is NaN (i.e., first 7 days per symbol)
    df = df.dropna(subset=['roc'])

    # Prepare output: daily weights DataFrame
    all_dates = df['Date'].unique()
    all_symbols = sorted(df['Symbol'].unique())
    output_df = pd.DataFrame(0, index=all_dates, columns=all_symbols)

    # Loop through each date and assign weights
    for date in all_dates:
        daily_df = df[df['Date'] == date].copy()

        if daily_df.empty:
            continue

        daily_df = daily_df.sort_values('roc', ascending=False)

        top_4 = daily_df.head(4).copy()
        bottom_4 = daily_df.tail(4).copy()

        top_4['weight'] = -1 / 4
        bottom_4['weight'] = 1 / 4

        weights = pd.concat([top_4, bottom_4], ignore_index=True)[['Symbol', 'weight']]

        for _, row in weights.iterrows():
            output_df.at[date, row['Symbol']] = row['weight']

    return output_df

In [209]:
def task1_Strategy4():
    import pandas as pd

    # Load and combine data
    train_data = pd.read_csv('train_data.csv')
    crossval_data = pd.read_csv('crossval_data.csv')
    df = pd.concat([train_data, crossval_data], ignore_index=True)

    # Sort by Symbol and Date
    df = df.sort_values(['Symbol', 'Date']).reset_index(drop=True)

    # Calculate 21-day SMA and Std Dev
    grouped = df.groupby('Symbol')['Close']
    df['SMA_21'] = grouped.transform(lambda x: x.rolling(window=21, min_periods=1).mean())
    df['STD_21'] = grouped.transform(lambda x: x.rolling(window=21, min_periods=1).std())

    # Calculate Resistance and Support levels
    df['Resistance'] = df['SMA_21'] + 3 * df['STD_21']
    df['Support'] = df['SMA_21'] - 3 * df['STD_21']

    # Get all unique dates and symbols
    all_dates = df['Date'].unique()
    all_symbols = sorted(df['Symbol'].unique())
    output_df = pd.DataFrame(0, index=all_dates, columns=all_symbols)

    # Iterate through each date
    for date in all_dates:
        daily_df = df[df['Date'] == date].copy()

        # Ensure support and resistance are valid (non-zero)
        daily_df = daily_df.dropna(subset=['Support', 'Resistance'])

        if daily_df.empty:
            continue

        # Calculate proximity metrics
        daily_df['proximity_resistance'] = (daily_df['Close'] - daily_df['Resistance']) / daily_df['Resistance']
        daily_df['proximity_support'] = (daily_df['Close'] - daily_df['Support']) / daily_df['Support']

        # Long: Stocks closest to support (smallest positive or most negative)
        support_ranked = daily_df.sort_values('proximity_support', ascending=True)
        top_support = support_ranked.head(4).copy()
        top_support['weight'] = 1 / 4

        # Short: Stocks farthest from resistance (most positive proximity)
        remaining = daily_df[~daily_df['Symbol'].isin(top_support['Symbol'])]
        resistance_ranked = remaining.sort_values('proximity_resistance', ascending=False)
        top_resistance = resistance_ranked.head(4).copy()
        top_resistance['weight'] = -1 / 4

        # Combine weights
        weights = pd.concat([top_support, top_resistance])[['Symbol', 'weight']]

        for _, row in weights.iterrows():
            output_df.at[date, row['Symbol']] = row['weight']

    return output_df

In [211]:
def task1_Strategy5():

    # Load and combine training + cross-validation data
    train_data = pd.read_csv('train_data.csv')
    crossval_data = pd.read_csv('crossval_data.csv')
    df = pd.concat([train_data, crossval_data], ignore_index=True)

    # Sort by Symbol and Date
    df = df.sort_values(['Symbol', 'Date']).reset_index(drop=True)

    # Calculate 14-day high and low for %K calculation
    grouped = df.groupby('Symbol')['Close']
    df['14d_high'] = grouped.transform(lambda x: x.rolling(window=14, min_periods=1).max())
    df['14d_low'] = grouped.transform(lambda x: x.rolling(window=14, min_periods=1).min())

    # Calculate %K
    df['%K'] = 100 * (df['Close'] - df['14d_low']) / (df['14d_high'] - df['14d_low'])
    df = df.dropna(subset=['%K'])

    # Get all unique dates and symbols
    all_dates = df['Date'].unique()
    all_symbols = sorted(df['Symbol'].unique())
    output_df = pd.DataFrame(0, index=all_dates, columns=all_symbols)

    # Iterate over each date
    for date in all_dates:
        daily_df = df[df['Date'] == date].copy()
        daily_df = daily_df.dropna(subset=['%K'])

        if len(daily_df) < 6:
            continue  # Need at least 6 symbols to choose top 3 and bottom 3

        # Sort by %K
        sorted_df = daily_df.sort_values('%K', ascending=True)

        # Lowest 3 -> Long (positive weight), Highest 3 -> Short (negative weight)
        bottom_3 = sorted_df.head(3).copy()
        top_3 = sorted_df.tail(3).copy()

        bottom_3['weight'] = 1 / 3
        top_3['weight'] = -1 / 3

        weights = pd.concat([bottom_3, top_3])[['Symbol', 'weight']]

        # Assign weights to the output dataframe
        for _, row in weights.iterrows():
            output_df.at[date, row['Symbol']] = row['weight']

    return output_df

Every Strategy Outputs a 4000*20 df, each column is Stock Symbol, and each row is Day number
Now, in the training set, i need to attach the result of these strategies


In [323]:
# Returns Daily Strategy Scores for Full Data
def daily_strategy_returns(data, strategy_funcs=[
    task1_Strategy1, task1_Strategy2, task1_Strategy3, task1_Strategy4, task1_Strategy5
], num_days=3999):
    # Step 1: Create close price matrix (date x symbol)
    close_pivot = data.pivot(index='Date', columns='Symbol', values='Close')
    close_pivot = close_pivot.sort_index().sort_index(axis=1)

    # Step 2: Compute daily returns from close prices
    returns = close_pivot.shift(-1) / close_pivot - 1
    returns = returns.iloc[:num_days]

    # Step 3: Compute strategy returns per day
    strategy_returns = []

    for strat_func in strategy_funcs:
        weights_df = strat_func()  # should return (4000, 20)
        weights_df = weights_df.iloc[:num_days]
        strat_daily_return = (weights_df * returns).sum(axis=1)
        strategy_returns.append(strat_daily_return)

    # Step 4: Combine strategy returns into DataFrame
    strategy_returns_df = pd.DataFrame(strategy_returns).T
    strategy_returns_df.columns = [f"strategy_{i+1}" for i in range(len(strategy_funcs))]

    return strategy_returns_df

In [329]:
#Preprocesses Full Data
def preprocess(data):
    features = ['Close', 'Volume', 'VWAP', 'High', 'Low', 'Turnover']
    feature_dfs = []

    for feature in features:
        pivot = data.pivot(index='Date', columns='Symbol', values=feature)
        pivot.columns = [f"{feature}_{symbol}" for symbol in pivot.columns]
        feature_dfs.append(pivot)

    full_features_df = reduce(lambda left, right: pd.concat([left, right], axis=1), feature_dfs)
    full_features_df = full_features_df.iloc[:-1]  # drop day 3999

    strategy_returns_df = daily_strategy_returns(data)  # (0 to 3998)
    merged = pd.concat([full_features_df, strategy_returns_df], axis=1)

    strategy_cols = [f"strategy_{i+1}" for i in range(5)]
    merged['label'] = merged[strategy_cols].idxmax(axis=1)

    label_onehot = pd.get_dummies(merged['label'], prefix='label').astype(int)

    merged = merged.drop(columns=strategy_cols + ['label'])
    merged = pd.concat([merged, label_onehot], axis=1)

    return merged.iloc[:-1]

In [330]:
train_data = pd.read_csv('train_data.csv')
crossval_data = pd.read_csv('crossval_data.csv')
full_data = pd.concat([train_data, crossval_data], ignore_index=True)
full_data = preprocess(full_data)


  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weight_row)
  output_df.loc[d, weight_row.keys()] += pd.Series(weig

In [331]:
full_data

Unnamed: 0,Close_0,Close_1,Close_2,Close_3,Close_4,Close_5,Close_6,Close_7,Close_8,Close_9,...,Turnover_15,Turnover_16,Turnover_17,Turnover_18,Turnover_19,label_strategy_1,label_strategy_2,label_strategy_3,label_strategy_4,label_strategy_5
0,1.006308,1.015789,0.994319,1.003638,0.983622,1.018072,0.992221,0.961377,1.000417,1.006221,...,0.341397,0.420741,0.002124,0.001192,0.344860,0,1,0,0,0
1,1.008000,1.013158,0.991334,1.001047,0.986162,1.031928,0.980791,0.963741,0.982495,0.992857,...,0.607836,0.233177,0.006340,0.001229,0.278713,0,1,0,0,0
2,1.012769,0.993860,0.994704,1.027178,0.996541,0.992169,0.980791,0.960326,1.000972,0.990092,...,0.399179,0.188886,0.004154,0.001216,0.344052,0,1,0,0,0
3,1.018308,1.002632,0.994222,1.036659,1.022162,1.001807,0.983966,0.952444,1.015699,1.002995,...,0.312181,0.187236,0.003044,0.001558,0.336474,0,0,0,1,0
4,1.018000,1.016447,0.997882,1.037486,1.024649,1.023494,1.000159,0.954283,1.014865,0.990553,...,0.296927,0.133178,0.001867,0.000872,0.371930,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3994,4.329077,1.551754,6.073471,0.559096,2.854378,241.248193,2.553897,4.943510,5.499861,9.842627,...,8.230619,1.321007,6.435365,1.098627,0.922728,1,0,0,0,0
3995,4.297077,1.566447,6.170149,0.561521,2.861351,242.037952,2.483410,4.962953,5.441095,9.702304,...,4.389508,1.200951,4.619902,0.617235,1.412765,0,0,0,0,1
3996,4.313385,1.552412,6.195185,0.552040,2.884811,242.356627,2.494999,4.973463,5.476799,9.687788,...,6.575141,1.336927,3.169974,0.485855,0.597072,1,0,0,0,0
3997,4.222000,1.567763,6.175542,0.559702,2.889081,241.565663,2.411811,5.094062,5.495415,9.658986,...,4.318706,1.291670,1.495426,0.781587,0.746459,1,0,0,0,0


In [421]:
def fit_model_and_train(n=100, d=5, l=10):
    # Select all feature columns (excluding strategy and label columns)
    X = full_data.drop(columns=['label_strategy_1', 'label_strategy_2', 'label_strategy_3', 'label_strategy_4', 'label_strategy_5'])
    y = full_data[['label_strategy_1', 'label_strategy_2', 'label_strategy_3', 'label_strategy_4', 'label_strategy_5']]
    # Target: convert one-hot encoded label back to class label (0 to 4)
    
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.125, random_state=42, stratify=y)
    y_train = np.argmax(y_train.values, axis=1)
    y_val = np.argmax(y_val.values, axis=1)
    model = RandomForestClassifier(n_estimators=n, max_depth=d, min_samples_leaf=l, random_state=42)
    model.fit(X_train, y_train)         # one-hot-encoding, does not work with RFs, also RFs dont need one-hot encoding whatsoever

    return X_train, X_val, y_train, y_val, model
    
    

In [385]:
X_train, X_val, y_train, y_val, model = fit_model_and_train()

In [386]:
model

In [422]:
def select_best():
    n_list = [1, 10, 80, 100, 120, 150, 180, 200, 500]
    d_list = [3, 4, 5, 6, 7, 8, 9]
    l_list = [100, 80, 60, 40, 20, 10, 8, 6, 4]
    best_score = -1
    best_n = -1
    best_d = -1
    best_l = -1
    for n in n_list:
        for d in d_list:
            for l in l_list:
                print(f"Trying n={n}, d={d}, l={l}", end=' ')
                X_train, X_val, y_train, y_val, model = fit_model_and_train(n, d, l)
                y_pred = model.predict(X_val)
                acc = accuracy_score(y_val, y_pred)
                best_score = max(best_score, acc)
                if best_score == acc:
                    best_n = n
                    best_d = d
                    best_l = l
                print(f" ==> acc={acc}, best_acc={best_score}")
    return best_n, best_d, best_l
                                 
                

In [423]:
n, d, l = select_best()

Trying n=1, d=3, l=100  ==> acc=0.214, best_acc=0.214
Trying n=1, d=3, l=80  ==> acc=0.216, best_acc=0.216
Trying n=1, d=3, l=60  ==> acc=0.216, best_acc=0.216
Trying n=1, d=3, l=40  ==> acc=0.23, best_acc=0.23
Trying n=1, d=3, l=20  ==> acc=0.23, best_acc=0.23
 ==> acc=0.232, best_acc=0.232
Trying n=1, d=3, l=8  ==> acc=0.232, best_acc=0.232
Trying n=1, d=3, l=6  ==> acc=0.234, best_acc=0.234
Trying n=1, d=3, l=4  ==> acc=0.232, best_acc=0.234
Trying n=1, d=4, l=100  ==> acc=0.202, best_acc=0.234
Trying n=1, d=4, l=80  ==> acc=0.204, best_acc=0.234
Trying n=1, d=4, l=60  ==> acc=0.206, best_acc=0.234
 ==> acc=0.236, best_acc=0.236
Trying n=1, d=4, l=20  ==> acc=0.236, best_acc=0.236
Trying n=1, d=4, l=10  ==> acc=0.232, best_acc=0.236
Trying n=1, d=4, l=8  ==> acc=0.236, best_acc=0.236
Trying n=1, d=4, l=6  ==> acc=0.236, best_acc=0.236
Trying n=1, d=4, l=4  ==> acc=0.234, best_acc=0.236
 ==> acc=0.202, best_acc=0.236
Trying n=1, d=5, l=80  ==> acc=0.212, best_acc=0.236
Trying n=1, d=

In [427]:
X_train, X_val, y_train, y_val, model = fit_model_and_train(n, d, l)

In [428]:
accuracy_score(y_val, model.predict(X_val))

0.26

We get a 26% accuracy on cross_val data, which is 6% better than random selection (20%)

In [None]:

def task2():
    output_df_weights = pd.DataFrame()
    
    #Write your code here

    output_df_weights.to_csv('task2_weights.csv')
    results = backtester_without_TC(output_df_weights)
    df_performance = pd.DataFrame({'Net Returns': [results[0]], 'Sharpe Ratio': [results[1]]})
    df_performance.to_csv('task_2.csv')
    return

In [434]:
weights1_df = task1_Strategy1()
weights2_df = task1_Strategy2()
weights3_df = task1_Strategy3()
weights4_df = task1_Strategy4()
weights5_df = task1_Strategy5()

In [469]:
all_dates = range(4000)
all_symbols = list(range(20))  # assuming columns 0 to 19

# Fix weights3_df: currently indexed 7..3999, missing 0..6
weights3_df = weights3_df.reindex(all_dates, fill_value=0)
weights3_df = weights3_df.reindex(columns=all_symbols, fill_value=0)

# Fix weights5_df: currently indexed 1..3999, missing 0
weights5_df = weights5_df.reindex(all_dates, fill_value=0)
weights5_df = weights5_df.reindex(columns=all_symbols, fill_value=0)

# For others (weights1_df, weights2_df, weights4_df), just ensure they have correct index and columns
weights1_df = weights1_df.reindex(all_dates, fill_value=0).reindex(columns=all_symbols, fill_value=0)
weights2_df = weights2_df.reindex(all_dates, fill_value=0).reindex(columns=all_symbols, fill_value=0)
weights4_df = weights4_df.reindex(all_dates, fill_value=0).reindex(columns=all_symbols, fill_value=0)

# Put all your weights DataFrames in a list
weights_dfs = [weights1_df, weights2_df, weights3_df, weights4_df, weights5_df]

# Truncate each DataFrame to have max 3999 rows (to match X length)
for i in range(len(weights_dfs)):
    weights_dfs[i] = weights_dfs[i].iloc[:3999].reset_index(drop=True)

# Now weights_dfs contains the truncated DataFrames
# You can unpack if needed
weights1_df, weights2_df, weights3_df, weights4_df, weights5_df = weights_dfs

In [431]:
X = full_data.drop(columns=['label_strategy_1', 'label_strategy_2', 'label_strategy_3', 'label_strategy_4', 'label_strategy_5'])
X

Unnamed: 0,Close_0,Close_1,Close_2,Close_3,Close_4,Close_5,Close_6,Close_7,Close_8,Close_9,...,Turnover_10,Turnover_11,Turnover_12,Turnover_13,Turnover_14,Turnover_15,Turnover_16,Turnover_17,Turnover_18,Turnover_19
0,1.006308,1.015789,0.994319,1.003638,0.983622,1.018072,0.992221,0.961377,1.000417,1.006221,...,0.018102,0.033365,0.047618,0.014340,0.099950,0.341397,0.420741,0.002124,0.001192,0.344860
1,1.008000,1.013158,0.991334,1.001047,0.986162,1.031928,0.980791,0.963741,0.982495,0.992857,...,0.011445,0.012661,0.145429,0.062741,0.064570,0.607836,0.233177,0.006340,0.001229,0.278713
2,1.012769,0.993860,0.994704,1.027178,0.996541,0.992169,0.980791,0.960326,1.000972,0.990092,...,0.024880,0.005610,0.147956,0.047620,0.022900,0.399179,0.188886,0.004154,0.001216,0.344052
3,1.018308,1.002632,0.994222,1.036659,1.022162,1.001807,0.983966,0.952444,1.015699,1.002995,...,0.024167,0.029789,0.127575,0.044947,0.023923,0.312181,0.187236,0.003044,0.001558,0.336474
4,1.018000,1.016447,0.997882,1.037486,1.024649,1.023494,1.000159,0.954283,1.014865,0.990553,...,0.030979,0.008938,0.065911,0.053923,0.036537,0.296927,0.133178,0.001867,0.000872,0.371930
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3994,4.329077,1.551754,6.073471,0.559096,2.854378,241.248193,2.553897,4.943510,5.499861,9.842627,...,2.191538,0.391071,1.198244,1.458253,0.239750,8.230619,1.321007,6.435365,1.098627,0.922728
3995,4.297077,1.566447,6.170149,0.561521,2.861351,242.037952,2.483410,4.962953,5.441095,9.702304,...,1.452606,0.294064,4.600288,1.333114,0.287314,4.389508,1.200951,4.619902,0.617235,1.412765
3996,4.313385,1.552412,6.195185,0.552040,2.884811,242.356627,2.494999,4.973463,5.476799,9.687788,...,2.577236,0.339944,3.168236,1.086851,0.218458,6.575141,1.336927,3.169974,0.485855,0.597072
3997,4.222000,1.567763,6.175542,0.559702,2.889081,241.565663,2.411811,5.094062,5.495415,9.658986,...,3.327485,0.348124,1.006967,0.953084,0.223180,4.318706,1.291670,1.495426,0.781587,0.746459


In [496]:
def custom_Strategy(model=model, X=X, weights_df_list=[weights1_df, weights2_df, weights3_df, weights4_df, weights5_df]):
    # Ensure X is a DataFrame and model predicts over all 4000 days
    X = pd.DataFrame(X).reset_index(drop=True)
    predictions = model.predict(X)  # shape: (4000,), values in {0,1,2,3,4}

    # Stack all 5 strategy weight DataFrames into a 3D NumPy array: (5, 4000, 20)
    weights_array = np.stack([df.values for df in weights_df_list])  # shape: (5, 4000, 20)

    # Select for each day (0 to 3999) the correct (strategy, day) row
    day_indices = np.arange(3999)
    chosen_weights = weights_array[predictions, day_indices]  # shape: (4000, 20)

    # Convert to DataFrame with same structure as original
    custom_weights_df = pd.DataFrame(chosen_weights, columns=range(20), index=range(len(chosen_weights)))
    last_pred = model.predict(X.iloc[3998:3999])[0]
    last_weights = weights_df_list[last_pred].iloc[3998]

    # Convert last_weights Series to DataFrame row
    last_weights_df = last_weights.to_frame().T

    # Append to custom_weights_df
    custom_weights_df = pd.concat([custom_weights_df, last_weights_df], ignore_index=True)
    return custom_weights_df

In [497]:
backtester_without_TC(custom_Strategy())

[np.float64(4.763053825114705), np.float64(0.02133465278514637)]

In [490]:
backtester_without_TC(task1_Strategy1())

[np.float64(2.573023976625355), np.float64(0.018911677158724938)]

In [491]:
backtester_without_TC(task1_Strategy2())

[np.float64(-5.67201973052488), np.float64(-0.028456722846754)]

In [492]:
backtester_without_TC(task1_Strategy3())

[np.float64(-0.896308765751519), np.float64(-4.097029994888793e-05)]

In [493]:
backtester_without_TC(task1_Strategy4())

[np.float64(12.159788845936825), np.float64(0.04410846436813261)]

In [494]:
backtester_without_TC(task1_Strategy5())

[np.float64(7.375042111050867), np.float64(0.021283711517966163)]