In [6]:
import pandas as pd
import numpy as np
import sklearn
import torch.nn as nn
import sklearn.model_selection
from sklearn import linear_model
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from datetime import datetime
from sklearn.decomposition import PCA
pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)
pd.reset_option('display.max_rows')

In [2]:
def add_target(team):
    team['target'] = team['won'].shift(-1)
    return team

def rest_days(team):
    team['date_time'] = pd.to_datetime(team['date'])
    team['rest'] = (team['date_time'] - team['date_time'].shift(1)).dt.days
    team['rest'] = team['rest'].fillna(0)
    team['rest'] = team['rest'].astype(int)
    del team['date_time']
    return team

def winrate(team):
    total = team['Wins'] + team['Losses']
    total_opp = team['Wins_opp'] + team['Losses_opp']
    team['winrate'] = team['Wins'] / total
    team['winrate_opp'] = team['Wins_opp'] / total_opp
    return team

def differential(team):
    team['differential'] = team['Total'] - team['Total_opp']
    return team

def find_team_exp_average_5(team):
    numeric_columns = team.select_dtypes(include=np.number)
    rolling = numeric_columns.ewm(span=5, adjust=False).mean()
    return rolling

def find_team_exp_average_9(team):
    numeric_columns = team.select_dtypes(include=np.number)
    rolling = numeric_columns.ewm(span=9, adjust=False).mean()
    return rolling

def find_team_exp_average_12(team):
    numeric_columns = team.select_dtypes(include=np.number)
    rolling = numeric_columns.ewm(span=12, adjust=False).mean()
    return rolling

def find_team_average_15(team):
    numeric_columns = team.select_dtypes(include=np.number)
    rolling = numeric_columns.rolling(15).mean()
    return rolling

def find_team_average_10(team):
    numeric_columns = team.select_dtypes(include=np.number)
    rolling = numeric_columns.rolling(10).mean()
    return rolling

def find_team_average_5(team):
    numeric_columns = team.select_dtypes(include=np.number)
    rolling = numeric_columns.rolling(5).mean()
    return rolling

def find_team_average_3(team):
    numeric_columns = team.select_dtypes(include=np.number)
    rolling = numeric_columns.rolling(3).mean()
    return rolling

def rolling(data):
    df_rolling_3 = data[list(valid_columns) + ['Teams','won', "season"]]
    df_rolling_3 = df_rolling_3.groupby(['Teams', 'season'], group_keys = False).apply(find_team_average_3)
    df_rolling_5 = data[list(valid_columns) + ['Teams','won', "season"]]
    df_rolling_5 = df_rolling_5.groupby(['Teams', 'season'], group_keys = False).apply(find_team_average_5)
    df_rolling_10 = data[list(valid_columns) + ['Teams','won', "season"]]
    df_rolling_10 = df_rolling_10.groupby(['Teams', 'season'], group_keys = False).apply(find_team_average_10)
    df_rolling_15 = data[list(valid_columns) + ['Teams','won', "season"]]
    df_rolling_15 = df_rolling_15.groupby(['Teams', 'season'], group_keys = False).apply(find_team_average_15)
    df_exp_rolling_5 = data[list(valid_columns) + ['Teams','won', "season"]]
    df_exp_rolling_5 = df_exp_rolling_5.groupby(['Teams', 'season'], group_keys = False).apply(find_team_exp_average_5)
    df_exp_rolling_9 = data[list(valid_columns) + ['Teams','won', "season"]]
    df_exp_rolling_9 = df_exp_rolling_9.groupby(['Teams', 'season'], group_keys = False).apply(find_team_exp_average_9)
    df_exp_rolling_12 = data[list(valid_columns) + ['Teams','won', "season"]]
    df_exp_rolling_12 = df_exp_rolling_12.groupby(['Teams', 'season'], group_keys = False).apply(find_team_exp_average_12)
    exp_rolling_columns_5 = [f"{col}_exp_5" for col in df_exp_rolling_5.columns]
    exp_rolling_columns_9 = [f"{col}_exp_9" for col in df_exp_rolling_9.columns]
    exp_rolling_columns_12 = [f"{col}_exp_12" for col in df_exp_rolling_12.columns]
    rolling_columns_15 = [f"{col}_15" for col in df_rolling_15.columns]
    rolling_columns_10 = [f"{col}_10" for col in df_rolling_10.columns]
    rolling_columns_5 = [f"{col}_5" for col in df_rolling_5.columns]
    rolling_columns_3 = [f"{col}_3" for col in df_rolling_3.columns]
    df_exp_rolling_12.columns = exp_rolling_columns_12
    df_exp_rolling_9.columns = exp_rolling_columns_9
    df_exp_rolling_5.columns = exp_rolling_columns_5
    df_rolling_15.columns = rolling_columns_15
    df_rolling_10.columns = rolling_columns_10
    df_rolling_5.columns = rolling_columns_5
    df_rolling_3.columns = rolling_columns_3
    df = pd.concat([data, df_rolling_3, df_rolling_5, df_rolling_10, df_rolling_15, df_exp_rolling_5,df_exp_rolling_9, df_exp_rolling_12], axis=1)
    # df_exp_rolling_5,df_exp_rolling_9, df_exp_rolling_12
    return df

def ratio(feature):
    feature_opp = 'OPP_' + str(feature)
    free = nba[feature] / nba[feature_opp]
    return free

def ratios(nba):
    regard = []
    disregard = [col for col in nba.columns if "OPP_" in col]
    for col in disregard:
        col = col[4:100]
        if col in nba.columns:
            regard.append(col)
    nba_ratio = nba[regard].apply(ratio)
    nba_ratios_columns = [f"{col}_ratio" for col in nba_ratio.columns]
    nba_ratio.columns
    return regard

def shift_col(team, col_name):
    next_col = team[col_name].shift(-1)
    return next_col

def add_col(df, col_name):
    return df.groupby("Teams", group_keys=False).apply(lambda x: shift_col(x, col_name))

def date_change(datetime_str):
    # Parse the datetime string into a datetime object
    datetime_obj = datetime.strptime(datetime_str, '%m/%d/%Y')

    # Format the datetime object into a new string structure
    new_datetime_str = datetime_obj.strftime('%Y-%m-%d')

    return new_datetime_str

def haircut(df, date):
    df[date] = df[date].str[:10]
    return df

def convert_date_format(df):
    # Create a boolean mask to identify values in the "m/d/y" format
    mask = df['Date'].str.contains(r'\d{1,2}/\d{1,2}/\d{2}')
    
    # Apply the conversion only to values that match the mask
    df.loc[mask, 'Date'] = nba.loc[mask, 'Date'].apply(date_change)
    return df

def spread(df):
    # construct spread between home and away rankings
    ranks = [i for i in df.columns if 'RANK' in i]
    ranks_home = [i for i in ranks if '_x' in i]
    ranks_away = [i for i in ranks if '_y' in i]
    spread_columns_names = [col[:-1] for col in ranks_home]
    
    spread_columns = []
    for base_name in spread_columns_names:
        spread_column = df[f'{base_name}x'] - df[f'{base_name}y']
        spread_column.name = f'{base_name}spread'
        spread_columns.append(spread_column)
        
    spread_df = pd.concat(spread_columns, axis=1)
    return spread_df

In [20]:
!pwd

/Users/benjamincheng/Documents/GitHub/Sports-Betting/ml_notebooks/models


In [54]:
folder_path = "/Users/benjamincheng/Documents/GitHub/Sports-Betting/data/raw_data/NBA_2018_2024.csv"
#folder_path = "/Users/liqingyang/Documents/GitHub/sports_trading/sports_betting/data/raw_data/NBA_2018_2024.csv"

df = pd.read_csv(folder_path, index_col=0)

folder_path = "/Users/benjamincheng/Documents/GitHub/Sports-Betting/nba_api/data/teams_stats/processed_cumulative_season_stats_2019_2024.csv"
#folder_path = "/Users/liqingyang/Documents/GitHub/sports_trading/sports_betting/nba_api/data/teams_stats/processed_cumulative_season_stats_2019_2024.csv"
nba = pd.read_csv(folder_path, index_col=0)

folder_path = "/Users/benjamincheng/Documents/GitHub/Sports-Betting/data/odds_data/2021_01_20_onward.csv"
#folder_path = "/Users/liqingyang/Documents/GitHub/sports_trading/sports_betting/data/odds_data/2021_01_20_onward.csv"
odds = pd.read_csv(folder_path, index_col=0)

In [55]:
# nba dataframe does not include the 2018 season
df = df[~df['season'].isin([2018])]
df = df.reset_index(drop=True)
df = haircut(df, 'date')

# rename nba columns to match df
nba = haircut(nba, 'Date')
nba = convert_date_format(nba)
nba.rename(columns={'Date': 'date'}, inplace=True)

#rename odds columns to match df
odds.rename(columns={'Timestamp': 'date_next'}, inplace=True)

# construct winrate for team
df = winrate(df)
# construct differential points
df = differential(df)
# construct target
df = df.groupby("Teams", group_keys=False).apply(add_target)
# construct resting
df = df.groupby(["Teams",'season'], group_keys=False).apply(rest_days)
# games yet to play are 2
df.loc[pd.isnull(df['target']), 'target'] = 2
# convert win/loss to 1/0
df['target'] = df['target'].astype(int)

# remove metadata and target for df
removed = ['target', 'date', 'Teams_opp', 'Teams',
           'season','won', 'Wins', 'Losses', 
           'Wins_opp', 'Losses_opp', 'winrate', 'winrate_opp']
valid_columns = df.columns[~df.columns.isin(removed)]

df = df.dropna()

# scale the data for df
scaler = StandardScaler()
df[valid_columns] = scaler.fit_transform(df[valid_columns])

df = df.dropna()

# PCA


# construct rolling features to df
df = rolling(df).copy()
df = df.dropna()

# remove metadata for nba ranking 
removed = ['date', 'Teams']
valid_columns = nba.columns[~nba.columns.isin(removed)]

# scale the ranking data
scaler = StandardScaler()
nba[valid_columns] = scaler.fit_transform(nba[valid_columns])

# remove metadata for odds data
removed = list(odds.columns[odds.dtypes == 'object'])
valid_columns = odds.columns[~odds.columns.isin(removed)]

# scale the odds data
scaler = StandardScaler()
odds[valid_columns] = scaler.fit_transform(odds[valid_columns])

# construct current game metadata for df
df['home_next'] = add_col(df, 'home')
df['team_next_opp'] = add_col(df, 'Teams_opp')
df['date_next'] = add_col(df, 'date')
df = df.copy()

# merge stats from nba dataframe
full = pd.merge(df, nba, on=['Teams', 'date'], how='left')
full = full.dropna()

# merge stats from opposing teams
complete = full.merge(full,
               left_on=['Teams', 'date_next'],
               right_on = ['team_next_opp', 'date_next'])

# concat the spreads
spread_df = spread(complete)
complete = pd.concat([complete, spread_df], axis=1)

# concat the odds
complete = pd.merge(complete, odds, on=['Teams_x', 'date_next', 'Teams_y'], how='left')
complete = complete.dropna()

pca = PCA(n_components=0.95)
disregard = list(complete.columns[complete.dtypes == 'object']) 
# disregard = disregard + ["target_x","target_y"]
# Temporary change for tuning
disregard = disregard + ["target_y" + "target_x"]
regard = complete.columns[~complete.columns.isin(disregard)]
new_data = pca.fit_transform(complete[regard])

  df = df.groupby("Teams", group_keys=False).apply(add_target)
  df = df.groupby(["Teams",'season'], group_keys=False).apply(rest_days)
  df_rolling_3 = df_rolling_3.groupby(['Teams', 'season'], group_keys = False).apply(find_team_average_3)
  df_rolling_5 = df_rolling_5.groupby(['Teams', 'season'], group_keys = False).apply(find_team_average_5)
  df_rolling_10 = df_rolling_10.groupby(['Teams', 'season'], group_keys = False).apply(find_team_average_10)
  df_rolling_15 = df_rolling_15.groupby(['Teams', 'season'], group_keys = False).apply(find_team_average_15)
  df_exp_rolling_5 = df_exp_rolling_5.groupby(['Teams', 'season'], group_keys = False).apply(find_team_exp_average_5)
  df_exp_rolling_9 = df_exp_rolling_9.groupby(['Teams', 'season'], group_keys = False).apply(find_team_exp_average_9)
  df_exp_rolling_12 = df_exp_rolling_12.groupby(['Teams', 'season'], group_keys = False).apply(find_team_exp_average_12)
  return df.groupby("Teams", group_keys=False).apply(lambda x: shift_col(x, c

In [57]:
new_data.shape

(7284, 210)

In [69]:
# construct new dataset
new_data_df = pd.DataFrame(new_data)
complete_cleaning = pd.concat([new_data_df, complete['target_x']], axis=1)
complete_cleaning = complete_cleaning.dropna()

In [70]:
complete_cleaning

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,target_x
3480,38.932193,10.359131,3.423958,-10.867933,-10.191916,-4.527075,-5.092002,-8.047936,3.301930,8.520768,7.555385,-10.915409,-5.797639,6.786231,-2.310567,6.637961,0.888507,-6.441761,0.761253,-6.854159,-0.034790,6.724362,-5.482702,-3.885671,2.652593,-2.596124,-1.201480,2.018209,-3.577083,1.624797,6.938743,2.213901,-3.538007,0.489163,1.843563,3.610187,1.473977,1.626125,4.230126,-1.561270,0.718977,1.360121,3.752333,-3.850286,-0.658804,2.398301,-0.939845,0.849419,-0.314745,-0.849981,2.085697,-1.160466,0.292448,-1.085845,1.044355,-1.773182,1.712232,2.224505,0.882525,-1.251148,-0.954058,3.467242,-0.004625,-0.598897,0.053073,1.282731,-1.214162,0.694404,0.168475,1.516021,3.238554,-0.018992,-2.234965,-0.131055,1.407750,1.935900,-1.740643,0.895655,-0.440040,-2.138976,2.694747,1.220210,-0.051038,-0.625442,-1.415179,-0.161420,1.524471,1.220290,-1.577260,0.835839,0.565466,-2.181395,-1.615432,3.694167,0.959382,0.582909,-1.412699,-0.414199,1.713605,1.509660,-0.086495,-3.156318,-0.357320,-0.706386,2.061964,2.006242,2.150879,-0.681931,0.427717,1.171600,-3.072371,-2.977199,0.384891,-2.984971,1.270971,0.837872,-0.306985,0.409950,2.037288,-2.884417,0.342019,0.354760,-3.047873,0.288600,-3.413516,1.190697,1.033936,0.084777,0.782887,-0.460147,0.675773,3.605109,0.647844,1.236363,0.991778,-1.169503,0.835076,0.614633,-0.873964,0.700576,-2.206979,-0.108523,-1.504960,0.485185,0.255956,-0.477419,-0.854388,-0.727381,-0.241108,1.295797,-1.868560,-0.790290,-0.339685,-1.073472,-1.257389,-0.379514,1.054372,-1.353437,-0.288015,3.285049,0.132711,0.933371,2.857965,0.968239,1.379459,-0.861229,1.123858,-1.291637,-0.262946,-0.127624,1.962697,-0.922673,-0.483780,-0.710672,-0.822571,0.684370,-1.375738,0.574163,-0.155613,0.097246,0.086624,-1.164909,1.010678,-0.721900,1.459099,0.118124,-0.598240,-0.088634,-1.054098,0.331285,0.603434,-0.286857,-0.188738,0.188545,0.155689,0.952000,0.303764,0.290820,0.628234,-1.048564,0.900541,-1.649587,-0.664096,0.467994,0.321143,-0.498290,-0.293177,-0.630496,0.308141,-0.307566,1.0
3482,40.457300,16.374521,-6.923654,11.005788,-2.727756,-13.015506,-3.308281,1.097636,-2.729533,2.117121,4.624138,5.742022,-3.211445,-1.893318,-1.918782,8.846346,-5.749513,0.825835,-4.343874,0.324044,0.038228,4.008920,2.152770,3.428145,-0.594474,5.988318,-6.795020,0.544827,3.000067,-3.165540,-1.976002,0.102166,-0.646755,1.182642,-3.801903,2.239588,-1.798328,-2.053466,-2.600844,4.585037,0.346801,2.594669,-1.597531,-0.505415,-2.218588,0.164947,1.931386,5.542200,1.883463,-0.854850,-0.165563,0.904898,4.071357,0.549065,0.717642,0.564795,0.739862,-2.136789,0.894084,-0.448777,-2.262250,-0.023272,-2.020136,2.561427,-1.314704,0.309107,0.480301,2.281675,1.796859,3.095455,0.459063,0.070807,-0.882637,-3.331787,1.065631,-0.930052,1.483801,-2.089211,2.350446,2.173723,-1.340735,0.960964,-1.328044,-0.163234,2.116012,-1.439411,-0.040280,-0.633866,0.157461,-1.748924,1.895237,-1.451069,0.357742,0.643184,0.080198,0.455668,-0.819971,0.265922,1.426194,-0.278167,-0.508526,0.187272,-0.598379,2.608008,0.186063,1.591676,1.333410,-0.132662,0.448536,1.816544,-0.990111,1.230279,2.157779,-0.336482,0.521620,-0.400394,0.090261,0.979838,1.402480,0.187318,-0.361730,0.095007,-1.236180,-0.115535,-1.337541,1.196263,-0.349386,0.938315,-0.185216,-1.436374,-2.094814,0.460247,-0.562620,-0.278090,0.197212,1.314355,0.423097,-0.709917,-0.847029,-0.186434,0.958454,1.397081,0.320509,2.052772,0.334097,0.146705,0.254707,0.269222,1.459363,0.482820,0.660032,-0.436694,-0.381205,2.697027,-1.379243,-0.171462,-0.718073,1.270651,-0.273876,0.629254,-1.079043,0.144442,0.321384,0.654267,1.315471,-2.053146,-0.546618,-0.569385,-1.015890,0.537675,-0.440031,-0.000067,-1.156564,0.082193,-1.320867,-0.090622,0.510958,0.322903,1.316113,-1.602178,0.741134,0.046932,-0.295667,-0.410201,-0.303413,0.633157,1.503265,0.170315,-1.017739,-0.218119,-1.370087,-0.413377,-0.082186,-0.919428,-0.326747,-0.407725,2.243818,1.415598,1.327011,-1.481674,-1.540677,-0.545420,-0.643078,0.965527,-1.272349,1.021707,-0.877439,-1.205391,1.349377,-0.236620,1.0
3493,40.404207,-14.758809,-13.802500,27.562467,3.798879,-3.367982,-4.171276,-0.222501,3.992395,2.972432,14.611782,-4.656365,-6.197776,-3.999794,-2.018750,2.090093,2.681667,1.589365,-1.462367,-2.584092,2.072173,-2.572375,-3.207872,-3.507792,-1.530802,2.824272,3.508022,4.497621,3.679387,-2.904250,1.713368,6.021719,-0.685122,0.047811,-0.359756,-1.970509,1.465312,-3.176085,-1.854433,-0.067855,-4.791427,0.768976,-2.432898,2.286739,1.748316,2.749649,0.811635,3.874573,-4.327259,-0.013452,1.877633,1.594643,-4.981515,4.670469,1.404245,0.728125,5.912013,0.738125,-2.951750,-0.573458,-1.319431,-2.670703,-0.526038,-3.462554,0.283855,0.559861,-1.313617,-1.708956,1.262582,0.771642,0.379312,1.559125,0.238623,-2.105893,3.064708,-0.022299,-3.265719,-1.170604,-1.709944,-0.615390,2.115515,-0.061727,-0.371647,2.601112,0.521853,-3.328406,0.739958,-0.116653,1.416847,-1.421475,0.285669,-1.112267,3.517096,0.833432,1.091602,-0.069442,-0.126300,-0.756818,0.652432,1.883108,0.968613,0.055713,-0.732976,0.421808,0.617511,-2.204279,-0.663638,-0.586779,-1.389217,0.352111,0.545269,-1.024112,-0.295157,-0.217136,2.134607,-0.610582,-2.044054,1.016057,-1.032736,-0.074843,-0.497630,0.345653,-1.214439,-2.677523,-0.508344,-0.995410,-1.686125,0.874500,1.246827,0.749959,-1.768129,0.938415,0.321952,1.501696,0.448785,1.138930,-0.952135,-3.357671,0.493695,-1.673822,-0.312850,-0.237317,-1.585103,0.736415,0.178484,0.953863,-1.886932,-0.780752,0.203254,-1.157515,-0.767039,0.475359,0.016697,1.633996,0.514660,-0.084596,2.043934,0.511783,-1.152539,0.759008,-1.600993,1.459693,0.149164,1.540816,1.056256,0.455541,-0.469882,-0.189131,-0.939022,-0.540127,0.553709,0.863260,-0.810712,-1.595876,0.885985,-0.993074,1.227258,0.414189,1.401897,-0.798898,-1.067472,0.197669,-0.153746,-1.022313,-0.147630,1.064868,1.125863,-1.890881,-0.455063,-2.407566,0.674175,0.122979,0.770993,-0.229501,1.081804,1.113083,-0.402262,1.969128,-0.722993,1.533847,-0.958718,-1.361484,-0.691430,0.337311,-2.037004,0.869319,0.116110,0.050200,-0.700466,0.602666,1.0
3500,41.260764,-2.925841,12.076066,-16.026180,0.054102,9.725052,1.704710,-7.356294,6.657713,6.305663,3.435843,-4.500935,-0.885058,4.271453,0.890816,-1.356611,-5.045189,0.053436,2.584763,-2.277619,-3.361627,1.770443,0.638831,-0.063311,3.401723,-4.851996,-1.332666,0.519446,-8.963664,-4.086986,2.066501,-3.652419,-2.474937,2.803714,0.140966,2.594909,1.157765,2.927687,-0.823808,-2.638609,2.027755,-0.430969,1.067823,-0.219186,0.192291,-2.141740,-3.911383,-1.210182,1.818403,-0.583259,0.952704,-1.082890,0.163279,2.860664,0.996858,-2.362175,-3.249978,2.551429,-0.429343,3.215650,-1.117054,-1.719595,5.573781,2.463663,-2.662444,4.535951,0.082116,-0.362356,1.565619,3.444553,-0.640205,-1.517476,1.745659,-0.688312,-0.165124,0.096966,-1.331724,0.839476,-0.156329,-0.078500,-1.371751,-0.031589,-0.715130,1.482398,0.207945,0.600324,0.577260,-0.871481,-1.579004,-1.194655,-0.346943,-0.842683,2.933199,3.207807,0.260061,0.875937,1.602059,0.123275,-1.775874,-0.090832,-2.937971,-1.640324,-1.874131,-1.534511,0.571614,2.219799,-0.830290,1.297523,0.060626,-2.348220,-0.049882,-0.180048,-0.504262,-1.111348,0.353563,0.858885,-0.108791,-0.035217,1.808818,0.621835,-1.369242,0.538767,-3.092423,-1.715457,0.405806,0.606756,-1.269536,0.759619,-0.017938,-2.948113,-1.616837,0.017460,-1.451177,0.538561,-0.808827,-2.317763,1.048251,0.345371,-0.576811,-0.341154,0.024170,1.071957,2.018031,-1.501186,-0.992690,0.404303,0.515321,1.012199,-0.773349,0.391787,-1.722553,-0.774351,-0.113060,-0.410035,-0.616713,0.300771,0.089584,0.341970,0.653185,1.594445,-0.877818,0.218870,-0.733019,0.073543,0.975232,-0.866538,1.103390,0.967873,0.494967,-0.997852,-0.628750,-0.377351,-0.470481,-1.898127,-1.433342,1.224948,0.799029,-0.321378,-0.432419,0.231463,0.330946,0.236503,1.354866,-2.200656,-0.420458,0.845359,0.491203,0.060138,-1.049597,0.793217,0.059705,1.232958,0.751395,0.377415,0.575710,1.237473,-1.437525,-0.821491,1.193765,-1.144097,1.486418,0.817172,-0.444790,-0.083463,-0.283436,-0.386107,0.646240,-0.461836,-0.609921,0.329692,0.0
3502,40.120973,-9.254572,35.027442,-13.973184,3.515168,10.541112,-4.050987,0.064366,3.720342,-5.139672,-3.344226,-3.674838,-1.208809,-0.653882,-1.071002,3.119333,-8.941394,1.952425,-4.823955,6.848660,1.738471,-2.375197,1.180851,-0.891506,-4.752805,1.738845,5.326738,5.224639,1.751671,0.499402,-0.708558,0.622995,-2.120093,5.382117,5.060779,-0.223397,2.072245,-0.076892,-4.870749,4.228053,-0.125243,-0.239997,-0.154528,0.610384,2.544634,-0.802889,-2.655838,-5.383305,0.801875,2.265054,4.812852,0.857705,0.325203,2.740862,2.627122,-0.510472,1.260074,-0.082858,-2.139690,-0.944655,-3.598631,1.639537,0.377234,-1.726347,1.399118,3.968713,0.278984,-0.433339,-2.546096,-1.003240,0.910340,1.277280,0.615167,1.291587,-1.750165,0.353585,1.763995,2.017471,-0.842781,-0.017272,-0.298828,-1.957862,0.032129,-0.933554,0.663664,-1.094850,2.028595,-1.293706,0.475145,3.285911,0.556520,0.634378,-1.096336,3.392850,3.292291,0.890271,0.388458,1.265361,-2.988770,-2.418739,0.107701,-3.082352,-0.590644,1.308787,1.095343,1.279193,0.620205,1.930764,2.855288,1.257851,-1.381461,-0.719581,1.053901,0.606900,-0.469923,-0.863429,0.291747,-0.565680,-2.156682,0.993521,-0.076435,0.679186,0.370580,0.874887,-1.040987,0.065378,0.180531,1.167297,-2.692153,1.013392,-0.815993,1.936218,-0.168556,-0.451803,0.553221,0.163636,1.289778,-0.577442,-0.314160,-0.090404,0.909906,1.497754,-0.919273,-0.004817,-0.104997,-1.456288,-0.568082,2.406149,0.623801,-1.628340,-0.826450,1.521281,0.160288,1.157352,0.295528,-0.505262,1.334588,-1.328560,0.178462,-0.927063,-2.041811,1.000448,-1.161075,0.568066,0.849456,0.247455,0.033829,-0.129229,0.318325,0.449482,-1.502920,2.013632,1.028301,-1.434688,-1.790070,-0.048331,0.894686,0.426835,-0.220867,-0.054678,-0.539152,-0.285391,-0.425418,-0.170041,1.426593,1.101583,-1.272677,0.896448,-0.395933,0.122974,-0.721116,0.751967,0.415178,0.332408,0.460704,0.271283,-0.026385,0.803769,-1.106643,-0.245950,0.985668,-0.203964,-1.107521,0.517207,-1.067216,0.065816,-0.752821,0.253304,1.966425,-0.483101,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7279,33.337567,-9.299371,-10.678118,0.287284,-8.996308,-7.345720,-8.701260,5.646733,-2.350658,0.339356,1.174319,-2.425747,-1.763933,-1.928176,-0.102209,3.374626,2.825784,-7.905369,5.858033,2.343114,9.652820,-3.808550,2.515069,0.337286,-5.492054,5.157535,-2.371658,2.428825,-2.533199,3.034003,1.947584,0.827700,4.688520,-1.826077,-4.431206,-0.082335,1.056306,1.262174,-1.412315,-1.521393,-2.060831,2.345691,-1.429172,-2.231170,1.489338,-1.534328,1.460460,1.838129,-1.163434,-0.817544,-3.301896,0.611882,3.216110,1.350643,3.360107,-1.104104,3.754913,-2.306547,-1.753994,-4.572826,-1.424934,-0.051080,-0.841065,-3.242638,2.578750,0.213423,-2.009117,1.810267,0.461856,2.479697,-2.388083,1.313913,-0.063128,-1.829562,1.003442,1.050932,-1.783858,4.376442,1.466395,-1.781396,0.983476,0.115915,-1.829350,-0.078048,1.227366,-2.338599,-0.333119,2.251835,0.498479,-0.317825,-1.792239,-0.129876,-0.485981,-0.989651,0.392663,-0.431638,0.870840,-1.870300,-1.046015,0.555752,0.443185,2.493403,1.379780,-0.708151,-1.574451,0.698796,0.418750,-1.685327,1.318377,-0.840576,-1.042735,0.798430,0.735027,-1.206833,-0.036360,-0.925230,0.847045,-0.118253,-2.281280,-0.217958,0.073766,0.541142,0.744704,-0.273900,1.606767,-0.510925,1.248415,-0.740977,0.761748,-0.704215,0.602276,-0.194797,0.712102,-0.711939,0.593408,-0.838401,-1.220738,-0.391679,0.683837,1.152394,0.580872,-0.655718,1.190646,-1.599504,1.465050,-1.090208,1.988955,1.000754,-0.217182,-2.464317,-1.336747,-0.130525,0.033945,-2.086458,-0.418932,1.044784,-1.059127,0.667204,0.395781,0.204880,-0.736942,-0.760769,0.151624,-0.361075,0.884666,0.636773,-0.697361,-1.549321,-1.196698,1.047279,0.012114,-0.839508,-2.242592,0.599846,-0.894562,0.866486,-0.986189,0.709259,-0.477238,-0.263996,-1.946364,0.396516,1.115457,-0.048110,0.064254,0.157095,0.728769,-1.664474,1.302916,-0.353429,0.130807,-1.661753,-0.631386,0.932713,1.429910,-0.542718,-1.787823,-0.619578,-0.510736,-2.794200,0.730290,-0.255906,0.097535,-0.983817,0.817467,0.118488,0.045209,-1.378552,1.090081,0.198901,0.0
7280,33.629629,-28.355213,-7.436787,-21.975698,-3.824086,3.488843,7.337865,6.704603,10.267905,-14.791198,0.832246,9.300105,4.259584,1.139178,1.588268,-2.586778,0.803660,-2.674889,3.195146,-2.008129,8.348551,2.663760,-2.100775,-2.466225,0.206602,-0.319199,-2.698356,3.383621,6.716098,3.557096,0.605405,0.237022,-0.846705,-0.382601,0.809616,-2.016213,-3.969440,1.852692,0.599466,5.154510,0.416187,1.833945,-3.304212,-0.420929,3.440898,2.000500,-3.740708,3.325869,-1.430073,2.673492,-0.143684,1.198122,4.494341,-0.059360,-1.060635,0.428488,-4.677905,0.187021,-1.363106,-0.146112,-0.121224,-1.537038,0.773008,2.252931,-2.698354,-1.049615,-0.967630,-0.208433,-1.461538,4.167355,0.692916,-0.682153,-2.663375,-0.312510,1.635708,-0.511973,-0.126287,-0.619548,3.143597,1.096167,-1.618260,2.630813,-0.329614,0.824894,1.659757,2.175442,0.997431,2.041647,1.061606,-0.190580,-1.519417,3.736538,0.864177,0.280323,0.642171,4.711866,0.744303,2.209105,0.093185,0.299300,-1.843021,0.227832,-0.821296,0.906782,-1.024814,0.922621,-0.853809,-0.853305,-0.881694,-0.101858,1.611711,-0.998360,-2.278300,-1.125143,-0.121859,0.882966,3.563052,-0.693004,-0.763688,1.295795,-1.149054,-0.074823,-0.520627,-0.874140,0.004929,-2.599044,-0.189537,-0.303713,2.048242,-0.323647,-0.166168,-1.093947,-0.394587,0.071779,-0.339176,-0.704968,-3.177022,1.324880,-1.557030,0.500785,-0.619590,-0.098558,1.172940,1.469079,-0.239284,-2.519188,-0.675684,0.177829,-0.434390,1.300871,0.136791,-0.855695,1.040369,-0.207943,0.917002,-2.679822,-0.373044,0.405347,1.881709,1.625707,1.604770,-1.888203,2.101555,0.048303,-0.946483,2.064155,0.646441,-0.411748,-0.715936,-1.022905,-0.092299,-0.430891,0.642016,-0.482650,-0.858053,1.851101,-0.175035,-0.318069,0.033163,-0.853632,1.262799,0.854606,-0.901064,-0.439460,-0.283713,-0.229371,1.178139,0.334032,0.385521,0.212840,0.499525,-2.037040,1.110018,0.209283,1.656824,-0.110394,-1.031214,-0.245142,-0.742845,0.629298,-1.322419,1.799899,-0.093244,-0.345170,0.312452,-1.758944,0.934884,0.329721,-2.354008,-0.737357,1.0
7281,33.006870,-30.596298,7.834759,1.270376,4.414120,4.167087,2.236802,6.021288,-3.079995,-2.080917,-2.726095,1.133947,5.234443,1.792495,0.093955,5.063045,-3.754642,-0.265465,0.507299,9.033278,8.979930,-4.238901,-4.285118,0.109058,-5.198273,-0.316749,-2.700423,-1.799313,-3.829276,-1.331920,-5.449568,-5.698134,-4.956245,-0.620167,5.972354,-5.247526,2.689331,-3.848000,2.506219,0.235950,2.265464,2.831597,-4.601698,-3.339984,-0.675863,0.926125,-2.033725,1.535226,1.887819,0.797195,1.296532,-1.547803,-3.788061,0.641687,-1.957617,0.763702,0.722153,-2.282736,1.618892,-0.083651,-1.070043,0.951521,1.805358,-1.113801,-0.040116,-3.257614,3.524891,-1.595022,2.281520,0.470960,0.167482,2.828350,1.356656,0.604846,-2.656761,-3.066394,2.591636,-2.518308,-2.443336,0.278559,-1.455694,1.615219,-0.115513,-0.777644,-1.595854,-0.180985,-3.254145,-3.171485,2.534710,-0.270309,-1.848724,1.513675,-2.025768,2.077866,-1.463627,-0.799741,0.170306,-0.311063,-0.879934,0.014825,1.699277,-1.217200,-0.070009,2.867791,0.026646,0.360078,-0.806277,1.499284,1.801716,-0.646599,-0.376501,-0.090183,0.303490,1.365482,1.104832,-2.101145,-0.746928,1.844290,0.935827,0.218035,-2.139081,-0.560429,1.048663,0.420268,0.657508,-1.042748,3.113244,0.136406,-0.962309,0.186353,-1.765334,-0.182598,0.790512,-1.430749,-0.038824,0.507229,-0.372910,-0.564515,-2.179264,0.240900,-1.538858,0.704314,-0.586626,-1.298796,0.994718,0.991785,0.288657,-0.271775,1.452798,0.766146,-0.045943,-0.050295,-1.644818,-2.113900,0.469678,-0.230045,-0.066804,0.414733,1.199837,1.480415,-1.655858,-1.478636,2.634869,-1.305616,-0.869323,-0.609668,-1.407490,1.767356,0.274580,0.521454,-0.156489,-0.057049,-0.989660,0.774665,0.005754,-0.722399,0.666789,1.489426,-2.580956,1.059161,-0.045709,-0.078788,0.914994,-1.613633,-2.181243,-0.961995,-0.543880,2.470632,-1.799101,0.793488,-0.111045,-0.545201,0.221525,0.000687,0.450112,0.886340,0.723425,0.215694,0.896809,-0.433647,0.113866,1.010059,-1.436490,-0.339192,-1.069970,-0.713697,0.628667,1.585582,0.200306,0.854907,0.0
7282,33.085938,23.406804,6.285478,15.908907,6.907672,-10.366224,1.349897,2.914477,4.944940,0.277638,0.141774,7.167143,3.928364,-1.519214,0.528020,-2.043388,-2.940852,-9.494871,3.003119,-0.967418,2.439671,-2.403559,-1.189010,0.059683,-4.208046,7.631861,4.664129,-5.456275,-2.341704,0.439124,3.042760,-2.067783,-2.733051,-3.448684,-1.861056,3.046230,1.277618,-1.841023,2.824255,0.160177,1.709139,2.872059,-1.821410,0.771230,-0.098684,-0.496048,3.832128,1.613368,3.645863,2.472417,-1.949691,0.629790,-2.536067,-4.029239,-3.533406,1.558652,3.856682,3.789408,-1.920113,-0.630396,-0.183020,-0.440159,0.922181,3.286462,0.503582,-0.805708,-2.226665,-1.668991,-6.361873,-2.514252,-1.965710,1.013010,-3.896044,-0.560268,0.310485,-0.255347,-0.480431,0.306599,1.154726,1.356453,-1.099446,2.768168,1.997368,-2.006978,-1.562477,1.165395,-0.926708,3.241682,-3.772984,4.359964,0.468246,2.718046,-0.478308,-0.480794,1.932810,0.182267,1.463981,-2.144732,0.623004,-0.680468,1.030912,2.202159,-2.523493,-1.376163,-3.906571,1.486772,1.146082,0.610447,0.294709,-2.051878,0.346170,-1.570810,0.810664,1.312737,-0.598283,1.391486,0.892014,1.891881,-0.209174,1.190925,0.270343,1.520102,-0.497934,3.253058,-2.751960,-1.922005,-1.421661,0.736433,0.910328,-0.743830,0.803885,-0.661021,0.207366,-2.165053,1.268656,0.002694,0.609848,-1.468282,0.386296,-0.264965,-3.149535,-0.062867,0.126944,-0.717990,0.961019,1.122738,0.739335,0.196287,-0.468862,0.200205,0.765939,-0.323000,0.233741,0.300855,-1.308030,0.158573,1.219204,0.097616,1.397061,-1.310812,-1.833760,-0.755403,1.003820,-2.086908,-0.144341,-1.723674,1.967988,1.034040,0.185816,0.143238,0.277021,2.390373,0.970698,-0.237321,0.818405,0.365417,-0.419544,-0.501824,-0.424397,-0.363346,-0.520263,1.413003,-0.523555,-0.520093,0.166019,-0.640401,1.623404,0.031311,0.021299,-1.952017,-0.633642,-2.037345,1.167284,0.745304,-0.324635,2.049444,-1.260523,1.250351,0.046612,-0.382971,-1.041035,0.167239,0.022720,0.763298,0.066945,-0.066681,-1.498404,0.484322,-0.003609,1.814245,1.0


In [None]:
from sklearn.model_selection import TimeSeriesSplit, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import RidgeClassifier
from scipy.stats import uniform, randint

# Assuming complete_cleaning is your DataFrame ready for use
features = [col for col in complete_cleaning.columns if col != 'target_x']
X = complete_cleaning[features]
y = complete_cleaning['target_x']

# Initialize the RidgeClassifier
rr = RidgeClassifier()

# Create a pipeline with Sequential Feature Selector and RidgeClassifier
pipeline = Pipeline([
    ('feature_selector', SequentialFeatureSelector(estimator=rr, direction='forward')),
    ('classifier', rr)
])

# Define the parameter distributions
param_distributions = {
    'feature_selector__n_features_to_select': randint(1, len(features) + 1),  # Correct inclusion of n_features_to_select
    'classifier__alpha': uniform(0.1, 10),  # Regularization strength
    'classifier__solver': ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']  # Solver
}

# Initialize TimeSeriesSplit for cross-validation
tscv = TimeSeriesSplit(n_splits=5)

# Initialize RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_distributions,
    n_iter=100,  # Number of parameter settings sampled
    cv=tscv,  # Cross-validation strategy
    verbose=1,
    n_jobs=-1
)

# Perform the randomized search
random_search.fit(X, y)

# Output the best combination of parameters
best_hyperparams = random_search.best_params_
print("Best hyperparameters:", best_hyperparams)

# Evaluate the best model further if necessary, for example on a test set


Fitting 5 folds for each of 100 candidates, totalling 500 fits
