In [148]:
import os
import numpy as np
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import time
import fredapi as fa
import pandas_ta as ta
from datetime import datetime
# from sklearn.linear_model import LinearRegression


In [149]:
# 1.load data，standardize columns, date&time
directory = "C:/Users/chenl/market data export/MINUTE/"
SPY = pd.read_csv(directory + "SPY.csv", parse_dates=["date"])
SPY['time'] = SPY['date'].dt.time
SPY['date'] = pd.to_datetime(SPY['date'].dt.date)
# Set the multi-level index using the date and time columns
SPY['time'] = SPY['time'].astype(str).str.zfill(8)
SPY.set_index(['date', 'time'], inplace=True)

# Filter 9:30-16:00
SPY = SPY.loc[(SPY.index.get_level_values('time') >= '09:00:00') & 
              (SPY.index.get_level_values('time') <= '16:00:00')&
              (SPY.index.get_level_values('date') >= pd.to_datetime('2024-04-01'))&
              (SPY.index.get_level_values('date') <= pd.to_datetime('2024-08-21'))
              ]
# 2. create intraday indicators value columns
#define ewm function for mannual SMI calcualtion 
def ema(series, period):
    return series.ewm(span=period, adjust=False).mean()
def intraday_indicators(group):
    # SMA
    group['SMA_5'] = ta.sma(group['close'], length=5)
    group['SMA_9'] = ta.sma(group['close'], length=9)
    group['SMA_18'] = ta.sma(group['close'], length=18)
    # group['SMA_50'] = ta.sma(group['close'], length=50)
    # group['SMA_100'] = ta.sma(group['close'], length=100) #use 50 and 100 to found cloud, bearish if 50 is below 100, bull if 50 is above 100
    # group['SMA_200'] = ta.sma(group['close'], length=100)

    # ADX: Calculate the ADX using high, low, and close prices,
    #+DI > -DI: The trend is upward. -DI > +DI: The trend is downward.
    #0-25: Weak or no trend. 25-50: Strong trend. 50-75: Very strong trend. 75-100: Extremely strong trend.
    group['ADX'] = ta.adx(SPY['high'], SPY['low'], SPY['close'], length=20)['ADX_20']
    
    #RSI 
    group['RSI_21'] = ta.rsi(group['close'], length=21)

    #MACD: create new dp with multiple columne then merge back
    macd = ta.macd(group['close'], fast=12, slow=26, signal=9)
    group['MACD'] = macd['MACD_12_26_9']
    group['MACD_hist'] = macd['MACDh_12_26_9']
    group['MACD_signal'] = macd['MACDs_12_26_9']

    #bollinger: create new dp with multiple columne then merge back
    bollinger = ta.bbands(group['close'], length=20, std=2)
    group['Bollinger_upper'] = bollinger['BBU_20_2.0']
    group['Bollinger_middle'] = bollinger['BBM_20_2.0']
    group['Bollinger_lower'] = bollinger['BBL_20_2.0']
    group['Bollinger_width'] = (bollinger['BBU_20_2.0']-bollinger['BBL_20_2.0'])/bollinger['BBM_20_2.0']
    
    # ATR - volatility measure, max up and down range, used for gain collection stop loss 
    group["ATR"]=ta.atr(group.high, group.low, group.close, length =14)
    group["rolling_std_14"]=group["close"].pct_change().rolling(window=14).std()* group["close"]
    # Calculate Chandelier Exit
    group['Chandelier_Exit'] = group['high'].rolling(window=30).max() - group['ATR'] * 1.2
     
    # SMI: difference between the current closing and the median high/low price range over a specified period the k period. 
    # This difference is then divided by the absolute value of the high/low range and multiplied by 100
    # SMI line crosses above the signal line, considered bullish, when the SMI line crosses below the signal line, a bearish signal
    # relativeRange = close - (highestHigh + lowestLow) / 2; double exponential moving average: ds = EMA(EMA(relativeRange, D);
    # dhl = EMA(EMA((highestHigh - lowestLow), D; smi = 200 * (ds / dhl); signal = EMA(smi, EMA period)
    # Calculate SMI manually
    k_period = 14
    d_period = 5
    ema_period = 3   
    # Determine the highest high and lowest low within the K period
    group['highestHigh'] = group['high'].rolling(window=k_period).max()
    group['lowestLow'] = group['low'].rolling(window=k_period).min()
    # Calculate relative range
    group['relativeRange'] = group['close'] - (group['highestHigh'] + group['lowestLow']) / 2
    # Calculate double exponential moving averages
    group['ds'] = ema(ema(group['relativeRange'], d_period), d_period)
    group['dhl'] = ema(ema(group['highestHigh'] - group['lowestLow'], d_period), d_period)
    # Calculate SMI
    group['SMI'] = 200 * (group['ds'] / group['dhl'])
    # Calculate Signal line
    group['SMI_signal'] = ema(group['SMI'], ema_period)  

    # warning: the ta package SMI line and signal line calculation is wrong!! don't use it!
    # Stochastic Momentum Index: SMI Line: The main line; Signal line (D); Oscillator (K): smoothed version of the SMI
    # smi = ta.smi(high=group['high'], low=group['low'], close=group['close'], fast_k=5, slow_k=3, slow_d=3)
    # group['SMI'] = smi['SMI_5_20_5']*100 #main line
    # group['SMI_signal'] = smi['SMIs_5_20_5']*100#Signal line (D)
    
    #target change
    group['pct_change_2min'] = group['close'].pct_change(periods=-2) * 100
    group['pct_change_5min'] = group['close'].pct_change(periods=-5) * 100
    group['pct_change_7min'] = group['close'].pct_change(periods=-7) * 100
    group['pct_change_10min'] = group['close'].pct_change(periods=-10) * 100
    group['pct_change_20min'] = group['close'].pct_change(periods=-20) * 100
    group['pct_change_30min'] = group['close'].pct_change(periods=-30) * 100

    #target direction
    group['direction_2min'] = np.sign(group['close'].shift(-2) - group['close'])
    group['direction_5min'] = np.sign(group['close'].shift(-5) - group['close'])
    group['direction_7min'] = np.sign(group['close'].shift(-7) - group['close'])
    group['direction_10min'] = np.sign(group['close'].shift(-10) - group['close'])
    group['direction_20min'] = np.sign(group['close'].shift(-20) - group['close'])
    group['direction_30min'] = np.sign(group['close'].shift(-30) - group['close']) 
    return group

# Reset the index, group by the 'date' column, apply the function, and set the multi-level index again
SPY.reset_index(inplace=True)
SPY = SPY.groupby('date').apply(intraday_indicators)
SPY.set_index(['date', 'time'], inplace=True)

# 3. generate signals columns for each strategy
def generate_signals(df):
    df['signal_ma'] = 0
    df['signal_rsi'] = 0
    df['signal_macd'] = 0
    df['signal_bollinger'] = 0
    df['signal_stochastic'] = 0
    df["signal_BB+RSI"]=0
    df["signal_Chandelier"]=0
    for date, group in df.groupby('date'):
        for i in range(1, len(group)):            
            if group.iloc[i][['SMA_5', 'SMA_9', 'SMA_18', 'RSI_21', 'MACD', 'MACD_signal', 'Bollinger_lower', 'Bollinger_upper']].isna().any():
                continue
            if group.iloc[i]['SMA_5'] < group.iloc[i]['SMA_18'] and group.iloc[i-1]['SMA_5'] >= group.iloc[i-1]['SMA_18'] :
                df.at[group.index[i], 'signal_ma'] = 1
            elif group.iloc[i]['SMA_5'] > group.iloc[i]['SMA_18'] and group.iloc[i-1]['SMA_5'] <= group.iloc[i-1]['SMA_18']:
                df.at[group.index[i], 'signal_ma'] = -1
            
            if group.iloc[i]['open'] < group.iloc[i]['Bollinger_lower'] and group.iloc[i]['close'] > group.iloc[i]['Bollinger_lower'] and group.iloc[i]["SMA_5"] > group.iloc[i]['SMA_18']:
                df.at[group.index[i], 'signal_bollinger'] = 1
            elif group.iloc[i]['open'] > group.iloc[i]['Bollinger_upper'] and group.iloc[i]['close'] < group.iloc[i]['Bollinger_upper']:
                df.at[group.index[i], 'signal_bollinger'] = -1                  

            if group.iloc[i]['MACD'] > group.iloc[i]['MACD_signal'] and group.iloc[i-1]['MACD'] <= group.iloc[i-1]['MACD_signal'] and group.iloc[i]['MACD']<0:
                df.at[group.index[i], 'signal_macd'] = 1
            elif group.iloc[i]['MACD'] < group.iloc[i]['MACD_signal'] and group.iloc[i-1]['MACD'] >= group.iloc[i-1]['MACD_signal'] and group.iloc[i]['MACD']>0:
                df.at[group.index[i], 'signal_macd'] = -1
        
            if group.iloc[i]['RSI_21'] >= 30 and group.iloc[i-1]['RSI_21'] < 30 and group.iloc[i]["SMA_5"] > group.iloc[i]['SMA_18']:
                df.at[group.index[i], 'signal_rsi'] = 1
            elif group.iloc[i]['RSI_21'] < 55 and group.iloc[i-1]['RSI_21'] >= 55:
                df.at[group.index[i], 'signal_rsi'] = -1
            
            if (group.iloc[i]['SMI'] > group.iloc[i]['SMI_signal'] and group.iloc[i-1]['SMI'] <= group.iloc[i-1]['SMI_signal'] and group.iloc[i]['SMI']<=-30 and group.iloc[i]['MACD']>0) or (group.iloc[i]['SMI']>-40 and group.iloc[i-1]['SMI']<=-40 and group.iloc[i]['MACD']>0):
                df.at[group.index[i], 'signal_stochastic'] = 1
            elif (group.iloc[i]['SMI'] < group.iloc[i]['SMI_signal'] and group.iloc[i-1]['SMI'] >= group.iloc[i-1]['SMI_signal'] and group.iloc[i]['SMI'] >20) or (group.iloc[i]['SMI']>40 and group.iloc[i-1]['SMI']<=40):
                df.at[group.index[i], 'signal_stochastic'] = -1   
            # hybrid
            if group.iloc[i]['SMI'] <-55 and group.iloc[i]['close'] < group.iloc[i]['Bollinger_lower'] and group.iloc[i]['Bollinger_width']> 0.002 :
                df.at[group.index[i], "signal_BB+SMI"] = 1
            elif group.iloc[i]['SMI'] >45 or group.iloc[i]['close'] > group.iloc[i]['Bollinger_upper'] or (group.iloc[i]['SMI'] < group.iloc[i]['SMI_signal'] and group.iloc[i-1]['SMI'] >= group.iloc[i-1]['SMI_signal']):
                df.at[group.index[i], "signal_BB+SMI"] = -1  
            
            #buy and sell signal for chandelier buy and sell
            if group.iloc[i]['close'] > group.iloc[i]['Chandelier_Exit'] and group.iloc[i-1]['MACD'] > group.iloc[i]['MACD_signal']:
                df.at[group.index[i], 'signal_Chandelier'] = 1
            elif group.iloc[i]['close'] < group.iloc[i]['Chandelier_Exit'] and group.iloc[i-1]['MACD'] < group.iloc[i]['MACD_signal']:
                df.at[group.index[i], 'signal_Chandelier'] = -1                

    return df
    
SPY = generate_signals(SPY)


In [150]:
import lightgbm as lgb
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, mean_squared_error, classification_report, r2_score
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OneHotEncoder


# data prep & feature creation

In [151]:
# reset index, and make huor of the day as a feature
SPY.reset_index(inplace=True) #reset index date and time
SPY['hour of day'] = SPY['time'].str[:2] #create a column call the hour of the day

In [152]:
# distance from MA in ATR
SMA_list=['SMA_5','SMA_9', 'SMA_18',  
          # 'SMA_50','SMA_100', 'SMA_200'
         ]
for sma in SMA_list:
    SPY[f"distance from {sma} ATR"]=(SPY["close"]-SPY[sma])/SPY["ATR"]

In [153]:
# distance from MA in Vol, pending now 
# SMA_list=['SMA_5','SMA_9', 'SMA_18', 'SMA_50', 'SMA_100', 'SMA_200']
# for sma in SMA_list:
#     SPY[f"distance from {sma} std"]=(SPY["close"]-SPY[sma])/SPY["rolling_std_14"]

In [154]:
SPY = SPY.loc[(SPY['time'] >= '09:30:00') & 
              (SPY['time'] <= '16:00:00')
              ]
SPY=SPY.drop(columns=['date',"time"])
SPY = pd.get_dummies(SPY, columns=['hour of day'], drop_first=True)


In [155]:
SPY.columns
len(SPY)

30498

In [156]:
SPY.isna().sum()

open                            0
high                            0
low                             0
close                           0
volume                          0
SMA_5                           0
SMA_9                           0
SMA_18                          0
RSI_21                          0
MACD                            0
MACD_hist                     234
MACD_signal                   234
Bollinger_upper                 0
Bollinger_middle                0
Bollinger_lower                 0
Bollinger_width                 0
ATR                             0
rolling_std_14                  0
Chandelier_Exit                 0
highestHigh                     0
lowestLow                       0
relativeRange                   0
ds                              0
dhl                             0
SMI                             0
SMI_signal                      0
pct_change_2min               156
pct_change_5min               390
pct_change_7min               546
pct_change_10m

# training & testing


In [157]:
features=['open', 'high', 'low', 'close', 'volume', 'SMA_5', 'SMA_9', 'SMA_18',
          # 'SMA_50', 'SMA_100', 'SMA_200', 
          'RSI_21', 'MACD', 'MACD_hist',
       'MACD_signal', 'Bollinger_upper', 'Bollinger_middle', 'Bollinger_lower',
       'Bollinger_width', 'ATR', 'rolling_std_14', 'Chandelier_Exit',
       'highestHigh', 'lowestLow', 'relativeRange', 'ds', 'dhl', 'SMI',
       'SMI_signal', 'signal_ma', 'signal_rsi',
       'signal_macd', 'signal_bollinger', 'signal_stochastic', 'signal_BB+RSI',
       'signal_Chandelier', 
          # 'signal_BB+SMI', 
       'distance from SMA_5 ATR', 'distance from SMA_9 ATR',
       'distance from SMA_18 ATR', 
          # 'distance from SMA_50 ATR',
       # 'distance from SMA_100 ATR', 
          # 'distance from SMA_200 ATR',
        'hour of day_10', 'hour of day_11', 'hour of day_12', 'hour of day_13', 'hour of day_14', 'hour of day_15',
       'hour of day_16']
target_list=['pct_change_2min', 'pct_change_5min', 'pct_change_7min',
       'pct_change_10min', 'pct_change_20min', 'pct_change_30min',
       'direction_2min', 'direction_5min', 'direction_7min', 'direction_10min',
       'direction_20min', 'direction_30min']
SPY=SPY[features+['direction_10min']]
SPY=SPY.dropna(axis=0)
x=SPY[features]
y=SPY['direction_10min']
# Step 1: Split the data into training and remaining (validation + test)
X_train, X_remaining, y_train, y_remaining = train_test_split(x, y, test_size=0.4, stratify=y)

# Step 2: Split the remaining data into validation and test sets
X_val, X_test, y_val, y_test = train_test_split(X_remaining, y_remaining, test_size=0.5, stratify=y_remaining)

# Step 3: Initialize the models
rf_model = RandomForestClassifier()
dt_model = DecisionTreeClassifier()
lgb_model = lgb.LGBMClassifier()

param_grid_rf={
    'n_estimators': [100, 200],
    'max_depth': [10, 20],
    'min_samples_split': [10, 20,30]
}
param_grid_dt={
    'max_depth': [10, 20],
    'min_samples_split': [10, 20,30],
    'min_samples_leaf': [10, 15,30]    
}
param_grid_lgb={
    'num_leaves': [31, 50],
    'learning_rate': [0.001,0.01, 0.1],
    'n_estimators': [100, 200]    
}
# Step 5: Use GridSearchCV with cross-validation
grid_search_rf = GridSearchCV(estimator=rf_model, param_grid=param_grid_rf, cv=5, scoring='precision_macro')
grid_search_dt = GridSearchCV(estimator=dt_model, param_grid=param_grid_dt, cv=5, scoring='precision_macro')
grid_search_lgb = GridSearchCV(estimator=lgb_model, param_grid=param_grid_lgb, cv=5, scoring='precision_macro')

# Step 6: Fit the models using GridSearchCV
grid_search_rf.fit(X_train, y_train)
grid_search_dt.fit(X_train, y_train)
grid_search_lgb.fit(X_train, y_train)

# Step 7: Get the best parameters and best estimator
best_rf = grid_search_rf.best_estimator_
best_dt = grid_search_dt.best_estimator_
best_lgb = grid_search_lgb.best_estimator_

# Step 8: Validate the models on the validation set using the best found parameters
y_val_pred_rf = best_rf.predict(X_val)
y_val_pred_dt = best_dt.predict(X_val)
y_val_pred_lgb = best_lgb.predict(X_val)

print("Validation Performance:")

print("Random Forest:")
print(classification_report(y_val, y_val_pred_rf))
print("Accuracy:", accuracy_score(y_val, y_val_pred_rf))

print("Decision Tree:")
print(classification_report(y_val, y_val_pred_dt))
print("Accuracy:", accuracy_score(y_val, y_val_pred_dt))

print("LightGBM:")
print(classification_report(y_val, y_val_pred_lgb))
print("Accuracy:", accuracy_score(y_val, y_val_pred_lgb))

# Step 9: Evaluate the final model performance on the test set
y_test_pred_rf = best_rf.predict(X_test)
y_test_pred_dt = best_dt.predict(X_test)
y_test_pred_lgb = best_lgb.predict(X_test)

print("Test Performance:")

print("Random Forest:")
print(classification_report(y_test, y_test_pred_rf))
print("Accuracy:", accuracy_score(y_test, y_test_pred_rf))

print("Decision Tree:")
print(classification_report(y_test, y_test_pred_dt))
print("Accuracy:", accuracy_score(y_test, y_test_pred_dt))

print("LightGBM:")
print(classification_report(y_test, y_test_pred_lgb))
print("Accuracy:", accuracy_score(y_test, y_test_pred_lgb))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002237 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001717 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002753 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002207 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7423
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002287 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769475
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636554


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002023 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001747 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001840 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001870 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7423
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001883 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769475
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636554


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001857 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001865 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002069 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001989 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7423
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001952 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769475
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636554


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001859 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001210 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001836 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001882 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7423
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001835 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769475
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636554


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001918 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001912 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001979 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001953 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7423
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001976 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769475
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636554


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000966 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002026 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001761 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001765 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7423
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001924 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769475
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636554


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001645 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001885 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002105 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002274 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7423
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001751 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769475
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636554


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001844 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001839 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001947 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001932 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7423
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001885 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769475
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636554


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002052 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001306 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001813 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001722 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7423
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769475
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636554


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001877 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002031 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002071 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001768 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7423
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769475
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636554
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001853 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002132 sec

  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002089 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001842 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7423
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001863 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769475
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636554


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001921 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001727 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001940 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7423
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769628
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636421


  _warn_prf(average, modifier, msg_start, len(result))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001914 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 14152, number of used features: 41
[LightGBM] [Info] Start training from score -0.769475
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636554
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002404 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7424
[LightGBM] [Info] Number of data points in the train set: 17690, number of used features: 41
[LightGBM] [Info] Start training from score -0.769597
[LightGBM] [Info] Start training from score -4.875480
[LightGBM] [Info] Start training from score -0.636447
Validation Performance:
Random Forest:
              precision    recall  f1-score   support

    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Test Performance:
Random Forest:
              precision    recall  f1-score   support

        -1.0       0.80      0.70      0.75      2731
         0.0       0.00      0.00      0.00        45
         1.0       0.76      0.85      0.80      3121

    accuracy                           0.78      5897
   macro avg       0.52      0.52      0.52      5897
weighted avg       0.77      0.78      0.77      5897

Accuracy: 0.7771748346616923
Decision Tree:
              precision    recall  f1-score   support

        -1.0       0.59      0.61      0.60      2731
         0.0       0.00      0.00      0.00        45
         1.0       0.64      0.63      0.63      3121

    accuracy                           0.61      5897
   macro avg       0.41      0.41      0.41      5897
weighted avg       0.61      0.61      0.61      5897

Accuracy: 0.6135323045616415
LightGBM:
              precision    recall  f1-score   support

        -1.0       0.76      0.71      0.73      2731
         0.0 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [180]:
# test data load
directory = "C:/Users/chenl/market data export/MINUTE/"
test = pd.read_csv(directory + "SPY_min.csv", parse_dates=["date"])
test['time'] = test['date'].dt.time
test['date'] = pd.to_datetime(test['date'].dt.date)
# Set the multi-level index using the date and time columns
test['time'] = test['time'].astype(str).str.zfill(8)
test.set_index(['date', 'time'], inplace=True)

# Filter 9:30-16:00
test = test.loc[(test.index.get_level_values('time') >= '09:00:00') & 
              (test.index.get_level_values('time') <= '16:00:00')
]
test.reset_index(inplace=True)
test = test.groupby('date').apply(intraday_indicators)
test.set_index(['date', 'time'], inplace=True)
test = generate_signals(test)
test.reset_index(inplace=True) #reset index date and time
test['hour of day'] = test['time'].str[:2] #create a column call the hour of the day
# distance from MA in ATR
SMA_list=['SMA_5','SMA_9', 'SMA_18',  
          # 'SMA_50','SMA_100', 'SMA_200'
         ]
for sma in SMA_list:
    test[f"distance from {sma} ATR"]=(test["close"]-test[sma])/test["ATR"]
# test = test.loc[(SPY['time'] >= '09:30:00') & 
#               (SPY['time'] <= '16:00:00')
#               ]
test=test.drop(columns=['date',"time"])
test = pd.get_dummies(test, columns=['hour of day'], drop_first=True)
test=test.dropna()
x=test[features]
y=test['direction_10min']

In [182]:
test.isna().sum()

open                        0
high                        0
low                         0
close                       0
volume                      0
SMA_5                       0
SMA_9                       0
SMA_18                      0
RSI_21                      0
MACD                        0
MACD_hist                   0
MACD_signal                 0
Bollinger_upper             0
Bollinger_middle            0
Bollinger_lower             0
Bollinger_width             0
ATR                         0
rolling_std_14              0
Chandelier_Exit             0
highestHigh                 0
lowestLow                   0
relativeRange               0
ds                          0
dhl                         0
SMI                         0
SMI_signal                  0
pct_change_2min             0
pct_change_5min             0
pct_change_7min             0
pct_change_10min            0
pct_change_20min            0
pct_change_30min            0
direction_2min              0
direction_

831

In [181]:
# Step 8: Validate the models on the 
y_val_pred_rf = best_rf.predict(x)
y_val_pred_dt = best_dt.predict(x)
y_val_pred_lgb = best_lgb.predict(x)

print("Validation Performance:")

print("Random Forest:")
print(classification_report(y, y_val_pred_rf))
print("Accuracy:", accuracy_score(y, y_val_pred_rf))

print("Decision Tree:")
print(classification_report(y, y_val_pred_dt))
print("Accuracy:", accuracy_score(y, y_val_pred_dt))

print("LightGBM:")
print(classification_report(y, y_val_pred_lgb))
print("Accuracy:", accuracy_score(y, y_val_pred_lgb))



Validation Performance:
Random Forest:
              precision    recall  f1-score   support

        -1.0       0.52      0.52      0.52       385
         0.0       0.00      0.00      0.00         4
         1.0       0.58      0.58      0.58       442

    accuracy                           0.55       831
   macro avg       0.37      0.37      0.37       831
weighted avg       0.55      0.55      0.55       831

Accuracy: 0.5511432009626955
Decision Tree:
              precision    recall  f1-score   support

        -1.0       0.50      0.57      0.53       385
         0.0       0.00      0.00      0.00         4
         1.0       0.57      0.50      0.54       442

    accuracy                           0.53       831
   macro avg       0.36      0.36      0.36       831
weighted avg       0.53      0.53      0.53       831

Accuracy: 0.5318892900120337
LightGBM:
              precision    recall  f1-score   support

        -1.0       0.50      0.49      0.50       385
       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
