# Setup

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy import stats

from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV, RepeatedKFold, cross_val_score
import sklearn.metrics as skm
from sklearn.metrics import accuracy_score
from sklearn.cluster import AffinityPropagation as AP
from sklearn.preprocessing import MinMaxScaler

from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from xgboost import XGBRegressor, XGBClassifier

import re
import time
import datetime as dt

import sys
import os
sys.path.append(os.path.abspath
                (os.path.join
                 (os.path.dirname("constants.py"), '..')))
from constants import *
from scripts import *

import warnings
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)
warnings.filterwarnings('ignore')

hide_toggle()

# FUNCTIONS

In [2]:
def train_test_split_binary(dfToSplitFunc, tournamentId, currentTarget, cut_off_var, splitType, verbose=True):
    
    for col in dfToSplitFunc.columns:
            dfToSplitFunc[col] = dfToSplitFunc[col].fillna(0)
    
    if splitType==0:
        testData = dfToSplitFunc[dfToSplitFunc['tournament_id']==tournamentId].copy()
        xtest= testData.drop(['Date',currentTarget],axis=1).copy()
        xtest= xtest.drop(offCols,axis=1,errors='ignore')
        ytest = testData[currentTarget]

        trainData = dfToSplitFunc[dfToSplitFunc['tournament_id']!=tournamentId].copy()
        xtrain = trainData.drop(['Date',currentTarget],axis=1).copy()
        xtrain = xtrain.drop(offCols,axis=1,errors='ignore')
        ytrain = trainData[currentTarget]
        
    elif splitType==1:
        #print(dfToSplitFunc.columns)
        xCols = dfToSplitFunc.drop(['Date', currentTarget]+offCols,axis=1,errors='ignore')
        #print(xCols.columns)
        yCols = dfToSplitFunc[currentTarget]
        xtrain, xtest, ytrain, ytest = train_test_split(xCols, yCols, test_size=0.20, shuffle=False)
    
    if list(ytrain).count(0)/len(ytrain)==1:
        print(len(ytrain))
        print(dropTypeF)
        print(dfToSplitFunc[currentTarget])
        print('==========================================================')
    
    ytrain_mean, ytrain_std = np.mean(ytrain), np.std(ytrain)
    cut_off = ytrain_std * cut_off_var
    lower, upper = ytrain_mean - cut_off, ytrain_mean + cut_off
    
    outlierMask = ytrain.apply(lambda x: False if x < lower or x > upper else True)
    
    if verbose:
        print(f'train len: {len(xtrain)}')
    lentemp = len(xtrain)
    #xtrain, ytrain = xtrain[outlierMask], ytrain[outlierMask]
    if verbose:
        print(f'train len no outliers: {len(xtrain)}')
        print(f'percent of len removed: {round(abs(len(xtrain)/lentemp*100-100),2)}%')
        print(f'test len: {len(xtest)}\n')
    
    return xtrain, ytrain, xtest, ytest

In [3]:
def matchListToDfs(df):
    matchListDateFilter = (df[df['Date'] >= pd.to_datetime('2019-7-01',format='%Y-%m-%d')]
                                        .reset_index(drop=True).copy())
    matchListDateFilter['realSemesterYear'] = (matchListDateFilter['realYear'].astype(str)
                                               +matchListDateFilter['realSemester'].astype(str))
    matchListDateFilter['tournament_id'] = (matchListDateFilter['TournamentRegion'].astype(str)
                                               +matchListDateFilter['realSemesterYear'].astype(str))
    
    playerMatchList = matchListDateFilter.copy()
    teamMatchList = matchListDateFilter.copy()

    for color in ['Blue','Red']:
        for feature in meanFeatures:
                teamMatchList[f'Team_{color}_{feature}'] = (matchListDateFilter[[f"{position}_{color}_{feature}" for position in positions]]
                                                        .mean(skipna=True,axis=1).copy())
                teamMatchList.drop([f"{position}_{color}_{feature}" for position in positions],axis=1,inplace=True)

        for feature in sumFeatures:
                teamMatchList[f'Team_{color}_{feature}'] = (matchListDateFilter[[f"{position}_{color}_{feature}" for position in positions]]
                                                        .sum(skipna=True,axis=1).copy())
                teamMatchList.drop([f"{position}_{color}_{feature}" for position in positions],axis=1,inplace=True)

        teamMatchList.drop([f"{position}_{color}" for position in positions],axis=1,inplace=True)
        
    return playerMatchList, teamMatchList

def regionLists(df, currentYear):
    regions = df['TournamentRegion'].unique()
    regionsToFeed = [x for x in df['TournamentRegion'].unique()]
    regionsFilterTemp = ([x for x in regions if currentYear in (df[df['TournamentRegion']==x])['realYear'].unique()
                                            and currentYear-1 in (df[df['TournamentRegion']==x])['realYear'].unique()])
    regionsToPredict = []
    for region in regionsFilterTemp:
        regionsFilterSize = df[(df['realYear']==currentYear) & (df['TournamentRegion']==region)]
        regionsFilterSizeTrain = df[(df['realYear']!=currentYear) & (df['TournamentRegion']==region)]
        if len(regionsFilterSize)>=30:
            regionsToPredict.append(region)
    
    return regions, regionsToFeed, regionsToPredict

def generateRegionDf(df, regionDataListF, regionsFeatureColsF, cut_off_var, tempTournamentIdF, currentTarget, splitType):
    
    dfTemp = df[df['TournamentRegion'].isin(regionDataListF)].copy()
    print(max(dfTemp['realSemesterYear']))
    tempCols = [x for x in list(dfTemp.columns) if x.split('_')[-1] in regionsFeatureColsF]
    dfTemp = dfTemp[tempCols+infoCols]
    dfTemp = dfTemp.sort_values(by='Date',ascending=True).copy()
    
    xtrain,ytrain,xtest,ytest = train_test_split_binary(dfTemp, tempTournamentIdF, currentTarget, cut_off_var, splitType, verbose=False)
    
    return dfTemp, xtrain, ytrain, xtest, ytest

def generateMetric(model_number, regionDataListF, regionsFeatureColsF, cut_off_var, tempTournamentIdF, currentTarget, dfToSplit, splitType):
    
    dfTemp, xtrain, ytrain, xtest, ytest = generateRegionDf(dfToSplit, regionDataListF, regionsFeatureColsF
                                                            , cut_off_var, tempTournamentIdF, currentTarget, splitType)
    
    print('number of 0s:')
    print(list(ytrain).count(0)/len(ytrain))
    print(ytrain.unique())
    print('len of train:')
    print(len(xtrain))
    print(len(ytrain))
    print('num of cols:')
    print(len(xtrain.columns))

    errors3=0
    rep=3
    for i in range(rep):
        region_model = base_models[model_number]
        region_model.fit(xtrain, ytrain)
        pred = region_model.predict(xtest)
        errors3 = accuracy_score(ytest, pred)+errors3
    errors2=errors3/rep
    
    errors3=0
    for i in range(rep):
        region_model = base_models[model_number]
        region_model.fit(xtrain, ytrain)
        pred = region_model.predict(xtest)
        errors3 = accuracy_score(ytest, pred)+errors3
    errors4=errors3/rep
    
    print('len of pred:')
    print(len(pred))
    print('========================')
    metric=(errors2+errors4)/2
    metric=round(abs(metric-1),3)
    
    return metric, pred

hide_toggle()

# Load

In [4]:
teamDataTable = pd.read_pickle("Data/raw_data/teamDataTable.pkl")
playerDataTable = pd.read_pickle("Data/raw_data/playerDataTable.pkl")

matchList = pd.read_pickle("Data/raw_data/matchList.pkl")
matchListFill = pd.read_pickle("Data/raw_data/matchListFill.pkl")

teamMatchList = pd.read_pickle("Data/raw_data/teamMatchList.pkl")
playerMatchList = pd.read_pickle("Data/raw_data/playerMatchList.pkl")

regionsStats = pd.read_pickle("./Data/raw_data/regionsStats.pkl")

with open(f'./Data/raw_data/regionsFeatureCols.json', 'r') as fp:
    regionsFeatureCols = json.load(fp)
with open(f'./Data/raw_data/regionsTrainData.json', 'r') as fp:
    regionsTrainData = json.load(fp)
    

In [5]:
content=1
playerMatchList, teamMatchList = matchListToDfs(matchListFill)
#             0              1
dfsContent = [playerMatchList, teamMatchList]
dfToSplit = dfsContent[content].copy()

print(f'main df size: {len(dfToSplit)}')

params = {'objective': 'binary:logistic'}
base_models = [
              RandomForestClassifier(), #0
              #XGBClassifier(params=params,num_class=2), #1
              KNeighborsClassifier(algorithm = 'brute'), #2
              LinearSVC(C=0.0001), #3
              BaggingClassifier(DecisionTreeClassifier(),max_samples=0.5,max_features=1.0,n_estimators=10), #4
              AdaBoostClassifier(DecisionTreeClassifier(min_samples_split=10,max_depth=4),n_estimators=10,learning_rate=0.6), #5
              DecisionTreeClassifier(), #6
              LogisticRegression(), #7
              LogisticRegression(solver='newton-cg'), #8
              LogisticRegression(solver='newton-cg') #8
              ]

currentTarget = 'Score'
currentYear = 2022
currentSemester = 1
currentSemesterYear = str(currentYear)+str(currentSemester)
defaultModel = 8
infoCols = ['Date','tournament_id',currentTarget,'TournamentRegion']
splitType = 0

dfToSplit.drop('totalKills',axis=1,inplace=True)
featureCols = [x for x in dfToSplit.columns if x not in offCols+infoCols]
featureCols = list(set([x.replace('Team_Blue_','').replace('Team_Red_','') for x in featureCols]))

regions, regionsToFeed, regionsToPredict = regionLists(dfToSplit, currentYear)

dfToSplit = dfToSplit[dfToSplit['realSemesterYear'].astype(int)<=int(currentSemesterYear)]

regionsToPredict.remove('MSI')
regionsToPredict.remove('World')
regionsToPredict = regionsToPredict[:5]

main df size: 26314


# SKLEARN

In [6]:
regionsStats = pd.DataFrame(columns=['region','model','size'])
regionsStats['region'] = regionsToPredict
regionsList = regionsStats['region']
regionsStats['model'] = defaultModel

regionsTrainData = dict(zip(regionsList,regionsList.apply(lambda x: [x])))
regionsStats['accuracy_0'] = np.nan
regionsStats['accuracy_1'] = np.nan
regionsStats['accuracy_2'] = np.nan
regionsStats['cut_off_var'] = 1.5

regionsFeatureCols = dict(zip(regionsList,[0]*len(regionsList)))
for key in regionsFeatureCols:
    regionsFeatureCols[key] = featureCols.copy()

### TRAIN DATA SELECTION

In [7]:
regionsTrainData = dict(zip(regionsList,regionsList.apply(lambda x: [x])))
regionsStats['accuracy_0'] = np.nan

for n,region in enumerate(regionsToPredict):
    print('=========\n')
    regionFinalAcc = regionsStats['accuracy_0'][n]
    tempTournamentId = region+currentSemesterYear
    cut_off_var = regionsStats['cut_off_var'][n]
    region_model_number = regionsStats['model'][n]
    print(f'[{n+1} of {len(regionsToPredict)}] region {region} -> {regionFinalAcc}:\n')
    
    regionsToTest = [x for x in regionsToFeed]
    regionsToTest.remove(region)
    random.shuffle(regionsToTest)
    for nn,regionToTest in enumerate(regionsToTest):
        regionsTrainData[region].append(regionToTest)
        
        metric, pred = generateMetric(region_model_number, regionsTrainData[region], regionsFeatureCols[region]
                                      , cut_off_var, tempTournamentId, currentTarget, dfToSplit, splitType)
        
        if metric < regionFinalAcc or np.isnan(regionFinalAcc):
            regionFinalAcc = metric
            
            print(f'{regionFinalAcc} -> {regionToTest} added                                           ')
        else:
            regionsTrainData[region].remove(regionToTest)
            
        #print(f'[{nn+1} of {len(regionsToTest)}] testing: {regionToTest}                        ',end='\r')
    
    regionsStats['accuracy_0'][n] = regionFinalAcc
    regionsStats['size'][n] = len(pred)
    
    print(f'\n\naccuracy: {regionFinalAcc}')
    print(f'{region} train data: {regionsTrainData[region]}\nlen:{len(regionsTrainData[region])}')
    print(f'test data len: {len(pred)}\n')
    
printFinalResults(regionsStats, 'accuracy_0')

hide_toggle()


[1 of 5] region LPL -> nan:

20221
number of 0s:
0.5298452468680914
[0 1]
len of train:
2714
2714
num of cols:
22
len of pred:
297
0.347 -> LCL added                                           
20221
number of 0s:
0.5334261838440112
[0 1]
len of train:
2872
2872
num of cols:
22
len of pred:
297
20221
number of 0s:
0.5319901025097208
[0 1]
len of train:
2829
2829
num of cols:
22
len of pred:
297
20221
number of 0s:
0.5295226130653267
[0 1]
len of train:
3184
3184
num of cols:
22
len of pred:
297
20221
number of 0s:
0.5288065843621399
[0 1]
len of train:
3402
3402
num of cols:
22
len of pred:
297
0.337 -> Hitpoint_Tier2 added                                           
20221
number of 0s:
0.5285004384682841
[1 0]
len of train:
3421
3421
num of cols:
22
len of pred:
297
20221
number of 0s:
0.5284692417739628
[1 0]
len of train:
3495
3495
num of cols:
22
len of pred:
297
20221
number of 0s:
0.5263157894736842
[0 1]
len of train:
3458
3458
num of cols:
22
len of pred:
297
20221
number of 0s:

In [8]:
print(np.mean(regionsStats['accuracy_0']))
print(np.mean(regionsStats['accuracy_1']))
print(np.mean(regionsStats['accuracy_2']))
regionsStats

0.2426
nan
nan


Unnamed: 0,region,model,size,accuracy_0,accuracy_1,accuracy_2,cut_off_var
0,LPL,8,297,0.279,,,1.5
1,LCK,8,343,0.318,,,1.5
2,PCS,8,137,0.219,,,1.5
3,VCS,8,161,0.236,,,1.5
4,Ultraliga,8,62,0.161,,,1.5


### FEATURE SELECTION

In [9]:
regionsStats['accuracy_1'] = np.nan
regionsFeatureCols = dict(zip(regionsList,[0]*len(regionsList)))
for key in regionsFeatureCols:
    regionsFeatureCols[key] = featureCols.copy()

for n,region in enumerate(regionsToPredict):
    print('=========\n')
    regionFinalAcc = regionsStats['accuracy_0'][n]
    tempTournamentId = region+currentSemesterYear
    cut_off_var = regionsStats['cut_off_var'][n]
    region_model_number = regionsStats['model'][n]
    print(f'[{n+1} of {len(regionsToPredict)}] region {region} -> {regionFinalAcc}:\n')
    print(f'model: {region_model_number}')
    
    initialFeatures = regionsFeatureCols[region].copy()
    for nn,feature in enumerate(initialFeatures):
        regionsFeatureCols[region].remove(feature)
        
        metric, pred = generateMetric(region_model_number, regionsTrainData[region], regionsFeatureCols[region]
                                      , cut_off_var, tempTournamentId, currentTarget, dfToSplit, splitType)
        if metric < regionFinalAcc or np.isnan(regionFinalAcc):
            regionFinalAcc = metric
            print(f'{feature} removed for {metric}                                                ')
        else:
            regionsFeatureCols[region].append(feature)
            
        print(f'[{nn+1} of {len(initialFeatures)}] testing: {feature}                        ',end='\r')
    
    regionsStats['accuracy_1'][n] = regionFinalAcc
    print(f'\n\naccuracy: {regionFinalAcc}')
    print(f'{region} feature count: {len(regionsFeatureCols[region])}')
    print(f'test data len: {len(pred)}\n')
    
printFinalResults(regionsStats, 'accuracy_1')

hide_toggle()


[1 of 5] region LPL -> 0.279:

model: 8
20221
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
22
len of pred:
297
20221 22] testing: Penta_Kills                        
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
22
len of pred:
297
20221 22] testing: Avg_WCPM                        
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
22
len of pred:
297
20221 22] testing: Win_rate                        
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
22
len of pred:
297
20221 22] testing: Avg_VWPM                        
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
20
len of pred:
297
KP% removed for 0.273                                                
20221 22] testing: KP%                        
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
20
len of pred:
297
20221 22] testing: Solo_Kills                    

### MODEL SELECTION

In [10]:
regionsStats['accuracy_2'] = np.nan

for n,region in enumerate(regionsToPredict):
    print('=========\n')
    currAcc = regionsStats['accuracy_0'][n]
    currModel = regionsStats['model'][n]
    tempTournamentId = region+currentSemesterYear
    cut_off_var = regionsStats['cut_off_var'][n]
    print(f'[{n+1} of {len(regionsToPredict)}] region {region} -> {currAcc}:\n')
    print(f'current model: {currModel}\n')
    
    bestModelAbs = (regionsStats[regionsStats['region']==region])['accuracy_2'].iloc[0]
    for model in range(len(base_models)):
        metricModelAbs, pred = generateMetric(model, regionsTrainData[region], regionsFeatureCols[region]
                                      , cut_off_var, tempTournamentId, currentTarget, dfToSplit, splitType)
        if metricModelAbs<bestModelAbs or np.isnan(bestModelAbs):
            bestModelAbs=metricModelAbs
            bestModel=model
        print(f'model {model} -> {metricModelAbs}')

    regionsStats['model'][n] = bestModel
    regionsStats['accuracy_2'][n] = bestModelAbs
    
    print(f'\naccuracy: {bestModelAbs}')
    print(f'best model: {bestModel}\n')
    
printFinalResults(regionsStats, 'accuracy_2')

hide_toggle()


[1 of 5] region LPL -> 0.279:

current model: 8

20221
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
18
len of pred:
297
model 0 -> 0.365
20221
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
18
len of pred:
297
model 1 -> 0.495
20221
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
18
len of pred:
297
model 2 -> 0.296
20221
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
18
len of pred:
297
model 3 -> 0.37
20221
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
18
len of pred:
297
model 4 -> 0.283
20221
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
18
len of pred:
297
model 5 -> 0.457
20221
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
18
len of pred:
297
model 6 -> 0.286
20221
number of 0s:
0.5426479949077021
[1 0]
len of train:
6284
6284
num of cols:
18
len of pred:
297
model 7 -> 0.263

In [11]:
print(np.mean(regionsStats['accuracy_0']))
print(np.mean(regionsStats['accuracy_1']))
print(np.mean(regionsStats['accuracy_2']))
regionsStats

0.2426
0.2364
0.2364


Unnamed: 0,region,model,size,accuracy_0,accuracy_1,accuracy_2,cut_off_var
0,LPL,7,297,0.279,0.263,0.263,1.5
1,LCK,7,343,0.318,0.318,0.318,1.5
2,PCS,7,137,0.219,0.204,0.204,1.5
3,VCS,7,161,0.236,0.236,0.236,1.5
4,Ultraliga,7,62,0.161,0.161,0.161,1.5


In [12]:
print(np.mean(regionsStats['accuracy_0']))
print(np.mean(regionsStats['accuracy_1']))
print(np.mean(regionsStats['accuracy_2']))
regionsStats

0.2426
0.2364
0.2364


Unnamed: 0,region,model,size,accuracy_0,accuracy_1,accuracy_2,cut_off_var
0,LPL,7,297,0.279,0.263,0.263,1.5
1,LCK,7,343,0.318,0.318,0.318,1.5
2,PCS,7,137,0.219,0.204,0.204,1.5
3,VCS,7,161,0.236,0.236,0.236,1.5
4,Ultraliga,7,62,0.161,0.161,0.161,1.5


# Notes

In [8]:
### DROP OUTLIERS

%%time

# regionsFeatureCols = dict(zip(regionsList,[0]*len(regionsList)))
# for key in regionsFeatureCols:
#     regionsFeatureCols[key] = featureCols.copy()

# for n,region in enumerate(regionsToPredict):
#     print('=========\n')
#     regionFinalAcc = regionsStats['accuracy_0'][n]
#     tempTournamentId = region+currentSemesterYear
#     cut_off_var = regionsStats['cut_off_var'][n]
#     region_model_number = regionsStats['model'][n]
#     print(f'[{n+1} of {len(regionsToPredict)}] region {region} -> {regionFinalAcc}:\n')
#     print(f'current var: {cut_off_var}')
#     for var in np.arange(1.0,2.0,0.1):
        
#         metric, pred = generateMetric(region_model_number, regionsTrainData[region], regionsFeatureCols[region]
#                                       , var, tempTournamentId, currentTarget, dfToSplit, splitType)
        
#         print(f'var: {round(var,2)}, metric: {metric}')
#         if metric < regionFinalAcc or np.isnan(regionFinalAcc):
#             regionFinalAcc = metric
#             regionsStats['cut_off_var'][n] = var
#             print(f'changed to {round(var,2)} cut-off for {metric}                                                ')
            
#         #print(f'[{nn+1} of {len(initialFeatures)}] testing: {feature}                        ',end='\r')
    
#     regionsStats['accuracy_0'][n] = regionFinalAcc
#     print(f'\n\naccuracy: {regionFinalAcc}')
#     print(f'test data len: {len(pred)}\n')
    
# printFinalResults(regionsStats, 'accuracy_0')

hide_toggle()


[1 of 27] region LPL -> 0.332:

current var: 1.5
var: 1.0, metric: 0.428
var: 1.1, metric: 0.351
var: 1.2, metric: 0.37
var: 1.3, metric: 0.358
var: 1.4, metric: 0.37
var: 1.5, metric: 0.347
var: 1.6, metric: 0.373
var: 1.7, metric: 0.354
var: 1.8, metric: 0.34
var: 1.9, metric: 0.384


accuracy: 0.332
test data len: 297


[2 of 27] region LCK -> 0.372:

current var: 1.5
var: 1.0, metric: 0.39
var: 1.1, metric: 0.408
var: 1.2, metric: 0.405
var: 1.3, metric: 0.386
var: 1.4, metric: 0.404
var: 1.5, metric: 0.405
var: 1.6, metric: 0.416
var: 1.7, metric: 0.379
var: 1.8, metric: 0.401
var: 1.9, metric: 0.4


accuracy: 0.372
test data len: 343


[3 of 27] region PCS -> 0.246:

current var: 1.5
var: 1.0, metric: 0.336
var: 1.1, metric: 0.258
var: 1.2, metric: 0.276
var: 1.3, metric: 0.266
var: 1.4, metric: 0.277
var: 1.5, metric: 0.263
var: 1.6, metric: 0.283
var: 1.7, metric: 0.248
var: 1.8, metric: 0.273
var: 1.9, metric: 0.27


accuracy: 0.246
test data len: 137


[4 of 27] region VCS -

var: 1.2, metric: 0.287
var: 1.3, metric: 0.265
var: 1.4, metric: 0.243
var: 1.5, metric: 0.246
var: 1.6, metric: 0.283
var: 1.7, metric: 0.281
var: 1.8, metric: 0.276
var: 1.9, metric: 0.261


accuracy: 0.226
test data len: 90


[25 of 27] region LVP -> 0.448:

current var: 1.5
var: 1.0, metric: 0.688
var: 1.1, metric: 0.688
var: 1.2, metric: 0.5
var: 1.3, metric: 0.508
var: 1.4, metric: 0.544
var: 1.5, metric: 0.518
var: 1.6, metric: 0.536
var: 1.7, metric: 0.492
var: 1.8, metric: 0.508
var: 1.9, metric: 0.526


accuracy: 0.448
test data len: 64


[26 of 27] region NA_Tier2 -> 0.812:

current var: 1.5
var: 1.0, metric: 0.896
var: 1.1, metric: 0.904
var: 1.2, metric: 0.892
var: 1.3, metric: 0.9
var: 1.4, metric: 0.892
var: 1.5, metric: 0.892
var: 1.6, metric: 0.879
var: 1.7, metric: 0.875
var: 1.8, metric: 0.946
var: 1.9, metric: 0.862


accuracy: 0.812
test data len: 80


[27 of 27] region MSI -> nan:

current var: 1.5


ValueError: Found array with 0 sample(s) (shape=(0, 44)) while a minimum of 1 is required by BaggingClassifier.