# Setup

In [1]:
import pandas as pd
import numpy as np
import random

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
    
import json

from Utils.constants import *
import Utils.utils_file as utils_file
import Utils.model_file as model_file

import warnings
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)
warnings.filterwarnings('ignore')

# Load

In [2]:
cache = True

Utils = utils_file.Utils_Class(target='Score'
                                ,default_model=0
                                ,model_type='logistic'
                                ,cache_model=cache
                                ,cache_scraping=True)

Utils.TARGET_DF = Utils.TARGET_DF[Utils.TARGET_DF[Utils.TARGET]!=2]

regions_to_feed, regions_to_predict = Utils.region_lists()
regions_to_feed = list(set(regions_to_feed + regions_to_predict))

if cache:
    try:    
        regions_feature_cols = Utils.regions_feature_cols
        regions_train_data = Utils.regions_train_data
        regions_stats = Utils.regions_stats
    except:
        print('files not found')

No data found for season 20230 and model logistic


AttributeError: 'Utils_Class' object has no attribute 'regions_feature_cols'

# SKLEARN

In [3]:
regions_stats = pd.DataFrame(columns=['region','model','test_size','train_size'])
regions_stats['region'] = regions_to_predict
regions_list = regions_stats['region']
regions_stats['model'] = Utils.DEFAULT_MODEL

regions_train_data = dict(zip(regions_to_predict,[[x] for x in regions_to_predict]))
regions_stats['error'] = np.nan
regions_stats['cut_off_var'] = 1.5
regions_stats['threshold'] = 0.35
regions_stats['len_lost'] = np.nan

regions_feature_cols = dict(zip(regions_list,[0]*len(regions_list)))
for key in regions_feature_cols:
    regions_feature_cols[key] = PLAYER_SIMPLE_FEATURE_COLS.copy()

### TRAIN DATA SELECTION

In [4]:
regions_train_data = dict(zip(regions_to_predict, [[x] for x in regions_to_predict]))

##########

for n,region in enumerate(regions_to_predict):
    print('=========\n')
    current_accuracy = regions_stats['error'][n]
    cut_off_var = regions_stats['cut_off_var'][n]
    region_model_number = regions_stats['model'][n]
    Utils.logistic_threshold = regions_stats['threshold'][n]
    
    print(f'[{n+1} of {len(regions_to_predict)}] region {region} -> {current_accuracy}:\n')
    
    regionsToTest = [x for x in regions_to_feed]
    regionsToTest.remove(region)
    random.shuffle(regionsToTest)
    for nn,regionToTest in enumerate(regionsToTest):
        regions_train_data[region].append(regionToTest)
        
        metric, pred = Utils.generate_metric(region_model_number, regions_feature_cols[region]
                                             , regions_train_data[region], region, reps=1)
        
        if round(metric,2) < round(current_accuracy,2) or np.isnan(current_accuracy):
            current_accuracy = metric
            
            print(f'{current_accuracy} -> {regionToTest} added                                           ')
        else:
            regions_train_data[region].remove(regionToTest)
    
    train_len = Utils.train_len
    regions_stats['error'][n] = current_accuracy
    regions_stats['train_size'][n] = train_len
    regions_stats['test_size'][n] = len(pred)
    regions_stats['len_lost'][n] = Utils.len_lost
    
    print(f'\nlen lost: {Utils.len_lost}%')
    print(f'accuracy: {current_accuracy}')
    print(f'{region} train data: {regions_train_data[region]}\nlen: {train_len}')
    print(f'test data len: {len(pred)}\n')

mean_acc = np.mean(regions_stats['error'])
print(mean_acc)


[1 of 30] region LFL -> nan:

0.438 -> BRCC added                                           
0.22 -> MSI added                                           
0.212 -> Demacia added                                           
0.196 -> SuperLiga added                                           
0.193 -> LCO added                                           
0.179 -> Prime added                                           
0.173 -> LCS added                                           
0.165 -> Elite_Tier2 added                                           

len lost: 0.29%
accuracy: 0.165
LFL train data: ['LFL', 'BRCC', 'MSI', 'Demacia', 'SuperLiga', 'LCO', 'Prime', 'LCS', 'Elite_Tier2']
len: 3942
test data len: 331


[2 of 30] region LPL -> nan:

0.243 -> Iberian added                                           
0.231 -> EU added                                           
0.188 -> GLL added                                           
0.149 -> PCS added                                           
0.142 -

### FEATURE SELECTION

In [5]:
for key in regions_list:
    regions_feature_cols[key] = PLAYER_SIMPLE_FEATURE_COLS.copy()

##########

for n,region in enumerate(regions_to_predict):
    print('=========\n')

    current_accuracy = regions_stats['error'][n]
    cut_off_var = regions_stats['cut_off_var'][n]
    region_model_number = regions_stats['model'][n]
    Utils.logistic_threshold = regions_stats['threshold'][n]

    print(f'[{n+1} of {len(regions_to_predict)}] region {region} -> {current_accuracy}:\n')
    print(f'model: {region_model_number}')
    
    initialFeatures = regions_feature_cols[region].copy()
    for nn,feature in enumerate(initialFeatures):
        regions_feature_cols[region].remove(feature)
        
        metric, pred = Utils.generate_metric(region_model_number, regions_feature_cols[region]
                                             , regions_train_data[region], region, reps=1)
        if round(metric,2) < round(current_accuracy,2) or np.isnan(current_accuracy):
            current_accuracy = metric
            print(f'{feature} removed for {metric}                                                ')
        else:
            regions_feature_cols[region].append(feature)
    
    train_len = Utils.train_len
    regions_stats['error'][n] = current_accuracy
    regions_stats['train_size'][n] = train_len
    regions_stats['len_lost'][n] = Utils.len_lost

    print(f'\n\nlen lost: {Utils.len_lost}%')
    print(f'accuracy: {current_accuracy}')
    print(f'{region} feature count: {len(regions_feature_cols[region])}')
    print(f'test data len: {len(pred)}\n')
    
mean_acc = np.mean(regions_stats['error'])
print(mean_acc)


[1 of 30] region LFL -> 0.165:

model: 0


len lost: 0.28%
accuracy: 0.165
LFL feature count: 22
test data len: 331


[2 of 30] region LPL -> 0.132:

model: 0


len lost: 0.29%
accuracy: 0.132
LPL feature count: 22
test data len: 586


[3 of 30] region Prime_Tier2 -> 0.0:

model: 0


len lost: 0.04%
accuracy: 0.0
Prime_Tier2 feature count: 22
test data len: 52


[4 of 30] region Ultraliga -> 0.048:

model: 0
CSD@15 removed for 0.045                                                
Avg_kills removed for 0.034                                                


len lost: 0.63%
accuracy: 0.034
Ultraliga feature count: 20
test data len: 171


[5 of 30] region SuperLiga -> 0.0:

model: 0


len lost: 0.0%
accuracy: 0.0
SuperLiga feature count: 22
test data len: 52


[6 of 30] region MSI -> 0.0:

model: 0


len lost: 0.0%
accuracy: 0.0
MSI feature count: 22
test data len: 25


[7 of 30] region EMEA -> 0.0:

model: 0


len lost: 0.45%
accuracy: 0.0
EMEA feature count: 22
test data len: 69


[8 o

### MODEL SELECTION

In [6]:
regions_stats['error'] = np.nan

##########

for n,region in enumerate(regions_to_predict):
    print('=========\n')

    current_accuracy = regions_stats['error'][n]
    currModel = regions_stats['model'][n]
    cut_off_var = regions_stats['cut_off_var'][n]
    Utils.logistic_threshold = regions_stats['threshold'][n]

    print(f'[{n+1} of {len(regions_to_predict)}] region {region} -> {current_accuracy}:\n')
    print(f'current model: {currModel}\n')
    
    bestModelAbs = (regions_stats[regions_stats['region']==region])['error'].iloc[0]
    for model in range(len(Utils.BASE_MODELS)):
        metric, pred = Utils.generate_metric(model, regions_feature_cols[region]
                                             , regions_train_data[region], region, reps=5)
        if round(metric,2) < round(bestModelAbs,2) or np.isnan(bestModelAbs):
            bestModelAbs = metric
            bestModel = model
        print(f'model {model} -> {metric}')

    train_len = Utils.train_len
    regions_stats['train_size'][n] = train_len
    regions_stats['model'][n] = bestModel
    regions_stats['error'][n] = bestModelAbs
    regions_stats['len_lost'][n] = Utils.len_lost
    
    print(f'\nlen lost: {Utils.len_lost}%')
    print(f'accuracy: {bestModelAbs}')
    print(f'best model: {bestModel}\n')
    
mean_acc = np.mean(regions_stats['error'])
print(mean_acc)


[1 of 30] region LFL -> nan:

current model: 0

model 0 -> 0.165
model 1 -> 0.167
model 2 -> 0.231
model 3 -> 0.238
model 4 -> 0.167
model 5 -> 0.169
model 6 -> 0.185
model 7 -> 0.178
model 8 -> 0.25
model 9 -> 0.25
model 10 -> 0.289
model 11 -> 0.289
model 12 -> 0.289

len lost: 0.77%
accuracy: 0.165
best model: 0


[2 of 30] region LPL -> nan:

current model: 0

model 0 -> 0.132
model 1 -> 0.139
model 2 -> 0.133
model 3 -> 0.154
model 4 -> 0.147
model 5 -> 0.138
model 6 -> 0.139
model 7 -> 0.133
model 8 -> 0.299
model 9 -> 0.299
model 10 -> 0.303
model 11 -> 0.303
model 12 -> 0.303

len lost: 0.87%
accuracy: 0.132
best model: 0


[3 of 30] region Prime_Tier2 -> nan:

current model: 0

model 0 -> 0.0
model 1 -> 0.0
model 2 -> 0.0
model 3 -> 0.0
model 4 -> 0.0
model 5 -> 0.02
model 6 -> 0.02
model 7 -> 0.0
model 8 -> 0.0
model 9 -> 0.0
model 10 -> 0.0
model 11 -> 0.0
model 12 -> 0.0

len lost: 0.65%
accuracy: 0.0
best model: 0


[4 of 30] region Ultraliga -> nan:

current model: 0

mo

In [7]:
print(np.mean(regions_stats['error']))
regions_stats

0.06436666666666667


Unnamed: 0,region,model,test_size,train_size,error,cut_off_var,threshold,len_lost
0,LFL,0,331,3890,0.165,1.5,0.35,0.77
1,LPL,0,586,5264,0.132,1.5,0.35,0.87
2,Prime_Tier2,0,52,725,0.0,1.5,0.35,0.65
3,Ultraliga,0,171,3229,0.034,1.5,0.35,0.76
4,SuperLiga,0,52,323,0.0,1.5,0.35,0.0
5,MSI,0,25,1048,0.0,1.5,0.35,0.88
6,EMEA,0,69,3258,0.0,1.5,0.35,0.93
7,LCO,0,143,5523,0.08,1.5,0.35,0.4
8,VCS,0,254,2267,0.164,1.5,0.35,0.5
9,LEC,0,200,3990,0.085,1.5,0.35,0.88


In [8]:
Utils.save_model_cache(regions_stats, regions_feature_cols, regions_train_data)

Cache saved!


# Notes