# Setup

In [9]:
import pandas as pd
import numpy as np
import random

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
    
import json

from Utils.constants import *
import Utils.utils_file as utils_file
import Utils.model_file as model_file

import warnings
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)
warnings.filterwarnings('ignore')

# Load

In [2]:
cache = False

Utils = utils_file.Utils_Class(target='Score'
                                ,default_model=7
                                ,model_type='binary'
                                ,cache_model=cache
                                ,cache_scraping=True)

Utils.TARGET_DF = Utils.TARGET_DF[Utils.TARGET_DF[Utils.TARGET]!=2]

regions_to_feed, regions_to_predict = Utils.region_lists()
regions_to_feed = list(set(regions_to_feed + regions_to_predict))

if cache:
    regions_feature_cols = Utils.regions_feature_cols
    regions_train_data = Utils.regions_train_data
    regions_stats = Utils.regions_stats
    regions_list = regions_stats['region']
    regions_stats['train_size'] = 0
    regions_stats['test_size'] = 0
    regions_stats = regions_stats[regions_stats['region'].isin(regions_to_predict)].reset_index(drop=True)

#regions_to_predict.remove('MSI')
#regions_to_predict.remove('World')

#regions_to_predict = ['MSI','World']

# SKLEARN

In [3]:
regions_stats = pd.DataFrame(columns=['region','model','test_size','train_size'])
regions_stats['region'] = regions_to_predict
regions_list = regions_stats['region']
regions_stats['model'] = Utils.DEFAULT_MODEL

regions_train_data = dict(zip(regions_to_predict,[[x] for x in regions_to_predict]))
regions_stats['accuracy_0'] = np.nan
regions_stats['accuracy_1'] = np.nan
regions_stats['accuracy_2'] = np.nan
regions_stats['cut_off_var'] = 1.5

regions_feature_cols = dict(zip(regions_list,[0]*len(regions_list)))
for key in regions_feature_cols:
    regions_feature_cols[key] = PLAYER_SIMPLE_FEATURE_COLS.copy()

### TRAIN DATA SELECTION

In [4]:
regions_train_data = dict(zip(regions_to_predict, [[x] for x in regions_to_predict]))
regions_stats['accuracy_0'] = np.nan

##########

for n,region in enumerate(regions_to_predict):
    print('=========\n')
    regionFinalAcc = regions_stats['accuracy_0'][n]
    cut_off_var = regions_stats['cut_off_var'][n]
    region_model_number = regions_stats['model'][n]
    print(f'[{n+1} of {len(regions_to_predict)}] region {region} -> {regionFinalAcc}:\n')
    
    regionsToTest = [x for x in regions_to_feed]
    regionsToTest.remove(region)
    random.shuffle(regionsToTest)
    for nn,regionToTest in enumerate(regionsToTest):
        regions_train_data[region].append(regionToTest)
        
        metric, pred = Utils.generate_metric(region_model_number, regions_feature_cols[region]
                                             , regions_train_data[region], region, reps=1)
        
        if metric < regionFinalAcc or np.isnan(regionFinalAcc):
            regionFinalAcc = metric
            
            print(f'{regionFinalAcc} -> {regionToTest} added                                           ')
        else:
            regions_train_data[region].remove(regionToTest)
    
    train_len = Utils.train_len
    regions_stats['accuracy_0'][n] = regionFinalAcc
    regions_stats['train_size'][n] = train_len
    regions_stats['test_size'][n] = len(pred)
    
    print(f'\naccuracy: {regionFinalAcc}')
    print(f'{region} train data: {regions_train_data[region]}\nlen: {train_len}')
    print(f'test data len: {len(pred)}\n')

mean_acc = np.mean(regions_stats['accuracy_0'])
print(mean_acc)


[1 of 28] region LPL -> nan:

0.347 -> CK added                                           
0.324 -> Baltic added                                           
0.305 -> CBLOL_Tier2 added                                           
0.304 -> NACL added                                           

accuracy: 0.304
LPL train data: ['LPL', 'CK', 'Baltic', 'CBLOL_Tier2', 'NACL']
len: 3642
test data len: 626


[2 of 28] region LCK -> nan:

0.39 -> LJL_Tier2 added                                           
0.378 -> MSI added                                           
0.369 -> Prime added                                           
0.36 -> Turkey_Tier2 added                                           
0.358 -> Demacia added                                           
0.354 -> Belgian added                                           

accuracy: 0.354
LCK train data: ['LCK', 'LJL_Tier2', 'MSI', 'Prime', 'Turkey_Tier2', 'Demacia', 'Belgian']
len: 3095
test data len: 444


[3 of 28] region PCS -> nan:

0.347

### FEATURE SELECTION

In [5]:
regions_stats['accuracy_1'] = np.nan
for key in regions_list:
    regions_feature_cols[key] = PLAYER_SIMPLE_FEATURE_COLS.copy()

##########

for n,region in enumerate(regions_to_predict):
    print('=========\n')
    regionFinalAcc = regions_stats['accuracy_0'][n]
    cut_off_var = regions_stats['cut_off_var'][n]
    region_model_number = regions_stats['model'][n]
    print(f'[{n+1} of {len(regions_to_predict)}] region {region} -> {regionFinalAcc}:\n')
    print(f'model: {region_model_number}')
    
    initialFeatures = regions_feature_cols[region].copy()
    for nn,feature in enumerate(initialFeatures):
        regions_feature_cols[region].remove(feature)
        
        metric, pred = Utils.generate_metric(region_model_number, regions_feature_cols[region]
                                             , regions_train_data[region], region, reps=1)
        
        if metric < regionFinalAcc or np.isnan(regionFinalAcc):
            regionFinalAcc = metric
            print(f'{feature} removed for {metric}                                                ')
        else:
            regions_feature_cols[region].append(feature)
    
    train_len = Utils.train_len
    regions_stats['accuracy_1'][n] = regionFinalAcc
    regions_stats['train_size'][n] = train_len

    print(f'\n\naccuracy: {regionFinalAcc}')
    print(f'{region} feature count: {len(regions_feature_cols[region])}')
    print(f'test data len: {len(pred)}\n')
    
mean_acc = np.mean(regions_stats['accuracy_1'])
mean_train = np.mean(regions_stats['train_size'])

print(mean_train)
print(mean_acc)



[1 of 28] region LPL -> 0.304:

model: 7
Penta_Kills removed for 0.299                                                


accuracy: 0.299
LPL feature count: 21
test data len: 626


[2 of 28] region LCK -> 0.354:

model: 7
DMG% removed for 0.336                                                
VSPM removed for 0.324                                                


accuracy: 0.324
LCK feature count: 20
test data len: 444


[3 of 28] region PCS -> 0.299:

model: 7


accuracy: 0.299
PCS feature count: 22
test data len: 274


[4 of 28] region VCS -> 0.363:

model: 7
Avg_deaths removed for 0.353                                                


accuracy: 0.353
VCS feature count: 21
test data len: 300


[5 of 28] region Ultraliga -> 0.454:

model: 7
KP% removed for 0.448                                                


accuracy: 0.448
Ultraliga feature count: 21
test data len: 174


[6 of 28] region LLA -> 0.329:

model: 7
KP% removed for 0.323                                                

### MODEL SELECTION

In [6]:
regions_stats['accuracy_2'] = np.nan

##########

for n,region in enumerate(regions_to_predict):
    print('=========\n')
    currAcc = regions_stats['accuracy_0'][n]
    currModel = regions_stats['model'][n]
    cut_off_var = regions_stats['cut_off_var'][n]
    print(f'[{n+1} of {len(regions_to_predict)}] region {region} -> {currAcc}:\n')
    print(f'current model: {currModel}\n')
    
    bestModelAbs = (regions_stats[regions_stats['region']==region])['accuracy_2'].iloc[0]
    for model in range(len(Utils.BASE_MODELS)):
        metric, pred = Utils.generate_metric(model, regions_feature_cols[region]
                                             , regions_train_data[region], region, reps=5)
        if metric < bestModelAbs or np.isnan(bestModelAbs):
            bestModelAbs = metric
            bestModel = model
        print(f'model {model} -> {metric}')

    train_len = Utils.train_len
    regions_stats['train_size'][n] = train_len
    regions_stats['model'][n] = bestModel
    regions_stats['accuracy_2'][n] = bestModelAbs
    
    print(f'\naccuracy: {bestModelAbs}')
    print(f'best model: {bestModel}\n')
    
mean_acc = np.mean(regions_stats['accuracy_2'])
print(mean_acc)
mean_train = np.mean(regions_stats['train_size'])
print(mean_train)


[1 of 28] region LPL -> 0.304:

current model: 7

model 0 -> 0.358
model 1 -> 0.54
model 2 -> 0.556
model 3 -> 0.399
model 4 -> 0.403
model 5 -> 0.411
model 6 -> 0.343
model 7 -> 0.299

accuracy: 0.299
best model: 7


[2 of 28] region LCK -> 0.354:

current model: 7

model 0 -> 0.435
model 1 -> 0.43
model 2 -> 0.477
model 3 -> 0.486
model 4 -> 0.414
model 5 -> 0.437
model 6 -> 0.39
model 7 -> 0.324

accuracy: 0.324
best model: 7


[3 of 28] region PCS -> 0.299:

current model: 7

model 0 -> 0.376
model 1 -> 0.387
model 2 -> 0.343
model 3 -> 0.369
model 4 -> 0.376
model 5 -> 0.372
model 6 -> 0.38
model 7 -> 0.299

accuracy: 0.299
best model: 7


[4 of 28] region VCS -> 0.363:

current model: 7

model 0 -> 0.367
model 1 -> 0.463
model 2 -> 0.44
model 3 -> 0.493
model 4 -> 0.447
model 5 -> 0.48
model 6 -> 0.413
model 7 -> 0.353

accuracy: 0.353
best model: 7


[5 of 28] region Ultraliga -> 0.454:

current model: 7

model 0 -> 0.454
model 1 -> 0.471
model 2 -> 0.569
model 3 -> 0.46
model 

In [7]:
print(np.mean(regions_stats['accuracy_0']))

regions_stats

0.32260714285714276
0.3160357142857143
0.3140714285714286


Unnamed: 0,region,model,test_size,train_size,accuracy_0,accuracy_1,accuracy_2,cut_off_var
0,LPL,7,626,3551,0.304,0.299,0.299,1.5
1,LCK,7,444,3021,0.354,0.324,0.324,1.5
2,PCS,7,274,1155,0.299,0.299,0.299,1.5
3,VCS,7,300,3288,0.363,0.353,0.353,1.5
4,Ultraliga,4,174,3000,0.454,0.448,0.448,1.5
5,LLA,3,164,1982,0.329,0.317,0.305,1.5
6,SuperLiga_Tier2,5,115,2122,0.383,0.365,0.357,1.5
7,TCL,7,161,2531,0.286,0.28,0.28,1.5
8,LFL,7,368,1998,0.38,0.38,0.38,1.5
9,Prime,7,179,2429,0.346,0.346,0.346,1.5


In [10]:
Utils.save_model_cache(regions_stats, regions_feature_cols, regions_train_data)

Cache saved!


# Notes