# Setup

In [1]:
import pandas as pd
import numpy as np
import random

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
    
import json

from Utils.constants import *
import Utils.utils_file as utils_file
import Utils.model_file as model_file

import warnings
# Notebook-wide display configuration: render up to 100 rows and never
# truncate columns when a DataFrame is shown.
for option_name, option_value in (('display.max_rows', 100),
                                  ('display.max_columns', None)):
    pd.set_option(option_name, option_value)

# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Load

In [2]:
# Build the project helper object and the region lists used by the search
# cells below. `cache` is forwarded as `cache_model` and also decides whether
# the per-region state is read back from the helper at the end of this cell.
cache = True

Utils = utils_file.Utils_Class(target='Score'
                                ,default_model=0
                                ,model_type='logistic'
                                ,cache_model=cache
                                ,cache_scraping=True)

# Keep only the rows whose target value is not 2.
# NOTE(review): presumably 2 marks a class outside the binary problem — confirm.
Utils.TARGET_DF = Utils.TARGET_DF[Utils.TARGET_DF[Utils.TARGET]!=2]

regions_to_feed, regions_to_predict = Utils.region_lists()
# Union of both lists without duplicates. sorted() rather than list(set(...)):
# set iteration order over strings changes between kernel sessions (hash
# randomisation), which would change the order in which the search cells
# visit candidate regions from one run to the next.
regions_to_feed = sorted(set(regions_to_feed + regions_to_predict))

if cache:
    # Reuse the per-region state exposed by the helper object.
    regions_feature_cols = Utils.regions_feature_cols
    regions_train_data = Utils.regions_train_data
    regions_stats = Utils.regions_stats

# SKLEARN

In [3]:
# Start the search from a clean slate. This rebinds regions_stats,
# regions_train_data and regions_feature_cols, replacing whatever those names
# held before this cell ran.

# One bookkeeping row per region to predict.
regions_stats = pd.DataFrame(columns=['region', 'model', 'test_size', 'train_size'])
regions_stats['region'] = regions_to_predict
regions_list = regions_stats['region']
regions_stats['model'] = Utils.DEFAULT_MODEL

# Per-region search defaults; 'error' and 'len_lost' stay NaN until measured.
for stat_column, initial_value in (('error', np.nan),
                                   ('cut_off_var', 1.5),
                                   ('threshold', 0.35),
                                   ('len_lost', np.nan)):
    regions_stats[stat_column] = initial_value

# Every region initially trains on its own data only.
regions_train_data = {region_name: [region_name] for region_name in regions_to_predict}

# Every region starts with its own copy of the full simple feature list.
regions_feature_cols = {region_name: PLAYER_SIMPLE_FEATURE_COLS.copy()
                        for region_name in regions_list}

### TRAIN DATA SELECTION

In [4]:
# Greedy forward selection of training regions.
#
# For every region to predict, start from the region's own data, then try
# adding each other region (in shuffled order) to its training set. A candidate
# is kept only when the metric returned by Utils.generate_metric, rounded to
# two decimals, is strictly lower than the best value so far (or no value has
# been recorded yet); otherwise it is removed again.
#
# Results are written with .loc: chained indexing such as
# regions_stats['error'][n] = value assigns into a temporary object and does
# not update the frame under pandas Copy-on-Write.

# Dedicated, seeded generator so the candidate order is reproducible without
# touching the global `random` state.
# NOTE(review): generate_metric may have its own randomness — not visible here.
SHUFFLE_SEED = 42
shuffle_rng = random.Random(SHUFFLE_SEED)

regions_train_data = {region: [region] for region in regions_to_predict}

##########

for n, region in enumerate(regions_to_predict):
    print('=========\n')
    current_accuracy = regions_stats.loc[n, 'error']
    region_model_number = regions_stats.loc[n, 'model']
    Utils.logistic_threshold = regions_stats.loc[n, 'threshold']

    print(f'[{n+1} of {len(regions_to_predict)}] region {region} -> {current_accuracy}:\n')

    # Every feed region except the target itself; sorted first so the seeded
    # shuffle always starts from the same order.
    candidates = sorted(r for r in regions_to_feed if r != region)
    shuffle_rng.shuffle(candidates)

    # Reset per region so a region without candidates cannot pick up the
    # predictions left over from the previous region.
    pred = None
    for candidate in candidates:
        regions_train_data[region].append(candidate)

        metric, pred = Utils.generate_metric(region_model_number, regions_feature_cols[region]
                                             , regions_train_data[region], region, reps=1)

        if round(metric,2) < round(current_accuracy,2) or np.isnan(current_accuracy):
            current_accuracy = metric

            print(f'{current_accuracy} -> {candidate} added                                           ')
        else:
            # No improvement: undo the tentative addition.
            regions_train_data[region].remove(candidate)

    train_len = Utils.train_len
    test_len = len(pred) if pred is not None else np.nan
    regions_stats.loc[n, 'error'] = current_accuracy
    regions_stats.loc[n, 'train_size'] = train_len
    regions_stats.loc[n, 'test_size'] = test_len
    regions_stats.loc[n, 'len_lost'] = Utils.len_lost

    print(f'\nlen lost: {Utils.len_lost}%')
    print(f'accuracy: {current_accuracy}')
    print(f'{region} train data: {regions_train_data[region]}\nlen: {train_len}')
    print(f'test data len: {test_len}\n')

mean_acc = np.mean(regions_stats['error'])
print(mean_acc)


[1 of 28] region LPL -> nan:

0.247 -> NLC added                                           
0.236 -> OPL added                                           
0.233 -> LJL added                                           
0.219 -> Baltic added                                           
0.212 -> MCR added                                           

len lost: 0.42%
accuracy: 0.212
LPL train data: ['LPL', 'NLC', 'OPL', 'LJL', 'Baltic', 'MCR']
len: 4058
test data len: 626


[2 of 28] region LCK -> nan:

0.388 -> REL added                                           
0.357 -> Hitpoint_Tier2 added                                           
0.344 -> Trinity added                                           
0.328 -> Asia added                                           
0.319 -> GLL added                                           
0.314 -> LMS added                                           
0.303 -> Demacia added                                           
0.288 -> Prime_Tier2 added                    

### FEATURE SELECTION

In [5]:
# Greedy backward elimination of features.
#
# Every region starts again from the full simple feature list. Each feature is
# tentatively removed; the removal is kept only when the metric returned by
# Utils.generate_metric, rounded to two decimals, is strictly lower than the
# best value so far (or no value has been recorded yet). A rejected feature is
# appended back, so it ends up at the end of the region's list.
#
# Results are written with .loc: chained indexing such as
# regions_stats['error'][n] = value assigns into a temporary object and does
# not update the frame under pandas Copy-on-Write.
for key in regions_list:
    regions_feature_cols[key] = PLAYER_SIMPLE_FEATURE_COLS.copy()

##########

for n, region in enumerate(regions_to_predict):
    print('=========\n')

    current_accuracy = regions_stats.loc[n, 'error']
    region_model_number = regions_stats.loc[n, 'model']
    Utils.logistic_threshold = regions_stats.loc[n, 'threshold']

    print(f'[{n+1} of {len(regions_to_predict)}] region {region} -> {current_accuracy}:\n')
    print(f'model: {region_model_number}')

    # Iterate over a snapshot because the live list is mutated inside the loop.
    initialFeatures = regions_feature_cols[region].copy()

    # Reset per region so a region with no features to try cannot report the
    # predictions left over from the previous region.
    pred = None
    for feature in initialFeatures:
        regions_feature_cols[region].remove(feature)

        metric, pred = Utils.generate_metric(region_model_number, regions_feature_cols[region]
                                             , regions_train_data[region], region, reps=1)
        if round(metric,2) < round(current_accuracy,2) or np.isnan(current_accuracy):
            current_accuracy = metric
            print(f'{feature} removed for {metric}                                                ')
        else:
            # No improvement: put the feature back.
            regions_feature_cols[region].append(feature)

    train_len = Utils.train_len
    test_len = len(pred) if pred is not None else np.nan
    regions_stats.loc[n, 'error'] = current_accuracy
    regions_stats.loc[n, 'train_size'] = train_len
    regions_stats.loc[n, 'len_lost'] = Utils.len_lost

    print(f'\n\nlen lost: {Utils.len_lost}%')
    print(f'accuracy: {current_accuracy}')
    print(f'{region} feature count: {len(regions_feature_cols[region])}')
    print(f'test data len: {test_len}\n')

mean_acc = np.mean(regions_stats['error'])
print(mean_acc)


[1 of 28] region LPL -> nan:

model: 0
Games removed for 0.208                                                
Win_rate removed for 0.2                                                


len lost: 0.41%
accuracy: 0.2
LPL feature count: 20
test data len: 626


[2 of 28] region LCK -> nan:

model: 0
Games removed for 0.328                                                
GPM removed for 0.317                                                
GD@15 removed for 0.306                                                
CSD@15 removed for 0.3                                                
Solo_Kills removed for 0.273                                                
Avg_deaths removed for 0.256                                                
Avg_VWPM removed for 0.251                                                


len lost: 0.52%
accuracy: 0.251
LCK feature count: 15
test data len: 444


[3 of 28] region PCS -> nan:

model: 0
Games removed for 0.276                                                

### MODEL SELECTION

In [7]:
# Model selection.
#
# The recorded errors are reset, then every index of Utils.BASE_MODELS is
# evaluated per region with the region's selected features and training
# regions (reps=5). The model with the lowest metric, compared after rounding
# to two decimals, is stored for the region.
#
# Results are written with .loc: chained indexing such as
# regions_stats['error'][n] = value assigns into a temporary object and does
# not update the frame under pandas Copy-on-Write.
regions_stats['error'] = np.nan

##########

for n, region in enumerate(regions_to_predict):
    print('=========\n')

    current_accuracy = regions_stats.loc[n, 'error']
    currModel = regions_stats.loc[n, 'model']
    Utils.logistic_threshold = regions_stats.loc[n, 'threshold']

    print(f'[{n+1} of {len(regions_to_predict)}] region {region} -> {current_accuracy}:\n')
    print(f'current model: {currModel}\n')

    # Best metric so far for this region: the error of the first row carrying
    # the region's name.
    bestModelAbs = regions_stats.loc[regions_stats['region'] == region, 'error'].iloc[0]
    # Fall back to the current model so that an empty Utils.BASE_MODELS cannot
    # leave bestModel unbound or carry over the previous region's choice.
    bestModel = currModel
    # Sizes belonging to the selected model; refreshed whenever a better model
    # is found so the stored row describes the winner, not the last model tried.
    train_len, len_lost = Utils.train_len, Utils.len_lost
    for model in range(len(Utils.BASE_MODELS)):
        metric, pred = Utils.generate_metric(model, regions_feature_cols[region]
                                             , regions_train_data[region], region, reps=5)
        if round(metric,2) < round(bestModelAbs,2) or np.isnan(bestModelAbs):
            bestModelAbs = metric
            bestModel = model
            train_len, len_lost = Utils.train_len, Utils.len_lost
        print(f'model {model} -> {metric}')

    regions_stats.loc[n, 'train_size'] = train_len
    regions_stats.loc[n, 'model'] = bestModel
    regions_stats.loc[n, 'error'] = bestModelAbs
    regions_stats.loc[n, 'len_lost'] = len_lost

    print(f'\nlen lost: {len_lost}%')
    print(f'accuracy: {bestModelAbs}')
    print(f'best model: {bestModel}\n')

mean_acc = np.mean(regions_stats['error'])
print(mean_acc)


[1 of 28] region LPL -> nan:

current model: 0

model 0 -> 0.2

len lost: 0.42%
accuracy: 0.2
best model: 0


[2 of 28] region LCK -> nan:

current model: 0

model 0 -> 0.251

len lost: 0.52%
accuracy: 0.251
best model: 0


[3 of 28] region PCS -> nan:

current model: 0

model 0 -> 0.154

len lost: 0.62%
accuracy: 0.154
best model: 0


[4 of 28] region VCS -> nan:

current model: 0

model 0 -> 0.232

len lost: 0.48%
accuracy: 0.232
best model: 0


[5 of 28] region Ultraliga -> nan:

current model: 0

model 0 -> 0.352

len lost: 0.69%
accuracy: 0.352
best model: 0


[6 of 28] region LLA -> nan:

current model: 0

model 0 -> 0.217

len lost: 0.63%
accuracy: 0.217
best model: 0


[7 of 28] region SuperLiga_Tier2 -> nan:

current model: 0

model 0 -> 0.323

len lost: 0.46%
accuracy: 0.323
best model: 0


[8 of 28] region TCL -> nan:

current model: 0

model 0 -> 0.21

len lost: 0.61%
accuracy: 0.21
best model: 0


[9 of 28] region LFL -> nan:

current model: 0

model 0 -> 0.298

len lost:

In [8]:
# Headline number first, then the full per-region table as the cell's output.
overall_mean_error = np.mean(regions_stats['error'])
print(overall_mean_error)
regions_stats

0.202


Unnamed: 0,region,model,test_size,train_size,error,cut_off_var,threshold,len_lost
0,LPL,0,626,3987,0.2,1.5,0.35,0.42
1,LCK,0,444,3257,0.251,1.5,0.35,0.52
2,PCS,0,274,2237,0.154,1.5,0.35,0.62
3,VCS,0,300,4918,0.232,1.5,0.35,0.48
4,Ultraliga,0,174,2258,0.352,1.5,0.35,0.69
5,LLA,0,164,2244,0.217,1.5,0.35,0.63
6,SuperLiga_Tier2,0,115,2705,0.323,1.5,0.35,0.46
7,TCL,0,161,5966,0.21,1.5,0.35,0.61
8,LFL,0,368,3776,0.298,1.5,0.35,0.64
9,Prime,0,179,3280,0.207,1.5,0.35,0.68


In [9]:
# Hand the search results (stats table, per-region feature lists and
# per-region training-region lists) to the project helper; presumably this
# writes the cache that is read back when cache_model is enabled — confirm.
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'Utils_Class' object has no attribute 'regions_cache'".
# The failing attribute lives inside Utils_Class, which is not visible from
# this notebook, so the fix belongs there.
Utils.save_model_cache(regions_stats, regions_feature_cols, regions_train_data)

AttributeError: 'Utils_Class' object has no attribute 'regions_cache'

# Notes