# Setup

In [2]:
import pandas as pd
import numpy as np
import random

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
    
import json

from Utils.constants import *
from Utils.utils_file import Utils_Class

import warnings
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)
warnings.filterwarnings('ignore')

# FUNCTIONS

# Load

In [2]:
# Run configuration passed to Utils_Class.
target = 'Score'
split_type = 0
default_model = 7

Utils = Utils_Class(target=target, split_type=split_type, default_model=default_model)

# Keep only rows whose target differs from 2 and whose semester-year is not
# later than the current one.
target_not_two = Utils.TARGET_DF[Utils.TARGET] != 2
up_to_current = Utils.TARGET_DF['realSemesterYear'].astype(int) <= int(Utils.CURRENT_SEMESTER_YEAR)
Utils.TARGET_DF = Utils.TARGET_DF[target_not_two & up_to_current]

regions_to_feed, regions_to_predict = Utils.region_lists(min_entries=30)
Utils.define_models('binary')

# Only the first five regions are predicted in this run.
# Alternatives that were tried and are currently disabled:
#   regions_to_predict.remove('MSI'); regions_to_predict.remove('World')
#   regions_to_predict = ['MSI','World']
regions_to_predict = regions_to_predict[:5]

# SKLEARN

In [3]:
#
# One row per region to predict; the accuracy_* columns start empty and are
# filled in by the selection cells further down.
regions_stats = pd.DataFrame(columns=['region','model','size'])
regions_stats['region'] = regions_to_predict
regions_list = regions_stats['region']
regions_stats['model'] = Utils.DEFAULT_MODEL
for stage_col in ('accuracy_0', 'accuracy_1', 'accuracy_2'):
    regions_stats[stage_col] = np.nan
regions_stats['cut_off_var'] = 1.5

# Each region starts with itself as its only training region and with its own
# independent copy of the default feature list.
regions_train_data = {region_name: [region_name] for region_name in regions_list}
regions_feature_cols = {region_name: TEAM_SIMPLE_FEATURE_COLS.copy() for region_name in regions_list}

### TRAIN DATA SELECTION

In [4]:
# Greedy selection of training regions.
# For every region to predict, candidate regions are tried one at a time in
# random order; a candidate is kept only when the value returned by
# Utils.generate_metric becomes strictly lower than the best seen so far
# (the metric is treated as lower-is-better here).
# NOTE(review): random.shuffle is not seeded, so the selected set can differ
# between runs.
regions_train_data = {region_name: [region_name] for region_name in regions_list}
regions_stats['accuracy_0'] = np.nan

##########

for n, region in enumerate(regions_to_predict):
    print('=========\n')
    regionFinalAcc = regions_stats.loc[n, 'accuracy_0']
    tempTournamentId = region + Utils.CURRENT_SEMESTER_YEAR
    region_model_number = regions_stats.loc[n, 'model']
    print(f'[{n+1} of {len(regions_to_predict)}] region {region} -> {regionFinalAcc}:\n')

    # Filtering (instead of list.remove) does not raise when the region is
    # absent from regions_to_feed.
    regionsToTest = [x for x in regions_to_feed if x != region]
    random.shuffle(regionsToTest)

    # Reset per region so an empty candidate list cannot reuse the previous
    # region's predictions (or hit an unbound name on the first region).
    pred = []
    for regionToTest in regionsToTest:
        regions_train_data[region].append(regionToTest)

        metric, pred = Utils.generate_metric(region_model_number, regions_feature_cols[region]
                                             , regions_train_data[region], tempTournamentId, reps=5)

        if metric < regionFinalAcc or np.isnan(regionFinalAcc):
            regionFinalAcc = metric

            print(f'{regionFinalAcc} -> {regionToTest} added                                           ')
        else:
            # Candidate did not improve the metric: undo the tentative add.
            regions_train_data[region].remove(regionToTest)

    # .loc writes into regions_stats itself; chained indexing
    # (regions_stats[col][n] = ...) may write to a temporary copy.
    regions_stats.loc[n, 'accuracy_0'] = regionFinalAcc
    regions_stats.loc[n, 'size'] = len(pred)

    print(f'\n\naccuracy: {regionFinalAcc}')
    print(f'{region} train data: {regions_train_data[region]}\nlen:{len(regions_train_data[region])}')
    print(f'test data len: {len(pred)}\n')

mean_acc = np.mean(regions_stats['accuracy_0'])
print(mean_acc)


[1 of 5] region LPL -> nan:

0.331 -> VCS added                                           
0.328 -> LCS added                                           
0.322 -> OPL added                                           
0.313 -> LCO added                                           
0.307 -> LCK added                                           


KeyboardInterrupt: 

### FEATURE SELECTION

In [14]:
# Greedy backward elimination of features.
# Starting from the full default feature list, each feature is removed in turn;
# the removal is kept only when the value returned by Utils.generate_metric
# becomes strictly lower than the best seen so far (starting from accuracy_0).
regions_stats['accuracy_1'] = np.nan
regions_feature_cols = {region_name: TEAM_SIMPLE_FEATURE_COLS.copy() for region_name in regions_list}

##########

for n, region in enumerate(regions_to_predict):
    print('=========\n')
    regionFinalAcc = regions_stats.loc[n, 'accuracy_0']
    tempTournamentId = region + Utils.CURRENT_SEMESTER_YEAR
    region_model_number = regions_stats.loc[n, 'model']
    print(f'[{n+1} of {len(regions_to_predict)}] region {region} -> {regionFinalAcc}:\n')
    print(f'model: {region_model_number}')

    # Iterate over a snapshot because the live list is mutated inside the loop.
    initialFeatures = regions_feature_cols[region].copy()

    # Reset per region so an empty feature list cannot reuse the previous
    # region's predictions (or hit an unbound name on the first region).
    pred = []
    for feature in initialFeatures:
        regions_feature_cols[region].remove(feature)

        metric, pred = Utils.generate_metric(region_model_number, regions_feature_cols[region]
                                             , regions_train_data[region], tempTournamentId, reps=5)

        if metric < regionFinalAcc or np.isnan(regionFinalAcc):
            regionFinalAcc = metric
            print(f'{feature} removed for {metric}                                                ')
        else:
            # Removal did not help: put the feature back (at the end of the list).
            regions_feature_cols[region].append(feature)

    # .loc writes into regions_stats itself; chained indexing
    # (regions_stats[col][n] = ...) may write to a temporary copy.
    regions_stats.loc[n, 'accuracy_1'] = regionFinalAcc
    print(f'\n\naccuracy: {regionFinalAcc}')
    print(f'{region} feature count: {len(regions_feature_cols[region])}')
    print(f'test data len: {len(pred)}\n')

mean_acc = np.mean(regions_stats['accuracy_1'])
print(mean_acc)


[1 of 5] region LPL -> 0.35:

model: 7
GPM removed for 0.316                                                
GD@15 removed for 0.307                                                


accuracy: 0.307
LPL feature count: 20
test data len: 329


[2 of 5] region LCK -> 0.298:

model: 7


accuracy: 0.298
LCK feature count: 22
test data len: 248


[3 of 5] region PCS -> 0.314:

model: 7
GPM removed for 0.305                                                


accuracy: 0.305
PCS feature count: 21
test data len: 105


[4 of 5] region VCS -> 0.283:

model: 7
DPM removed for 0.261                                                


accuracy: 0.261
VCS feature count: 21
test data len: 92


[5 of 5] region Ultraliga -> 0.296:

model: 7


accuracy: 0.296
Ultraliga feature count: 22
test data len: 108

0.2934


### MODEL SELECTION

In [15]:
# Per-region model selection.
# Every index of Utils.BASE_MODELS is evaluated with the region's selected
# features and training regions; the model with the strictly lowest value from
# Utils.generate_metric wins (on ties the lower index is kept).
regions_stats['accuracy_2'] = np.nan

##########

for n, region in enumerate(regions_to_predict):
    print('=========\n')
    currAcc = regions_stats.loc[n, 'accuracy_0']
    currModel = regions_stats.loc[n, 'model']
    tempTournamentId = region + Utils.CURRENT_SEMESTER_YEAR
    print(f'[{n+1} of {len(regions_to_predict)}] region {region} -> {currAcc}:\n')
    print(f'current model: {currModel}\n')

    # accuracy_2 was reset to NaN above, so the first evaluated model always
    # becomes the initial best.
    bestModelAbs = regions_stats.loc[n, 'accuracy_2']
    # Default to the current model so that an empty model list cannot leave
    # bestModel unbound or carry over the previous region's winner.
    bestModel = currModel
    for model in range(len(Utils.BASE_MODELS)):
        metric, pred = Utils.generate_metric(model, regions_feature_cols[region]
                                             , regions_train_data[region], tempTournamentId, reps=5)
        if metric < bestModelAbs or np.isnan(bestModelAbs):
            bestModelAbs = metric
            bestModel = model
        print(f'model {model} -> {metric}')

    # .loc writes into regions_stats itself; chained indexing
    # (regions_stats[col][n] = ...) may write to a temporary copy.
    regions_stats.loc[n, 'model'] = bestModel
    regions_stats.loc[n, 'accuracy_2'] = bestModelAbs

    print(f'\naccuracy: {bestModelAbs}')
    print(f'best model: {bestModel}\n')

mean_acc = np.mean(regions_stats['accuracy_2'])
print(mean_acc)


[1 of 5] region LPL -> 0.35:

current model: 7

model 0 -> 0.368
model 1 -> 0.386
model 2 -> 0.325
model 3 -> 0.431
model 4 -> 0.462
model 5 -> 0.458
model 6 -> 0.307
model 7 -> 0.307

accuracy: 0.307
best model: 6


[2 of 5] region LCK -> 0.298:

current model: 7

model 0 -> 0.333
model 1 -> 0.464
model 2 -> 0.327
model 3 -> 0.35
model 4 -> 0.347
model 5 -> 0.395
model 6 -> 0.306
model 7 -> 0.298

accuracy: 0.298
best model: 7


[3 of 5] region PCS -> 0.314:

current model: 7

model 0 -> 0.345
model 1 -> 0.295
model 2 -> 0.362
model 3 -> 0.339
model 4 -> 0.446
model 5 -> 0.43
model 6 -> 0.333
model 7 -> 0.305

accuracy: 0.295
best model: 1


[4 of 5] region VCS -> 0.283:

current model: 7

model 0 -> 0.38
model 1 -> 0.467
model 2 -> 0.391
model 3 -> 0.407
model 4 -> 0.337
model 5 -> 0.504
model 6 -> 0.293
model 7 -> 0.261

accuracy: 0.261
best model: 7


[5 of 5] region Ultraliga -> 0.296:

current model: 7

model 0 -> 0.35
model 1 -> 0.426
model 2 -> 0.306
model 3 -> 0.367
model 4 -

In [None]:
# Persist the per-region feature lists and training-region lists as JSON.
# json.dump needs the open file handle as its second argument; without it the
# call raises TypeError and the files are left empty.
with open('../Data/raw_data/regions_feature_cols.json', 'w') as fp:
    json.dump(regions_feature_cols, fp)
with open('../Data/raw_data/regions_train_data.json', 'w') as fp:
    json.dump(regions_train_data, fp)

# Testing

In [16]:
# Mean of each stage's metric column, in stage order, followed by the full table.
for stage_col in ('accuracy_0', 'accuracy_1', 'accuracy_2'):
    print(np.mean(regions_stats[stage_col]))
regions_stats

0.3082
0.2934
0.2914


Unnamed: 0,region,model,size,accuracy_0,accuracy_1,accuracy_2,cut_off_var
0,LPL,6,329,0.35,0.307,0.307,1.5
1,LCK,7,248,0.298,0.298,0.298,1.5
2,PCS,1,105,0.314,0.305,0.295,1.5
3,VCS,7,92,0.283,0.261,0.261,1.5
4,Ultraliga,7,108,0.296,0.296,0.296,1.5


In [16]:
def get_team_names(name):
    """Return the TOP/JNG/MID/ADC/SUP entries of a team's current roster row.

    Looks up ``Utils.team_data_table`` for rows whose ``Name`` equals ``name``
    and whose ``Year`` / ``Semester`` (cast to int) equal
    ``Utils.CURRENT_YEAR`` / ``Utils.CURRENT_SEMESTER``, and returns the five
    role columns of the first matching row as a Series.

    Raises IndexError (from ``.iloc[0]``) when no row matches.
    """
    roster = Utils.team_data_table
    is_team = roster['Name'] == name
    is_current_year = roster['Year'].astype(int) == Utils.CURRENT_YEAR
    is_current_semester = roster['Semester'].astype(int) == Utils.CURRENT_SEMESTER

    matches = roster[is_team & is_current_year & is_current_semester]
    return matches[['TOP','JNG','MID','ADC','SUP']].iloc[0]

def get_feature_team_mean(namesList,feature):
    """Average one player-level stat over the given players.

    ``feature`` is a team feature name; its ``Team_Red_`` / ``Team_Blue_``
    prefix is stripped to obtain the column read from
    ``Utils.player_data_table``. For each player, the first row matching the
    player name and the current year and semester contributes its value.

    Returns a tuple ``(mean, noDataList)``: the ``np.mean`` of the collected
    values and the list of players for which no matching row was found.
    """
    players = Utils.player_data_table
    stat_col = feature.replace('Team_Red_','').replace('Team_Blue_','')

    values, noDataList = [], []
    for name in namesList:
        player_rows = players[(players['Player']==name)
                              & (players['Year']==Utils.CURRENT_YEAR)
                              & (players['Semester']==Utils.CURRENT_SEMESTER)]
        stat = player_rows[stat_col]
        if len(stat) == 0:
            noDataList.append(name)
        else:
            values.append(stat.iloc[0])

    return np.mean(values),noDataList


In [17]:
# Index every team that appears on either side in the chosen region, sorted by
# name, so a team can be picked by its number in the cells below.
regionTest = 'CBLOL'
dfTemp = Utils.TARGET_DF[Utils.TARGET_DF['regionAbrev']==regionTest]
teamsSet = sorted(set(dfTemp['Blue'].unique()) | set(dfTemp['Red'].unique()))
teamsDict = dict(enumerate(teamsSet))
teamsDict

{0: 'CNB e-Sports Club',
 1: 'FURIA Esports',
 2: 'FURIA Uppercut',
 3: 'Flamengo Los Grandes',
 4: 'Flamengo eSports',
 5: 'Fluxo',
 6: 'INTZ e-Sports',
 7: 'INTZ eSports',
 8: 'KaBuM! e-Sports',
 9: 'LOUD',
 10: 'Liberty',
 11: 'Los Grandes',
 12: 'Netshoes Miners',
 13: 'Prodigy Esports',
 14: 'RED Canids',
 15: 'Redemption POA',
 16: 'Rensga eSports',
 17: 'Santos e-Sports',
 18: 'Team oNe eSports',
 19: 'Uppercut eSports',
 20: 'Vivo Keyd',
 21: 'Vivo Keyd Stars',
 22: 'Vorax Liberty',
 23: 'paiN Gaming'}

In [79]:
# Indices into teamsDict for the two teams of the match-up.
team0 = 1
team1 = 10

# Print the current-semester roster of each team in a form that can be pasted
# into the manual name lists below. An explicit emptiness check replaces the
# former bare `except:`, which would also have hidden unrelated errors.
for label, team_idx in (('teamBlueNames', team0), ('teamRedNames', team1)):
    playerNames = Utils.team_data_table[(Utils.team_data_table['Name']==teamsDict[team_idx])
                                & (Utils.team_data_table['Year'].astype(int)==Utils.CURRENT_YEAR)
                                & (Utils.team_data_table['Semester'].astype(int)==Utils.CURRENT_SEMESTER)][['TOP','JNG','MID','ADC','SUP']]

    if len(playerNames) > 0:
        print(f'{label} = {list(playerNames.values[0])}')
    else:
        print('no team found')

# Manually entered rosters (override whatever was printed above).
teamBlueNames = ['fNb', 'Goot', 'Envy', 'Netuno', 'RedBert']
teamRedNames = ['Kiari', 'Disamis', 'Krastyel', 'Cavalo', 'Matsukaze']


In [80]:
# Predict the winner of team0 vs team1, repeated 5 times and with both side
# assignments (swap=0: team0 on blue, swap=1: team1 on blue).
# manualNameInsert == 1 uses the hard-coded rosters below; 0 looks them up with
# get_team_names.
# NOTE(review): regionTest must be a key of regions_train_data and
# regions_feature_cols and a row of regions_stats; the selection cells above
# only create entries for regions_to_predict - confirm regionTest is included.
manualNameInsert = 1

# The training data does not depend on the repetition or on the side swap, so
# it is built once instead of on every iteration.
finalDfInput = Utils.TARGET_DF[Utils.TARGET_DF['regionAbrev'].isin(regions_train_data[regionTest])].copy()
finalDfInput = finalDfInput.sort_values(by='Date',ascending=True)
# Frame-level fillna; the per-column `fillna(..., inplace=True)` on a column
# selection may operate on a temporary copy.
finalDfInput = finalDfInput.fillna(0)

featureColsFiltered = [x for x in list(finalDfInput.columns)
                       if x.replace('Team_Blue_','').replace('Team_Red_','') in regions_feature_cols[regionTest]]

xdata = finalDfInput[featureColsFiltered]
ydata = finalDfInput[Utils.TARGET]

modelNum = (regions_stats[regions_stats['region']==regionTest])['model'].iloc[0]
model = Utils.BASE_MODELS[modelNum]

for rep in range(5):
    print('=================')
    for swap in [0,1]:
        #teams
        teamBlueNames = ['fNb', 'Goot', 'Envy', 'Trigo', 'RedBert']
        teamRedNames = ['Kiari', 'accez', 'Piloto', 'Juliera', 'Cavalo']
        teamBlueTest = teamsDict[team0]
        teamRedTest = teamsDict[team1]
        if swap==1:
            teamBlueTest, teamRedTest = teamRedTest, teamBlueTest
            teamBlueNames, teamRedNames = teamRedNames, teamBlueNames

        #generate features to predict
        if manualNameInsert==0:
            teamBlueNames = get_team_names(teamBlueTest)
            teamRedNames = get_team_names(teamRedTest)

        featuresDict = {}
        # Collect missing players across ALL features; previously only the
        # result for the last feature was reported.
        noDataNames = []
        for feature in featureColsFiltered:
            side = feature.split('_')[1]
            if side == 'Blue':
                sideNames = teamBlueNames
            elif side == 'Red':
                sideNames = teamRedNames
            else:
                continue
            featuresDict[feature], missingNames = get_feature_team_mean(sideNames,feature)
            for playerName in missingNames:
                if playerName not in noDataNames:
                    noDataNames.append(playerName)
        print(f'no data names on: {noDataNames}')

        inputDf = pd.DataFrame(featuresDict.values(),index=featuresDict.keys()).transpose()
        inputDf = inputDf.fillna(0)

        #model and prediction
        # The model is refit on every pass, as in the original cell.
        model.fit(xdata,ydata)
        # A predicted class of 0 is reported as the blue-side team.
        prediction = teamBlueTest if model.predict(inputDf)[0]==0 else teamRedTest
        print(prediction)

no data names on: []
FURIA Esports
no data names on: []
FURIA Esports
no data names on: []
FURIA Esports
no data names on: []
FURIA Esports
no data names on: []
FURIA Esports
no data names on: []
FURIA Esports
no data names on: []
FURIA Esports
no data names on: []
FURIA Esports
no data names on: []
FURIA Esports
no data names on: []
FURIA Esports


# Notes