# Setup

In [1]:
import Utils.model_file as model_file
import Utils.utils_file as utils_file
import Utils.scraping_file as scraping_file
from Utils.constants import *

import numpy as np
import pandas as pd
import sklearn.metrics as skm

In [2]:
Model = model_file.Final_Model_Class(target='Score', model_type='logistic')
Scraping = scraping_file.Scraping_Class()
Utils = Model.Utils
Utils.TARGET_DF = Utils.TARGET_DF[Utils.TARGET_DF[Utils.TARGET]!=2]

In [3]:
validation_df = Utils.TARGET_DF.copy()
validation_df = validation_df[validation_df['realSemesterYear'] == CURRENT_YEAR_SEMESTER]
print(validation_df.Date.max())

2022-10-04 00:00:00


In [4]:
print(np.mean(Utils.regions_stats['error']))
Utils.regions_stats.head()

0.1905806451612903


Unnamed: 0,region,model,test_size,train_size,error,cut_off_var,threshold,len_lost
0,Prime,0,478,2388,0.283,1.5,0.35,0.55
1,LVP,0,368,3953,0.272,1.5,0.35,0.63
2,NLC,10,255,2081,0.154,1.5,0.35,0.59
3,LVP2,0,5,1374,0.0,1.5,0.35,0.6
4,Ultraliga,0,319,3031,0.193,1.5,0.35,0.45


# Auto validation

In [29]:
final_df = pd.DataFrame()
regions_stats_proof = pd.DataFrame(columns=['region','error','len lost','len', 'true_len'])
perc_val_df = pd.DataFrame(columns=['predicted','true'])
Utils.logistic_threshold = 0.25
final_len = 0

for tournament in validation_df.tournamentId.unique():
    print('============')
    print(f'Tournament: {tournament}')

    region_abrev = tournament[:-5]
    if region_abrev not in Utils.regions_feature_cols:
        continue
    if region_abrev == 'VCS':
        continue

    old_error = Utils.regions_stats[Utils.regions_stats['region']==region_abrev].error.iloc[0]
    target_df = Utils.TARGET_DF.copy()
    feature_columns = Utils.regions_feature_cols[region_abrev]
    train_data = Utils.regions_train_data[region_abrev]
    target_columns = [x for x in list(target_df.columns) if x.replace('Team_Red_','').replace('Team_Blue_','') in feature_columns]

    train_df = target_df[target_df['regionAbrev'].isin(train_data)].copy()
    train_df = train_df[train_df['realSemesterYear'].astype(int) < int(CURRENT_YEAR_SEMESTER)]
    train_df = train_df[target_columns + Utils.INFO_COLS]
    xtrain = train_df.drop(['Date', Utils.TARGET] + OFF_COLS, axis=1, errors='ignore').copy()
    ytrain = train_df[Utils.TARGET]

    prediction_df = validation_df[validation_df['tournamentId'] == tournament].copy()
    prediction_df = prediction_df[target_columns + Utils.INFO_COLS]
    xtest = prediction_df.drop(['Date', Utils.TARGET] + OFF_COLS, axis=1, errors='ignore').copy()
    ytest = prediction_df[Utils.TARGET]
    
    model_number = Utils.regions_stats[Utils.regions_stats.region == region_abrev].model.iloc[0]

    metric, prediction = Utils.make_pred(model_number=model_number, xtrain=xtrain, ytrain=ytrain, xtest=xtest, ytest=ytest, reps=1)
    true_len = round(len(ytest)*abs(Utils.len_lost-1))

    df_to_concat = pd.DataFrame([prediction, ytest]).transpose()
    df_to_concat['region'] = region_abrev
    final_df = pd.concat([final_df,df_to_concat])

    df_to_concat = pd.DataFrame([region_abrev, metric, Utils.len_lost, len(ytest), true_len]).transpose()
    df_to_concat.columns = ['region','error','len lost','len','true_len']
    regions_stats_proof = pd.concat([regions_stats_proof,df_to_concat])

    df_temp = pd.DataFrame(dict({'predicted':prediction,'true':ytest}))
    df_temp['region'] = region_abrev
    perc_val_df = pd.concat([perc_val_df, df_temp])

    print(f'\nExpected error: {old_error}')
    print(f'Real error: {metric}')
    print(f'Len: {len(ytest)}')
    print(f'Len lost: {Utils.len_lost}')
    print(f'True len: {true_len}')


final_df_filter = final_df[(final_df[0]<=Utils.logistic_threshold) | (final_df[0]>=1-Utils.logistic_threshold)]
final_df_filter[0] = final_df_filter[0].round()
final_df_filter['error'] = final_df_filter[0]!=final_df_filter[1]
mean_real_error = skm.mean_absolute_error(round(final_df_filter[0]), final_df_filter[1])
all_len_lost = len(final_df) - len(final_df_filter)
regions_stats_proof = regions_stats_proof[regions_stats_proof['true_len']>0]

perc_val_df['round'] = perc_val_df['predicted'].round().astype(int)
perc_val_df['result'] = (perc_val_df['true'] == perc_val_df['round']).replace({True:1,False:0})
perc_val_df['predicted'] = perc_val_df['predicted'].apply(lambda x: 1-x if x<0.5 else x)
perc_val_df.reset_index(drop=True,inplace=True)

print('\n===============\n')
print(f'Mean real error: {mean_real_error}')
print(f'Len lost: {all_len_lost}')
print(f'Start len: {len(final_df)}')
print(f'Final len: {len(final_df_filter)}')
print(f'last train data: {Model.last_game_date}')


Tournament: World20221
Tournament: EU20221

Expected error: 0.23
Real error: 0.286
Len: 135
Len lost: 0.74
True len: 35
Tournament: LCS20221

Expected error: 0.27
Real error: 0.22
Len: 146
Len lost: 0.66
True len: 50
Tournament: LEC20221

Expected error: 0.304
Real error: 0.188
Len: 123
Len lost: 0.74
True len: 32
Tournament: CBLOL_Tier220221

Expected error: 0.163
Real error: 0.059
Len: 105
Len lost: 0.84
True len: 17
Tournament: TCL20221

Expected error: 0.149
Real error: 0.087
Len: 112
Len lost: 0.79
True len: 24
Tournament: LCO20221

Expected error: 0.164
Real error: 0.183
Len: 106
Len lost: 0.33
True len: 71
Tournament: LJL20221

Expected error: 0.233
Real error: 0.125
Len: 107
Len lost: 0.48
True len: 56
Tournament: LPL20221

Expected error: 0.245
Real error: 0.187
Len: 408
Len lost: 0.58
True len: 171
Tournament: PCS20221

Expected error: 0.113
Real error: 0.176
Len: 137
Len lost: 0.38
True len: 85
Tournament: VCS20221
Tournament: CBLOL20221

Expected error: 0.304
Real error: 0.

In [31]:
perc_val_result_df = pd.DataFrame()
for reg in validation_df.tournamentId.apply(lambda x: x[:-5]).unique():
    print('\n======================================\n')
    print(f'region: {reg}')
    for x in np.arange(0.5,0.9,0.1):
        print('==========')
        print(f'threshold: {round(x,2)}')
        threshold = x
        test_df = perc_val_df[(perc_val_df['predicted']>=threshold)
                                        & (perc_val_df['predicted']<=threshold+0.1)
                                        & (perc_val_df['region']==reg)]
        
        if len(test_df.result.unique())>1:
            result = round(test_df.result.value_counts()[1]/len(test_df),2)
            print(f'result: {round(test_df.result.value_counts()[1]/len(test_df),2)}')
            perc_val_result_df = perc_val_result_df.append(pd.Series([reg, round(threshold,2), result, len(test_df)]), ignore_index=True)

        print(f'len: {len(test_df)}')

perc_val_result_df.columns = ['region','threshold','result','len']



region: World
threshold: 0.5
len: 0
threshold: 0.6
len: 0
threshold: 0.7
len: 0
threshold: 0.8
len: 0


region: EU
threshold: 0.5
result: 0.57
len: 40
threshold: 0.6
result: 0.46
len: 46
threshold: 0.7
result: 0.67
len: 30
threshold: 0.8
result: 0.78
len: 18


region: LCS
threshold: 0.5
result: 0.46
len: 46
threshold: 0.6
result: 0.36
len: 33
threshold: 0.7
result: 0.8
len: 40
threshold: 0.8
result: 0.76
len: 21


region: LEC
threshold: 0.5
result: 0.54
len: 28
threshold: 0.6
result: 0.66
len: 41
threshold: 0.7
result: 0.69
len: 36
threshold: 0.8
result: 0.83
len: 18


region: CBLOL_Tier2
threshold: 0.5
result: 0.62
len: 40
threshold: 0.6
result: 0.68
len: 34
threshold: 0.7
result: 0.75
len: 28
threshold: 0.8
len: 3


region: TCL
threshold: 0.5
result: 0.6
len: 47
threshold: 0.6
result: 0.76
len: 34
threshold: 0.7
result: 0.73
len: 11
threshold: 0.8
len: 9


region: LCO
threshold: 0.5
result: 0.43
len: 21
threshold: 0.6
result: 0.78
len: 9
threshold: 0.7
result: 0.67
len: 18
threshol

In [32]:
perc_val_result_df['diff'] = perc_val_result_df['result'] - perc_val_result_df['threshold']

Unnamed: 0,region,threshold,result,len
0,EU,0.5,0.57,40
1,EU,0.6,0.46,46
2,EU,0.7,0.67,30
3,EU,0.8,0.78,18
4,LCS,0.5,0.46,46
...,...,...,...,...
99,GLL,0.5,0.55,22
100,GLL,0.6,0.60,20
101,GLL,0.8,0.88,8
102,NA_Tier2,0.5,0.59,44


In [7]:
Utils.regions_stats

Unnamed: 0,region,model,test_size,train_size,error,cut_off_var,threshold,len_lost
0,Prime,0,478,2388,0.283,1.5,0.35,0.55
1,LVP,0,368,3953,0.272,1.5,0.35,0.63
2,NLC,10,255,2081,0.154,1.5,0.35,0.59
3,LVP2,0,5,1374,0.0,1.5,0.35,0.6
4,Ultraliga,0,319,3031,0.193,1.5,0.35,0.45
5,LFL,0,567,1467,0.222,1.5,0.35,0.57
6,GLL,0,231,1133,0.204,1.5,0.35,0.42
7,MSI,0,158,4725,0.058,1.5,0.35,0.59
8,EU,0,531,1637,0.23,1.5,0.35,0.71
9,LCS,2,567,2533,0.27,1.5,0.35,0.62


In [8]:
print(f'Threshold local: {Utils.logistic_threshold}')
threshold = Utils.regions_stats['threshold'].iloc[0]
print(f'Threshold: {threshold}')
print(mean_real_error)
regions_stats_proof.sort_values(by='error')

Threshold local: 0.25
Threshold: 0.35
0.1889168765743073


Unnamed: 0,region,error,len lost,len,true_len
0,NA_Tier2,0.0,0.77,120,28
0,Elite_Tier2,0.0,0.93,74,5
0,CBLOL,0.037,0.78,121,27
0,Turkey_Tier2,0.038,0.74,100,26
0,LPLOL,0.038,0.64,73,26
0,CBLOL_Tier2,0.059,0.84,105,17
0,GLL,0.071,0.77,61,14
0,TCL,0.087,0.79,112,24
0,LJL,0.125,0.48,107,56
0,EBL,0.133,0.79,72,15


In [6]:
print(f'Threshold local: {Utils.logistic_threshold}')
threshold = Utils.regions_stats['threshold'].iloc[0]
print(f'Threshold: {threshold}')
print(mean_real_error)
#regions_stats_proof.sort_values(by='error')

Threshold local: 0.25
Threshold: 0.35
0.1952941176470588


In [10]:
print(f'Threshold local: {Utils.logistic_threshold}')
threshold = Utils.regions_stats['threshold'].iloc[0]
print(f'Threshold: {threshold}')
print(mean_real_error)
#regions_stats_proof.sort_values(by='error')

Threshold local: 0.25
Threshold: 0.35
0.2455919395465995


# Real proof

In [16]:
Model.get_region_teams('LPL')

Anyone s Legend: 0
Bilibili Gaming: 1
Dominus Esports: 2
Edward Gaming: 3
Funplus Phoenix: 4
Invictus Gaming: 5
JD Gaming: 6
LGD Gaming: 7
LNG Esports: 8
Ninjas in Pyjamas: 9
OMG: 10
Rare Atom: 11
Rogue Warriors: 12
Royal Never Give Up: 13
Suning: 14
TT: 15
Team WE: 16
Top Esports: 17
Ultra Prime: 18
Vici Gaming: 19
Victory Five: 20
Weibo Gaming: 21
eStar: 22


In [11]:
Model.blue_team = 11
Model.red_team = 8
Model.print_team_players()

team_blue_list = ['Last line-up', 'Cube', 'Leyan', 'Strive', 'Assum']
team_red_list = ['Zika', 'Tarzan', 'Scout', 'LP', 'Hang']


In [14]:
team_blue_list = ['Last line-up', 'Ale', 'Jiejie', 'FoFo', 'Uzi']
team_red_list = ['Zika', 'Tarzan', 'Scout', 'GALA', 'Hang']

Model.make_prediction(manual_insert=False, team_blue_list=team_blue_list, team_red_list=team_red_list)

last train data: 2023-06-18 00:00:00
no data names on: []
0.4680206287686034


# Auto real proof

In [23]:
region='CBLOL'
Model.get_region_teams(region=region)

CNB e-Sports Club: 0
FURIA Esports: 1
FURIA Uppercut: 2
Flamengo Los Grandes: 3
Flamengo eSports: 4
Fluxo: 5
INTZ e-Sports: 6
INTZ eSports: 7
KaBuM! e-Sports: 8
LOUD: 9
Liberty: 10
Los Grandes: 11
Netshoes Miners: 12
ProGaming eSports: 13
Prodigy Esports: 14
RED Canids: 15
Redemption POA: 16
Rensga eSports: 17
Santos e-Sports: 18
Team oNe eSports: 19
Uppercut eSports: 20
Vivo Keyd: 21
Vivo Keyd Stars: 22
Vorax Liberty: 23
paiN Gaming: 24


In [24]:
df = pd.DataFrame(
                    [
                    [10, 9]
                    ]
                )

In [25]:
for n in range(len(df)):
    Model.get_region_teams(region,verbose=False)
    Model.blue_team = df[0].iloc[n]
    Model.red_team = df[1].iloc[n]
    print(f'blue team: {Model.teams_dict[Model.blue_team]}\nred team: {Model.teams_dict[Model.red_team]}')
    Model.make_prediction(manual_insert=False)
    print('\n===============\n')

blue team: Liberty
red team: LOUD
no data names on: []
0.8052251822161905




In [27]:
Utils.bet_ratio_vars(result=0.1, ratio=None, chance=0.20)

Expected ratio: 9.499999999999998
