# Setup

In [1]:
import Utils.model_file as model_file
import Utils.utils_file as utils_file
import Utils.scraping_file as scraping_file
from Utils.constants import *

import numpy as np
import pandas as pd
import sklearn.metrics as skm

In [2]:
# Instantiate the logistic model for the 'Score' target, plus the scraper.
# `Utils` is the helper object owned by the model; later cells read and
# overwrite its attributes directly.
Model = model_file.Final_Model_Class(model_type='logistic', target='Score')
Scraping = scraping_file.Scraping_Class()
Utils = Model.Utils

# Discard every row whose target value is 2.
# NOTE(review): presumably 2 marks a non-binary outcome — confirm.
keep_rows = Utils.TARGET_DF[Utils.TARGET] != 2
Utils.TARGET_DF = Utils.TARGET_DF[keep_rows]

In [3]:
# The validation set is the subset of the target frame that belongs to the
# current semester; the print shows the most recent game date it contains.
is_current_semester = Utils.TARGET_DF['realSemesterYear'] == CURRENT_YEAR_SEMESTER
validation_df = Utils.TARGET_DF.loc[is_current_semester].copy()
print(validation_df['Date'].max())

2023-06-18 00:00:00


In [156]:
# Mean of the stored per-region 'error' column, followed by a preview of the
# stats table itself.
stored_errors = Utils.regions_stats['error']
print(stored_errors.mean())
Utils.regions_stats.head()

0.3813333333333333


Unnamed: 0,region,model,test_size,train_size,error,cut_off_var,threshold,len_lost,mean_diff
0,LFL,0,950,2500,0.386,1.5,0.35,0.14,-0.052857
1,LPL,0,1577,3735,0.319,1.5,0.35,0.09,-0.04
2,Prime_Tier2,0,128,2055,0.411,1.5,0.35,0.12,-0.0425
3,Ultraliga,0,432,4140,0.403,1.5,0.35,0.12,-0.06
4,SuperLiga_Tier2,0,120,3126,0.423,1.5,0.35,0.07,-0.035


# Auto validation

In [5]:
# Auto validation.
#
# For every tournament in `validation_df`, fit the region's model on rows
# from strictly earlier semesters and score that tournament's games.
# Produces (all consumed by later cells):
#   final_df            - raw prediction (column 0), truth (column 1), region
#   final_df_filter     - rows of final_df outside the uncertainty band
#   regions_stats_proof - one summary row per validated tournament
#   perc_val_df         - predictions folded to [0.5, 1] plus a hit flag
#   mean_real_error, all_len_lost
#
# Per-tournament frames are collected in lists and concatenated once after
# the loop instead of growing DataFrames with pd.concat on every iteration.
stats_columns = ['region','error','len lost','len', 'true_len']
prediction_frames = []
stats_frames = []
perc_frames = []
Utils.logistic_threshold = 0.35
final_len = 0

for tournament in validation_df.tournamentId.unique():
    print('============')
    print(f'Tournament: {tournament}')

    # The tournament id is the region abbreviation followed by 5 characters.
    region_abrev = tournament[:-5]
    if region_abrev not in Utils.regions_feature_cols:
        continue
    if region_abrev == 'VCS':
        continue

    region_stats = Utils.regions_stats[Utils.regions_stats['region'] == region_abrev]
    old_error = region_stats.error.iloc[0]
    model_number = region_stats.model.iloc[0]

    target_df = Utils.TARGET_DF.copy()
    feature_columns = Utils.regions_feature_cols[region_abrev]
    train_data = Utils.regions_train_data[region_abrev]
    # Keep the columns whose name, without the team-side prefix, is a feature.
    target_columns = [x for x in list(target_df.columns) if x.replace('Team_Red_','').replace('Team_Blue_','') in feature_columns]

    # Training rows: regions configured for this model, earlier semesters only.
    train_df = target_df[target_df['regionAbrev'].isin(train_data)].copy()
    train_df = train_df[train_df['realSemesterYear'].astype(int) < int(CURRENT_YEAR_SEMESTER)]
    train_df = train_df[target_columns + Utils.INFO_COLS]
    xtrain = train_df.drop(['Date', Utils.TARGET] + OFF_COLS, axis=1, errors='ignore').copy()
    ytrain = train_df[Utils.TARGET]

    # Test rows: this tournament's games from the validation set.
    prediction_df = validation_df[validation_df['tournamentId'] == tournament].copy()
    prediction_df = prediction_df[target_columns + Utils.INFO_COLS]
    xtest = prediction_df.drop(['Date', Utils.TARGET] + OFF_COLS, axis=1, errors='ignore').copy()
    ytest = prediction_df[Utils.TARGET]

    metric, prediction, mean_diff = Utils.make_pred(model_number=model_number, xtrain=xtrain, ytrain=ytrain, xtest=xtest, ytest=ytest, reps=1)
    # Utils.len_lost is read right after make_pred.
    # NOTE(review): presumably the fraction of games dropped by the threshold — confirm.
    true_len = round(len(ytest)*abs(Utils.len_lost-1))

    df_to_concat = pd.DataFrame([prediction, ytest]).transpose()
    df_to_concat['region'] = region_abrev
    prediction_frames.append(df_to_concat)

    df_to_concat = pd.DataFrame([region_abrev, metric, Utils.len_lost, len(ytest), true_len]).transpose()
    df_to_concat.columns = stats_columns
    stats_frames.append(df_to_concat)

    df_temp = pd.DataFrame({'predicted':prediction,'true':ytest})
    df_temp['region'] = region_abrev
    perc_frames.append(df_temp)

    print(f'\nExpected error: {old_error}')
    print(f'Real error: {metric}')
    print(f'Len: {len(ytest)}')
    print(f'Len lost: {Utils.len_lost}')
    print(f'True len: {true_len}')


# Single concatenation per output; fall back to the same empty frames the
# loop used to start from when no tournament was validated.
final_df = pd.concat(prediction_frames) if prediction_frames else pd.DataFrame()
regions_stats_proof = pd.concat(stats_frames) if stats_frames else pd.DataFrame(columns=stats_columns)
perc_val_df = pd.concat(perc_frames) if perc_frames else pd.DataFrame(columns=['predicted','true'])

# Keep only predictions outside the band (threshold, 1 - threshold).
# .copy() makes the filtered frame independent so the column assignments
# below do not raise SettingWithCopyWarning.
is_confident = (final_df[0]<=Utils.logistic_threshold) | (final_df[0]>=1-Utils.logistic_threshold)
final_df_filter = final_df[is_confident].copy()
final_df_filter[0] = final_df_filter[0].round()
final_df_filter['error'] = final_df_filter[0]!=final_df_filter[1]
mean_real_error = skm.mean_absolute_error(final_df_filter[0], final_df_filter[1])
all_len_lost = len(final_df) - len(final_df_filter)
regions_stats_proof = regions_stats_proof[regions_stats_proof['true_len']>0]

# 'round' is the predicted class, 'result' is 1 when it matches the truth,
# and 'predicted' is folded so it always lies in [0.5, 1].
perc_val_df['round'] = perc_val_df['predicted'].round().astype(int)
perc_val_df['result'] = (perc_val_df['true'] == perc_val_df['round']).astype(int)
perc_val_df['predicted'] = perc_val_df['predicted'].where(perc_val_df['predicted'] >= 0.5, 1 - perc_val_df['predicted'])
perc_val_df.reset_index(drop=True,inplace=True)

print('\n===============\n')
print(f'Mean real error: {mean_real_error}')
print(f'Len lost: {all_len_lost}')
print(f'Start len: {len(final_df)}')
print(f'Final len: {len(final_df_filter)}')
print(f'last train data: {Model.last_game_date}')


Tournament: Hitpoint_Tier220231

Expected error: 0.347
Real error: 0.167
Len: 16
Len lost: 0.62
True len: 6
Tournament: LFL20231

Expected error: 0.386
Real error: 0.25
Len: 50
Len lost: 0.68
True len: 16
Tournament: LLA20231

Expected error: 0.383
Real error: 0.722
Len: 18
Len lost: 0.0
True len: 18
Tournament: LPL20231

Expected error: 0.319
Real error: 0.255
Len: 112
Len lost: 0.58
True len: 47
Tournament: LCO20231

Expected error: 0.249
Real error: 0.5
Len: 10
Len lost: 0.6
True len: 4
Tournament: LCK_Tier220231

Expected error: 0.423
Real error: 0.4
Len: 43
Len lost: 0.77
True len: 10
Tournament: NLC20231

Expected error: 0.363
Real error: 0.6
Len: 16
Len lost: 0.69
True len: 5
Tournament: GLL20231

Expected error: 0.403
Real error: 0.0
Len: 20
Len lost: 0.8
True len: 4
Tournament: Elite_Tier220231

Expected error: 0.447
Real error: 0.333
Len: 16
Len lost: 0.62
True len: 6
Tournament: Prime_Tier220231

Expected error: 0.411
Real error: 0.154
Len: 26
Len lost: 0.5
True len: 13
Tour

In [6]:
# Calibration check: for every region, group the folded predictions of
# `perc_val_df` into 0.1-wide confidence buckets starting at 0.5, 0.6, 0.7
# and 0.8, and compare each bucket's hit rate with its lower bound.
#
# Rows are collected in a list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0, and building the frame with
# explicit columns also works when no bucket qualifies.
#
# NOTE(review): both bucket edges are inclusive, so a prediction that sits
# exactly on an edge is counted in two buckets — confirm this is intended.
perc_val_rows = []
for reg in validation_df.tournamentId.apply(lambda x: x[:-5]).unique():
    print('\n======================================\n')
    print(f'region: {reg}')
    for x in np.arange(0.5,0.9,0.1):
        print('==========')
        print(f'threshold: {round(x,2)}')
        threshold = x
        test_df = perc_val_df[(perc_val_df['predicted']>=threshold)
                                        & (perc_val_df['predicted']<=threshold+0.1)
                                        & (perc_val_df['region']==reg)]

        # A hit rate is only recorded when the bucket holds both hits and
        # misses; buckets that are empty or unanimous just report their size.
        if len(test_df.result.unique())>1:
            result = round(test_df.result.value_counts()[1]/len(test_df),2)
            print(f'result: {result}')
            perc_val_rows.append([reg, round(threshold,2), result, len(test_df)])

        print(f'len: {len(test_df)}')

perc_val_result_df = pd.DataFrame(perc_val_rows, columns=['region','threshold','result','len'])
# Negative diff: the bucket's hit rate is below the bucket's lower bound.
perc_val_result_df['diff'] = perc_val_result_df['result'] - perc_val_result_df['threshold']



region: Hitpoint_Tier2
threshold: 0.5
result: 0.25
len: 8
threshold: 0.6
result: 0.8
len: 5
threshold: 0.7
len: 3
threshold: 0.8
len: 0


region: LFL
threshold: 0.5
result: 0.5
len: 24
threshold: 0.6
result: 0.67
len: 21
threshold: 0.7
len: 5
threshold: 0.8
len: 0


region: LLA
threshold: 0.5
len: 0
threshold: 0.6
len: 2
threshold: 0.7
len: 5
threshold: 0.8
result: 0.56
len: 9


region: LPL
threshold: 0.5
result: 0.62
len: 45
threshold: 0.6
result: 0.67
len: 39
threshold: 0.7
result: 0.83
len: 23
threshold: 0.8
len: 5


region: LCO
threshold: 0.5
len: 4
threshold: 0.6
result: 0.67
len: 6
threshold: 0.7
len: 0
threshold: 0.8
len: 0


region: LCK_Tier2
threshold: 0.5
result: 0.26
len: 23
threshold: 0.6
result: 0.5
len: 16
threshold: 0.7
result: 0.5
len: 4
threshold: 0.8
len: 0


region: NLC
threshold: 0.5
result: 0.5
len: 8
threshold: 0.6
result: 0.4
len: 5
threshold: 0.7
result: 0.33
len: 3
threshold: 0.8
len: 0


region: GLL
threshold: 0.5
result: 0.57
len: 7
threshold: 0.6
result: 0

In [121]:
# Show the buckets whose hit rate is more than 0.1 below the bucket threshold.
is_underperforming = perc_val_result_df['diff'] < -0.1
perc_val_result_df[is_underperforming]

Unnamed: 0,region,threshold,result,len,diff
0,Hitpoint_Tier2,0.5,0.25,8,-0.25
4,LLA,0.8,0.56,9,-0.24
9,LCK_Tier2,0.5,0.26,23,-0.24
11,LCK_Tier2,0.7,0.5,4,-0.2
13,NLC,0.6,0.4,5,-0.2
14,NLC,0.7,0.33,3,-0.37
17,Elite_Tier2,0.5,0.25,8,-0.25
25,CBLOL_Tier2,0.5,0.29,7,-0.21
27,NACL,0.8,0.5,4,-0.3
33,LJL,0.7,0.5,6,-0.2


In [10]:
# Compare the threshold set on Utils with the one stored in the first row of
# the regions stats table, then list the per-region results by error.
print(f'Threshold local: {Utils.logistic_threshold}')
threshold = Utils.regions_stats['threshold'].iat[0]
print(f'Threshold: {threshold}')
print(mean_real_error)
regions_stats_proof.sort_values('error')

Threshold local: 0.35
Threshold: 0.35
0.3188976377952756


Unnamed: 0,region,error,len lost,len,true_len
0,CBLOL_Tier2,0.0,0.89,9,1
0,GLL,0.0,0.8,20,4
0,EBL,0.111,0.55,20,9
0,PG,0.111,0.44,16,9
0,Prime_Tier2,0.154,0.5,26,13
0,NACL,0.167,0.5,24,12
0,Hitpoint_Tier2,0.167,0.62,16,6
0,SuperLiga_Tier2,0.231,0.63,35,13
0,LCK,0.231,0.71,45,13
0,LFL,0.25,0.68,50,16


# Auto real proof

In [152]:
# Region used by the prediction cells that follow. get_region_teams prints
# the "team name: index" listing; the last line shows every region that has
# an entry in the stats table.
region = 'Ultraliga'
Model.get_region_teams(region=region)
Utils.regions_stats['region'].unique()

7more7 Pompa Team Academy: 0
ACTINA PACT: 1
AGO ROGUE: 2
AVEZ: 3
Akademia: 4
Alior Bank Team: 5
Diablo Chairs: 6
Esports Academy: 7
Exeed Poland: 8
Forsaken: 9
Gentlemens Gaming: 10
Goskilla: 11
Grypciocraft Esports: 12
Illuminar Gaming: 13
Indictive Esports: 14
Iron Wolves: 15
K1CK: 16
K1CK Neosurf: 17
Komil & Friends: 18
Komputronik H34T: 19
Maturalni Forsaken: 20
Orbit Anonymo: 21
PDW: 22
PRIDE: 23
Piast Gliwice Esports: 24
Pompa Team: 25
R-SIXTEAM: 26
Rogue Esports Club: 27
Szata Maga: 28
Team ESCA Gaming: 29
Valkiria Esports: 30
Wisla Plock: 31
Zero Tenacity: 32
devils.one: 33
piratesports: 34
soon to be named: 35


array(['LFL', 'LPL', 'Prime_Tier2', 'Ultraliga', 'SuperLiga_Tier2',
       'SuperLiga', 'MSI', 'EMEA', 'LCO', 'VCS', 'LEC', 'CBLOL_Tier2',
       'CBLOL', 'LJL', 'LLA', 'LCK', 'LCS', 'PCS', 'LCK_Tier2', 'NACL',
       'GLL', 'Prime', 'LPLOL', 'EBL', 'TCL', 'PG', 'Hitpoint_Tier2',
       'Elite_Tier2', 'Arabian', 'NLC'], dtype=object)

In [153]:
# One row per match-up to predict: column 0 is the blue-side team index and
# column 1 the red-side team index, as used to look up Model.teams_dict.
df = pd.DataFrame([[12, 13]])

for n in range(df.shape[0]):
    Model.get_region_teams(region, verbose=False)
    Model.blue_team = df[0].iloc[n]
    Model.red_team = df[1].iloc[n]
    blue_name = Model.teams_dict[Model.blue_team]
    red_name = Model.teams_dict[Model.red_team]
    print(f'blue team: {blue_name}\nred team: {red_name}')
    Model.make_prediction(manual_insert=False)
    print('\n===============\n')

blue team: Grypciocraft Esports
red team: Illuminar Gaming
no data names on: []
0.7841159002670329




In [154]:
# Leave `ratio` unset and supply `result` and `chance`.
# NOTE(review): chance=0.78 looks like the rounded prediction printed by the
# previous cell — confirm before reusing.
Utils.bet_ratio_vars(
    result=0.05,
    ratio=None,
    chance=0.78,
)

Expected ratio: 1.346153846153846


In [155]:
# Manual running total; the trailing comment on each line is the author's
# label for that group of terms.
# NOTE(review): presumably each term is the net result of one bet (-1 for a
# lost stake), grouped by league — confirm.
# The terms are added strictly left to right; keep the order, because
# regrouping floats can change the displayed total.
(+ 0.54 + 0.41 - 1 + 0.7 #LEC
  -1 -1 #LPL
 + 0.52 #BR
 + 0.56 #BR ACAD
 + 0.42 #LJL
 + 0.14 + 0.33 +0.9 #masters
 ) 

1.52