# Setup

In [1]:
import Utils.model_file as model_file
import Utils.utils_file as utils_file
import Utils.scraping_file as scraping_file
from Utils.constants import *

import numpy as np
import pandas as pd
import sklearn.metrics as skm

In [2]:
# Instantiate the model wrapper (target='Score', logistic model type) and the
# scraping helper, then keep a handle on the Utils object owned by the model.
Model = model_file.Final_Model_Class(
    model_type='logistic',
    target='Score',
)
Scraping = scraping_file.Scraping_Class()
Utils = Model.Utils

# Discard every row whose target column equals 2, leaving the remaining
# target values for the logistic model.
# NOTE(review): presumably 2 encodes a draw / non-binary outcome - confirm.
Utils.TARGET_DF = Utils.TARGET_DF.loc[Utils.TARGET_DF[Utils.TARGET].ne(2)]

In [3]:
# Validation set = rows of the target frame that belong to the current
# semester (CURRENT_YEAR_SEMESTER comes from Utils.constants).
validation_df = Utils.TARGET_DF.copy()
validation_df = validation_df.loc[validation_df['realSemesterYear'].eq(CURRENT_YEAR_SEMESTER)]

# Most recent game date available for validation.
print(validation_df['Date'].max())

2023-06-18 00:00:00


In [4]:
# Average of the per-region 'error' column, followed by the first rows of the
# stats table for a quick sanity check.
print(Utils.regions_stats['error'].mean())
Utils.regions_stats.head()

0.2193333333333333


Unnamed: 0,region,model,test_size,train_size,error,cut_off_var,threshold,len_lost
0,LFL,3,950,5079,0.247,1.5,0.35,0.63
1,LPL,0,1577,8466,0.244,1.5,0.35,0.56
2,Prime_Tier2,0,128,9102,0.205,1.5,0.35,0.97
3,Ultraliga,0,432,3189,0.27,1.5,0.35,0.6
4,SuperLiga_Tier2,0,120,2723,0.17,1.5,0.35,0.94


# Auto validation

In [5]:
# Auto validation
# ---------------
# For every tournament of the current semester: train the region's model on
# games from earlier semesters, predict the tournament's games and compare the
# obtained error with the error stored in Utils.regions_stats.
#
# Produces (notebook globals used by later cells):
#   final_df            - column 0: prediction, column 1: true target, 'region'
#   final_df_filter     - rows of final_df whose prediction is within
#                         Utils.logistic_threshold of 0 or 1, plus an 'error' flag
#   regions_stats_proof - one summary row per validated tournament
#   perc_val_df         - predicted / true / region / round / result per game
#   mean_real_error, all_len_lost
#
# Per-tournament results are collected in lists and turned into DataFrames
# once after the loop, instead of re-concatenating growing frames on every
# iteration (which is quadratic and lets the empty seed frame take part in
# dtype resolution).
STATS_PROOF_COLS = ['region','error','len lost','len', 'true_len']
region_frames = []       # per-tournament [prediction, truth, region] frames
region_stat_rows = []    # per-tournament summary rows (STATS_PROOF_COLS order)
perc_val_frames = []     # per-tournament predicted/true/region frames

Utils.logistic_threshold = 0.25
final_len = 0

# Only read inside the loop, so a single copy is enough.
target_df = Utils.TARGET_DF.copy()

for tournament in validation_df.tournamentId.unique():
    print('============')
    print(f'Tournament: {tournament}')

    # tournamentId = region abbreviation + 5-character suffix
    # (e.g. 'LFL20231' -> 'LFL').
    region_abrev = tournament[:-5]
    # Skip regions without a configured feature set; VCS is excluded explicitly.
    if region_abrev not in Utils.regions_feature_cols or region_abrev == 'VCS':
        continue

    old_error = Utils.regions_stats[Utils.regions_stats['region']==region_abrev].error.iloc[0]
    feature_columns = Utils.regions_feature_cols[region_abrev]
    train_data = Utils.regions_train_data[region_abrev]
    # Keep the red/blue columns whose un-prefixed name is one of the region's features.
    target_columns = [x for x in list(target_df.columns) if x.replace('Team_Red_','').replace('Team_Blue_','') in feature_columns]

    # Training set: games of the regions listed for this region, strictly
    # before the current semester so that no validation game leaks in.
    train_df = target_df[target_df['regionAbrev'].isin(train_data)].copy()
    train_df = train_df[train_df['realSemesterYear'].astype(int) < int(CURRENT_YEAR_SEMESTER)]
    train_df = train_df[target_columns + Utils.INFO_COLS]
    xtrain = train_df.drop(['Date', Utils.TARGET] + OFF_COLS, axis=1, errors='ignore').copy()
    ytrain = train_df[Utils.TARGET]

    # Test set: the games of this tournament only.
    prediction_df = validation_df[validation_df['tournamentId'] == tournament].copy()
    prediction_df = prediction_df[target_columns + Utils.INFO_COLS]
    xtest = prediction_df.drop(['Date', Utils.TARGET] + OFF_COLS, axis=1, errors='ignore').copy()
    ytest = prediction_df[Utils.TARGET]

    model_number = Utils.regions_stats[Utils.regions_stats.region == region_abrev].model.iloc[0]

    # NOTE(review): Utils.len_lost is read right after make_pred, so it is
    # presumably refreshed by that call (share of games dropped by the
    # threshold filter) - confirm in utils_file.
    metric, prediction = Utils.make_pred(model_number=model_number, xtrain=xtrain, ytrain=ytrain, xtest=xtest, ytest=ytest, reps=1)
    true_len = round(len(ytest)*abs(Utils.len_lost-1))

    df_to_concat = pd.DataFrame([prediction, ytest]).transpose()
    df_to_concat['region'] = region_abrev
    region_frames.append(df_to_concat)

    region_stat_rows.append([region_abrev, metric, Utils.len_lost, len(ytest), true_len])

    df_temp = pd.DataFrame(dict({'predicted':prediction,'true':ytest}))
    df_temp['region'] = region_abrev
    perc_val_frames.append(df_temp)

    print(f'\nExpected error: {old_error}')
    print(f'Real error: {metric}')
    print(f'Len: {len(ytest)}')
    print(f'Len lost: {Utils.len_lost}')
    print(f'True len: {true_len}')


# Build the result frames once; fall back to the empty seed frames when no
# tournament was validated.
final_df = pd.concat(region_frames) if region_frames else pd.DataFrame()
regions_stats_proof = pd.DataFrame(region_stat_rows, columns=STATS_PROOF_COLS)
perc_val_df = pd.concat(perc_val_frames) if perc_val_frames else pd.DataFrame(columns=['predicted','true'])

# Keep only confident predictions (close to 0 or 1). The explicit copy makes
# the column assignments below act on an independent frame rather than on a
# slice of final_df (avoids SettingWithCopyWarning / lost writes).
final_df_filter = final_df[(final_df[0]<=Utils.logistic_threshold) | (final_df[0]>=1-Utils.logistic_threshold)].copy()
final_df_filter[0] = final_df_filter[0].round()
final_df_filter['error'] = final_df_filter[0]!=final_df_filter[1]
# Predictions are already rounded to 0/1, so the MAE is the misclassification rate.
mean_real_error = skm.mean_absolute_error(final_df_filter[0], final_df_filter[1])
all_len_lost = len(final_df) - len(final_df_filter)
regions_stats_proof = regions_stats_proof[regions_stats_proof['true_len']>0]

# Calibration inputs: predicted class, hit flag (1 = correct) and the
# confidence of the predicted class folded into [0.5, 1].
perc_val_df['round'] = perc_val_df['predicted'].round().astype(int)
perc_val_df['result'] = (perc_val_df['true'] == perc_val_df['round']).astype(int)
perc_val_df['predicted'] = perc_val_df['predicted'].apply(lambda x: 1-x if x<0.5 else x)
perc_val_df = perc_val_df.reset_index(drop=True)

print('\n===============\n')
print(f'Mean real error: {mean_real_error}')
print(f'Len lost: {all_len_lost}')
print(f'Start len: {len(final_df)}')
print(f'Final len: {len(final_df_filter)}')
print(f'last train data: {Model.last_game_date}')


Tournament: Hitpoint_Tier220231

Expected error: 0.185
Real error: 0.0
Len: 16
Len lost: 0.62
True len: 6
Tournament: LFL20231

Expected error: 0.247
Real error: 0.0
Len: 50
Len lost: 0.98
True len: 1
Tournament: LLA20231
no data left after filtering

Expected error: 0.244
Real error: 1.0
Len: 18
Len lost: 1.0
True len: 0
Tournament: LPL20231

Expected error: 0.244
Real error: 0.0
Len: 112
Len lost: 0.8
True len: 22
Tournament: LCO20231

Expected error: 0.16
Real error: 0.0
Len: 10
Len lost: 0.8
True len: 2
Tournament: LCK_Tier220231

Expected error: 0.324
Real error: 0.667
Len: 43
Len lost: 0.86
True len: 6
Tournament: NLC20231

Expected error: 0.208
Real error: 0.333
Len: 16
Len lost: 0.81
True len: 3
Tournament: GLL20231
no data left after filtering

Expected error: 0.163
Real error: 1.0
Len: 20
Len lost: 1.0
True len: 0
Tournament: Elite_Tier220231

Expected error: 0.245
Real error: 0.0
Len: 16
Len lost: 0.94
True len: 1
Tournament: Prime_Tier220231
no data left after filtering

Ex

In [6]:
# Calibration check: per region, bucket the games by model confidence
# ([0.5, 0.6], [0.6, 0.7], ...) and compare the share of correct predictions
# in each bucket ('result') with the bucket's lower bound ('threshold').
#
# Rows are collected in a list and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0, and building with explicit
# columns also works when no bucket qualifies (the former
# `perc_val_result_df.columns = [...]` raised on an empty frame).
calibration_rows = []
for reg in validation_df.tournamentId.apply(lambda x: x[:-5]).unique():
    print('\n======================================\n')
    print(f'region: {reg}')
    for x in np.arange(0.5,0.9,0.1):
        print('==========')
        print(f'threshold: {round(x,2)}')
        threshold = x
        # Both bounds are inclusive, so a confidence that lands exactly on a
        # boundary is counted in two neighbouring buckets.
        test_df = perc_val_df[(perc_val_df['predicted']>=threshold)
                                        & (perc_val_df['predicted']<=threshold+0.1)
                                        & (perc_val_df['region']==reg)]

        # NOTE(review): a bucket is only recorded when it contains both hits
        # and misses; buckets where every prediction is right (or wrong) are
        # skipped - confirm this is intended.
        if len(test_df.result.unique())>1:
            result = round(test_df.result.value_counts()[1]/len(test_df),2)
            print(f'result: {result}')
            calibration_rows.append([reg, round(threshold,2), result, len(test_df)])

        print(f'len: {len(test_df)}')

perc_val_result_df = pd.DataFrame(calibration_rows, columns=['region','threshold','result','len'])
# Negative diff = the model was right less often than its confidence suggests.
perc_val_result_df['diff'] = perc_val_result_df['result'] - perc_val_result_df['threshold']



region: Hitpoint_Tier2
threshold: 0.5
result: 0.33
len: 3
threshold: 0.6
result: 0.71
len: 7
threshold: 0.7
len: 3
threshold: 0.8
len: 2


region: LFL
threshold: 0.5
result: 0.5
len: 34
threshold: 0.6
result: 0.64
len: 14
threshold: 0.7
len: 2
threshold: 0.8
len: 0


region: LLA
threshold: 0.5
result: 0.58
len: 12
threshold: 0.6
len: 4
threshold: 0.7
len: 2
threshold: 0.8
len: 0


region: LPL
threshold: 0.5
result: 0.65
len: 40
threshold: 0.6
result: 0.51
len: 45
threshold: 0.7
result: 0.87
len: 15
threshold: 0.8
len: 12


region: LCO
threshold: 0.5
result: 0.67
len: 6
threshold: 0.6
len: 2
threshold: 0.7
len: 2
threshold: 0.8
len: 0


region: LCK_Tier2
threshold: 0.5
result: 0.25
len: 16
threshold: 0.6
result: 0.53
len: 15
threshold: 0.7
result: 0.67
len: 6
threshold: 0.8
result: 0.33
len: 6


region: NLC
threshold: 0.5
result: 0.17
len: 6
threshold: 0.6
result: 0.33
len: 6
threshold: 0.7
result: 0.75
len: 4
threshold: 0.8
len: 0


region: GLL
threshold: 0.5
result: 0.3
len: 10
thre

In [7]:
# Buckets with more than 10 games whose hit rate is more than 0.1 below the
# bucket threshold.
perc_val_result_df.loc[
    lambda bucket: bucket['diff'].lt(-0.1) & bucket['len'].gt(10)
]

Unnamed: 0,region,threshold,result,len,diff
9,LCK_Tier2,0.5,0.25,16,-0.25
35,LCK,0.5,0.32,25,-0.18
45,SuperLiga,0.5,0.18,11,-0.32


In [8]:
# Buckets with more than 15 games whose hit rate is more than 0.1 below the
# bucket threshold. The explicit copy makes bad_diffs an independent frame, so
# adding the 'cont' column below does not write into a slice of
# perc_val_result_df (avoids SettingWithCopyWarning).
bad_diffs = perc_val_result_df[(perc_val_result_df['diff']<-0.1)
                               & (perc_val_result_df['len']>15)].copy()
# Share of all recorded buckets that are badly calibrated.
print(len(bad_diffs)/len(perc_val_result_df))
# Count the bad buckets per region.
bad_diffs['cont'] = 1
bad_diffs.groupby(by='region',as_index=False)['cont'].sum()

0.04


Unnamed: 0,region,cont
0,LCK,1
1,LCK_Tier2,1


In [9]:
# Regions that produced at least one calibration bucket, then the buckets
# recorded for CBLOL.
print(perc_val_result_df['region'].unique())
perc_val_result_df.loc[perc_val_result_df['region'].eq('CBLOL')]

['Hitpoint_Tier2' 'LFL' 'LLA' 'LPL' 'LCO' 'LCK_Tier2' 'NLC' 'GLL'
 'Elite_Tier2' 'Prime_Tier2' 'Ultraliga' 'CBLOL_Tier2' 'Arabian' 'NACL'
 'SuperLiga_Tier2' 'LJL' 'CBLOL' 'LCK' 'PG' 'Prime' 'EBL' 'SuperLiga'
 'LPLOL']


Unnamed: 0,region,threshold,result,len,diff
34,CBLOL,0.6,0.12,8,-0.48


In [10]:
# Display the complete per-region stats table (model id, test/train sizes,
# error, cut_off_var, threshold, len_lost).
Utils.regions_stats

Unnamed: 0,region,model,test_size,train_size,error,cut_off_var,threshold,len_lost
0,LFL,3,950,5079,0.247,1.5,0.35,0.63
1,LPL,0,1577,8466,0.244,1.5,0.35,0.56
2,Prime_Tier2,0,128,9102,0.205,1.5,0.35,0.97
3,Ultraliga,0,432,3189,0.27,1.5,0.35,0.6
4,SuperLiga_Tier2,0,120,2723,0.17,1.5,0.35,0.94
5,SuperLiga,0,121,5101,0.325,1.5,0.35,0.71
6,MSI,0,153,9295,0.087,1.5,0.35,0.65
7,EMEA,10,139,1819,0.312,1.5,0.35,0.77
8,LCO,2,377,4116,0.16,1.5,0.35,0.39
9,VCS,0,617,1784,0.288,1.5,0.35,0.37


In [11]:
# Compare the threshold set in this notebook with the one stored in the first
# row of the region stats, and show the validation error obtained above.
print(f'Threshold local: {Utils.logistic_threshold}')
threshold = Utils.regions_stats['threshold'].iat[0]
print(f'Threshold: {threshold}')
print(mean_real_error)
# Optional inspection, left disabled:
#regions_stats_proof.sort_values(by='error')

Threshold local: 0.25
Threshold: 0.35
0.19318181818181818


# Real proof

In [16]:
# Print the LPL teams together with the integer index shown next to each name.
# NOTE(review): these indices are presumably the values expected by
# Model.blue_team / Model.red_team - confirm in model_file.
Model.get_region_teams('LPL')

Anyone s Legend: 0
Bilibili Gaming: 1
Dominus Esports: 2
Edward Gaming: 3
Funplus Phoenix: 4
Invictus Gaming: 5
JD Gaming: 6
LGD Gaming: 7
LNG Esports: 8
Ninjas in Pyjamas: 9
OMG: 10
Rare Atom: 11
Rogue Warriors: 12
Royal Never Give Up: 13
Suning: 14
TT: 15
Team WE: 16
Top Esports: 17
Ultra Prime: 18
Vici Gaming: 19
Victory Five: 20
Weibo Gaming: 21
eStar: 22


In [11]:
# Pick the two sides by team index and print their players.
# NOTE(review): per the LPL listing printed in this notebook, 11 is Rare Atom
# and 8 is LNG Esports - re-check after re-running get_region_teams, since the
# indices depend on that listing.
Model.blue_team, Model.red_team = 11, 8
Model.print_team_players()

team_blue_list = ['Last line-up', 'Cube', 'Leyan', 'Strive', 'Assum']
team_red_list = ['Zika', 'Tarzan', 'Scout', 'LP', 'Hang']


In [14]:
# Hand-written line-ups (five entries per side) passed to the model instead of
# the ones printed by print_team_players.
team_blue_list = [
    'Last line-up',
    'Ale',
    'Jiejie',
    'FoFo',
    'Uzi',
]
team_red_list = [
    'Zika',
    'Tarzan',
    'Scout',
    'GALA',
    'Hang',
]

Model.make_prediction(
    team_blue_list=team_blue_list,
    team_red_list=team_red_list,
    manual_insert=False,
)

last train data: 2023-06-18 00:00:00
no data names on: []
0.4680206287686034


# Auto real proof

In [106]:
# Region used by the batch prediction cell below: list its teams with their
# indices, then show every region present in the stats table.
region = 'Hitpoint_Tier2'
Model.get_region_teams(region=region)
Utils.regions_stats['region'].unique()

AS Trencin esports: 0
Absolute Legends CZSK: 1
BRUTE: 2
BRUTE Academy: 3
Cryptova: 4
Cyber Gaming: 5
Dark Tigers: 6
Dark Tigers academy: 7
Dynamo Eclot: 8
Dynamo Eclot Academy: 9
Dynamo Esports: 10
ECLOT: 11
ECLOT Academy: 12
Eclot Gaming: 13
Entropiq: 14
Esport Academy: 15
Flayn Esports CZSK Edition: 16
Gaming Team Krava?e: 17
Gunrunners: 18
Inaequalis: 19
Inside Games: 20
Inside Games Academy: 21
RAMS: 22
Repre Gold: 23
SINNERS Esports: 24
STOPWATCH eSports: 25
Team Brute: 26
Team Moops: 27
Team Sampi: 28
Team Universe: 29
Vikingekrig Academy: 30
Vikingekrig Esports: 31
eSuba: 32
eSuba Academy: 33
eXtatus: 34


array(['LFL', 'LPL', 'Prime_Tier2', 'Ultraliga', 'SuperLiga_Tier2',
       'SuperLiga', 'MSI', 'EMEA', 'LCO', 'VCS', 'LEC', 'CBLOL_Tier2',
       'CBLOL', 'LJL', 'LLA', 'LCK', 'LCS', 'PCS', 'LCK_Tier2', 'NACL',
       'GLL', 'Prime', 'LPLOL', 'EBL', 'TCL', 'PG', 'Hitpoint_Tier2',
       'Elite_Tier2', 'Arabian', 'NLC'], dtype=object)

In [108]:
# One row per match-up to predict: column 0 = blue team index,
# column 1 = red team index (indices as listed by get_region_teams).
df = pd.DataFrame([
    [28, 4],
])

for n in range(df.shape[0]):
    # Called without printing the team listing before each prediction.
    Model.get_region_teams(region, verbose=False)
    Model.blue_team = df.iat[n, 0]
    Model.red_team = df.iat[n, 1]
    print(f'blue team: {Model.teams_dict[Model.blue_team]}\nred team: {Model.teams_dict[Model.red_team]}')
    Model.make_prediction(manual_insert=False)
    print('\n===============\n')

blue team: Team Sampi
red team: Cryptova
no data names on: []
0.43482708773603745




In [6]:
# Call bet_ratio_vars with result=0.05 and chance=0.5, leaving ratio unset;
# the recorded run printed "Expected ratio: 2.1".
# NOTE(review): presumably the helper solves for whichever argument is None
# (here the betting ratio giving a 0.05 expected result at a 50% win chance) -
# confirm in utils_file.
Utils.bet_ratio_vars(result=0.05, ratio=None, chance=0.5)

Expected ratio: 2.1
