In [9]:
import pandas as pd
import boto3
import time
from datetime import datetime, timedelta
def run_query(query, database, s3_output):
    client = boto3.client('athena')
    response = client.start_query_execution(
        QueryString=query,
        QueryExecutionContext={
            'Database': database
        },
        ResultConfiguration={
            'OutputLocation': s3_output,
        }
    )
    return response['QueryExecutionId']

def get_results(query_id):
    client = boto3.client('athena')
    
    while True:
        response = client.get_query_execution(QueryExecutionId=query_id)
        if response['QueryExecution']['Status']['State'] == 'SUCCEEDED':
            break
        elif response['QueryExecution']['Status']['State'] == 'FAILED':
            raise Exception("Athena query failed!")
        time.sleep(2)  # adjust this or use a back-off algorithm if needed
    
    result = client.get_query_results(QueryExecutionId=query_id)
    return result

In [83]:
def recent_game_stats(team_ids):
    
    if isinstance(team_ids, (list, tuple)):
        team_ids_str = ', '.join(map(str, team_ids))
    else:
        team_ids_str = str(team_ids)
    
    query = f"""
WITH unnested_tournaments AS (
    SELECT 
        id AS league_id,
        region,
        tournament.id AS tournament_id
    FROM 
        lol.leagues
        CROSS JOIN UNNEST(tournaments) AS t (tournament)
),
tourney AS (
    SELECT * 
    FROM lol.tournaments
    WHERE startdate > '2001-01-01'
    AND startdate < '2023-12-12'
),
tourney_matches AS (
    SELECT 
        t.*,
        tr.region,  -- Adding the region column here
        stage.name AS stage_name,
        stage.type AS stage_type,
        stage.slug AS stage_slug,
        section.name AS section_name,
        match_item.id AS match_id,
        match_item.type AS match_type,
        match_item.state AS match_state,
        match_item.mode AS match_mode,
        match_item.strategy.type AS match_strategy_type,
        match_item.strategy.count AS match_strategy_count,
        team.id AS team_id,
        team.side AS team_side,
        team.record.wins AS team_wins,
        team.record.losses AS team_losses,
        team.record.ties AS team_ties,
        team.result.outcome AS team_outcome,
        team.result.gamewins AS team_gamewins,
        player.id AS player_id,
        player.role AS player_role
    FROM tourney t
    JOIN unnested_tournaments tr ON tr.tournament_id = t.id  -- Joining on the tournament_id to get the region
    CROSS JOIN UNNEST(stages) AS t (stage)
    CROSS JOIN UNNEST(stage.sections) AS s (section)
    CROSS JOIN UNNEST(section.matches) AS m (match_item)
    CROSS JOIN UNNEST(match_item.teams) AS tm (team)
    CROSS JOIN UNNEST(team.players) AS p (player)
),
teamWins AS (
    SELECT distinct team_id, lol.teams.slug, tourney_matches.region, lol.teams.name, acronym, team_wins, team_losses, team_ties, team_gamewins 
    FROM tourney_matches
    JOIN lol.teams 
    USING(team_id)
),
teamStats AS (
    select region, lol.teams.slug, teamWins.name, teamWins.acronym, team_id, sum(team_wins) as nwin, sum(team_losses) nloss
    from teamWins
    join lol.teams using(team_id)
    group by team_id, lol.teams.slug, region, teamWins.name, teamWins.acronym
)
SELECT * FROM teamStats

    """
    database = "lol"
    s3_output = "s3://query-results-144/a/Dont-bill-me/"
    query_id = run_query(query, database, s3_output)
    result = get_results(query_id)
    tourney_info = []
    headers = []
    for i, Rows in enumerate(result['ResultSet']['Rows']):
        if i == 0:
            for El in Rows['Data']:
                val = El['VarCharValue']
                headers.append(val)
            continue
        tourney_info.append({f'{header}':Rows['Data'][i]['VarCharValue'] for i,header in enumerate(headers)})
    return tourney_info




In [85]:
teams = ['98767991954244555', '103877625775457850', '104367068120825486', '105913111502565010']

team_data = recent_game_stats(teams)

In [86]:
# just the given teams

df = pd.DataFrame(team_data)

df = df[df['team_id'].isin(teams)]

df['nwin'] = df['nwin'].astype(int)
df['nloss'] = df['nloss'].astype(int)
df['win_loss_ratio'] = df.apply(lambda row: row['nwin'] if row['nloss'] == 0 else row['nwin'] / (row['nwin'] + row['nloss']), axis=1)
df['ntot'] = df['nwin'] + df['nloss']

threshold = 10
filtered_df = df[df['ntot'] >= threshold]
filtered_df_sorted = filtered_df.sort_values(by=['win_loss_ratio', 'nwin'], ascending=[False, False])
filtered_df_sorted.reset_index(drop=True, inplace=True)

filtered_df_sorted

Unnamed: 0,region,slug,name,acronym,team_id,nwin,nloss,win_loss_ratio,ntot
0,VIETNAM,gam-esports,GAM Esports,GAM,98767991954244555,56,11,0.835821,67
1,"HONG KONG, MACAU, TAIWAN",psg-talon,PSG Talon,PSG,104367068120825486,159,36,0.815385,195
2,EMEA,movistar-riders,Movistar Riders,MRS,103877625775457850,153,104,0.595331,257
3,INTERNATIONAL,psg-talon,PSG Talon,PSG,104367068120825486,48,47,0.505263,95
4,INTERNATIONAL,gam-esports,GAM Esports,GAM,98767991954244555,2,14,0.125,16


In [76]:
# all the teams

df = pd.DataFrame(team_data)

df['nwin'] = df['nwin'].astype(int)
df['nloss'] = df['nloss'].astype(int)
df['win_loss_ratio'] = df.apply(lambda row: row['nwin'] if row['nloss'] == 0 else row['nwin'] / (row['nwin'] + row['nloss']), axis=1)
df['ntot'] = df['nwin'] + df['nloss']
df['team_code'] = df['acronym']
max_games_played = df['ntot'].max()
df['weighted_win_loss_ratio'] = (df['nwin'] / (df['nwin'] + df['nloss'])) * (1 + df['ntot'] / max_games_played)

# Rank teams based on the weighted win-loss ratio
ranked_df = df.sort_values(by='weighted_win_loss_ratio', ascending=False).reset_index(drop=True)

ranked_df

Unnamed: 0,region,name,acronym,team_id,nwin,nloss,win_loss_ratio,ntot,team_code,weighted_win_loss_ratio
0,KOREA,Gen.G,GEN,100205573495116443,229,65,0.778912,294,GEN,1.138974
1,KOREA,T1,T1,98767991853197861,256,97,0.725212,353,T1,1.127728
2,EMEA,DenizBank İstanbul Wildcats,IW,102235771678061291,203,68,0.749077,271,IW,1.068260
3,"HONG KONG, MACAU, TAIWAN",PSG Talon,PSG,104367068120825486,159,36,0.815385,195,PSG,1.065385
4,NORTH AMERICA,Cloud9,C9,98767991877340524,226,97,0.699690,323,C9,1.055036
...,...,...,...,...,...,...,...,...,...,...
518,INTERNATIONAL,TCL All-Stars,TCL,99124844362174905,0,1,0.000000,1,TCL,0.000000
519,NORTH AMERICA,Cold Hearted,COLD,110733881542862348,0,2,0.000000,2,COLD,0.000000
520,LATIN AMERICA,Incubus,INC,107582618156093368,0,1,0.000000,1,INC,0.000000
521,EMEA,The Spawn Esports,SPN,105521201981545679,0,0,0.000000,0,SPN,


In [72]:
import pandas as pd

teams = ['98767991954244555', '103877625775457850', '104367068120825486', '105913111502565010']

# lc = league_comparison(start_date)
team_data = recent_game_stats(teams)

In [80]:
df = pd.DataFrame(team_data)
df = df[df['team_id'].isin(teams)]
df.head()
df['nwin'] = df['nwin'].astype(int)
df['nloss'] = df['nloss'].astype(int)
df['win_loss_ratio'] = df.apply(lambda row: row['nwin'] if row['nloss'] == 0 else row['nwin'] / (row['nwin'] + row['nloss']), axis=1)
df['ntot'] = df['nwin'] + df['nloss']
df['team_code'] = df['acronym']
max_games_played = df['ntot'].max()
df['weighted_win_loss_ratio'] = (df['nwin'] / (df['nwin'] + df['nloss'])) * (1 + df['ntot'] / max_games_played)
missing_teams_df = pd.DataFrame()
for team in teams:
    if team not in ranked_df['team_id'].values:
        new_row = pd.DataFrame({
            'name': [None],
            'acronym': [None],
            'team_id': [team],
            'nwin': [None],
            'nloss': [None],
            'win_loss_ratio': [None],
            'ntot': [None],
            'team_code': [None],
            'weighted_win_loss_ratio': [None]
        })
        missing_teams_df = pd.concat([missing_teams_df, new_row], ignore_index=True)

# Concatenate the ranked_df with missing_teams_df
final_ranked_df = pd.concat([ranked_df, missing_teams_df], ignore_index=True)

final_ranked_df

Unnamed: 0,region,name,acronym,team_id,nwin,nloss,win_loss_ratio,ntot,team_code,weighted_win_loss_ratio
0,KOREA,Gen.G,GEN,100205573495116443,229,65,0.778912,294,GEN,1.138974
1,KOREA,T1,T1,98767991853197861,256,97,0.725212,353,T1,1.127728
2,EMEA,DenizBank İstanbul Wildcats,IW,102235771678061291,203,68,0.749077,271,IW,1.068260
3,"HONG KONG, MACAU, TAIWAN",PSG Talon,PSG,104367068120825486,159,36,0.815385,195,PSG,1.065385
4,NORTH AMERICA,Cloud9,C9,98767991877340524,226,97,0.699690,323,C9,1.055036
...,...,...,...,...,...,...,...,...,...,...
519,NORTH AMERICA,Cold Hearted,COLD,110733881542862348,0,2,0.000000,2,COLD,0.000000
520,LATIN AMERICA,Incubus,INC,107582618156093368,0,1,0.000000,1,INC,0.000000
521,EMEA,The Spawn Esports,SPN,105521201981545679,0,0,0.000000,0,SPN,
522,EMEA,Rejects Gaming,RJX,105521426070232498,0,0,0.000000,0,RJX,


In [65]:
# Compute regional metrics
region_metrics = df.groupby('region').agg(
    total_games=pd.NamedAgg(column='ntot', aggfunc='sum'),
    std_deviation=pd.NamedAgg(column='win_loss_ratio', aggfunc='std'),
    avg_win_loss_ratio=pd.NamedAgg(column='win_loss_ratio', aggfunc='mean')
).reset_index()

# Merge regional metrics with the main dataframe
df = df.merge(region_metrics, on='region', how='left')

# Compute a confidence score for each team based on its region's metrics
# This is just a basic formula; you can adjust the weights as needed
df['confidence_score'] = (
    df['total_games'] / df['total_games'].max() +
    df['std_deviation'] / df['std_deviation'].max() +
    df['avg_win_loss_ratio']
) / 3

# Rank the teams by combining the weighted win-loss ratio and the confidence score
# Adjust the weights as needed
df['final_score'] = df['weighted_win_loss_ratio'] * 0.7 + df['confidence_score'] * 0.3
ranked_df_confidence = df[df['team_id'].isin(teams)].sort_values(by='final_score', ascending=False).reset_index(drop=True)

ranked_df_confidence[['name', 'region', 'weighted_win_loss_ratio', 'confidence_score', 'final_score']]


Unnamed: 0,name,region,weighted_win_loss_ratio,confidence_score,final_score
0,PSG Talon,INTERNATIONAL,0.692034,0.582346,0.659127
1,GAM Esports,INTERNATIONAL,0.132782,0.582346,0.267651
2,Movistar Riders,EMEA,1.190661,,
3,GAM Esports,VIETNAM,1.05372,,
4,PSG Talon,"HONG KONG, MACAU, TAIWAN",1.434062,,


In [90]:
import pandas as pd

# Assuming you've loaded the team_data
df = pd.DataFrame(team_data)

# Preprocessing and computing base metrics
df['nwin'] = df['nwin'].astype(int)
df['nloss'] = df['nloss'].astype(int)
df['win_loss_ratio'] = df.apply(lambda row: row['nwin'] if row['nloss'] == 0 else row['nwin'] / (row['nwin'] + row['nloss']), axis=1)
df['ntot'] = df['nwin'] + df['nloss']
df['team_code'] = df['acronym']
max_games_played = df['ntot'].max()
df['weighted_win_loss_ratio'] = (df['nwin'] / (df['nwin'] + df['nloss'])) * (1 + df['ntot'] / max_games_played)

# Compute the average win-loss ratio of the top teams from each region
top_teams_threshold = 5  # Adjust this value based on how many top teams you want to consider
avg_win_loss_by_region = df.groupby('region')['win_loss_ratio'].nlargest(top_teams_threshold).mean(level=0)

# Compute the total number of games played by teams in each region
total_games_by_region = df.groupby('region')['ntot'].sum()

# Derive a strength score for each region
region_strength = 0.7 * avg_win_loss_by_region + 0.3 * (total_games_by_region / total_games_by_region.max())

# Normalize the strength scores
region_strength_normalized = region_strength / region_strength.sum()
region_strength_normalized.name = 'region_strength'

# Merge the regional strength score with the main dataframe
df = df.merge(region_strength_normalized, on='region', how='left')

# Apply the regional weights to the teams' win-loss ratios
df['adjusted_win_loss_ratio'] = df['win_loss_ratio'] * df['region_strength']

# Rank teams based on the adjusted win-loss ratio
adjusted_ranked_df = df.sort_values(by='adjusted_win_loss_ratio', ascending=False).reset_index(drop=True)

# Display the top teams based on the adjusted ranking
print(adjusted_ranked_df[['name', 'region', 'win_loss_ratio', 'adjusted_win_loss_ratio']].head(10))


                   name         region  win_loss_ratio  \
0           UST Esports  NORTH AMERICA        8.000000   
1    Cloud9 Challengers  NORTH AMERICA        0.782051   
2       Mirage Alliance  NORTH AMERICA        0.769231   
3         Team Coachify  NORTH AMERICA        0.750000   
4  Dignitas Challengers  NORTH AMERICA        0.746667   
5     Bay State College  NORTH AMERICA        0.727273   
6         Team Tony Top  NORTH AMERICA        0.714286   
7                Cloud9  NORTH AMERICA        0.699690   
8             Disguised  NORTH AMERICA        0.675000   
9  Maryville University  NORTH AMERICA        0.666667   

   adjusted_win_loss_ratio  
0                 1.502209  
1                 0.146851  
2                 0.144443  
3                 0.140832  
4                 0.140206  
5                 0.136564  
6                 0.134126  
7                 0.131385  
8                 0.126749  
9                 0.125184  


  avg_win_loss_by_region = df.groupby('region')['win_loss_ratio'].nlargest(top_teams_threshold).mean(level=0)


In [91]:
# Compile regional metrics into a single DataFrame
region_data = pd.concat([
    avg_win_loss_by_region.rename('avg_win_loss_top_teams'),
    total_games_by_region.rename('total_games'),
    region_strength.rename('strength_score'),
    region_strength_normalized.rename('normalized_strength_score')
], axis=1).reset_index()

region_data


Unnamed: 0,region,avg_win_loss_top_teams,total_games,strength_score,normalized_strength_score
0,BRAZIL,0.6569,4256,0.513565,0.059534
1,CHINA,0.747317,509,0.529548,0.061387
2,COMMONWEALTH OF INDEPENDENT STATES,0.713033,546,0.506017,0.058659
3,EMEA,0.724616,23761,0.807231,0.093577
4,"HONG KONG, MACAU, TAIWAN",0.711836,2228,0.526415,0.061024
5,INTERNATIONAL,1.0,1532,0.719343,0.083389
6,JAPAN,0.704714,2493,0.524775,0.060834
7,KOREA,0.711448,6780,0.583616,0.067655
8,LATIN AMERICA,0.602585,1589,0.441871,0.051223
9,LATIN AMERICA NORTH,0.628669,388,0.444967,0.051582


In [92]:
# 1. Isolate International Data
international_df = df[df['region'] == 'INTERNATIONAL']

# 2. Regional Win Rates in International Play
# We'll first extract the original region of each international team (assuming it's part of the 'name' or another column)
# For this example, we'll use 'name' as a proxy for the original region, but this might need adjustments
international_df['original_region'] = international_df['name']  # Adjust as needed
regional_win_rates_international = international_df.groupby('original_region')['win_loss_ratio'].mean()

# 3. Top Performers in International Play
top_international_teams = international_df.sort_values(by='win_loss_ratio', ascending=False).head(10)

# 4. Comparison of Regional and International Performance
# For this, we'll need to merge the international_df with the original df on team_id to get their local and international performances
merged_df = df.merge(international_df[['team_id', 'win_loss_ratio']], on='team_id', how='inner', suffixes=('_local', '_international'))
merged_df['performance_difference'] = merged_df['win_loss_ratio_international'] - merged_df['win_loss_ratio_local']

regional_win_rates_international, top_international_teams[['name', 'win_loss_ratio']], merged_df[['name', 'win_loss_ratio_local', 'win_loss_ratio_international', 'performance_difference']]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  international_df['original_region'] = international_df['name']  # Adjust as needed


(original_region
 100 Thieves                       0.333333
 Beijing JDG Intel Esports Club    0.742857
 Beyond Gaming                     0.428571
 Bilibili Gaming Pingan Bank       0.700000
 Bwipo Dream Team                  1.000000
                                     ...   
 The Chiefs                        0.000000
 Unicorns of Love                  0.288889
 V3 Esports                        0.200000
 WeiboGaming FAW AUDI              0.700000
 paiN Gaming                       0.333333
 Name: win_loss_ratio, Length: 70, dtype: float64,
                  name  win_loss_ratio
 519       LEC Legends             1.0
 408     LLA All-Stars             1.0
 409   LCS Queue Kings             1.0
 410  Bwipo Dream Team             1.0
 403     PCS All-Stars             1.0
 266     OPL All-Stars             1.0
 404     LCL-All-Stars             1.0
 498       LPL Legends             1.0
 128   LCK Queue Kings             1.0
 127     LJL All-Stars             1.0,
                  

In [108]:
# 3. Top Performers in International Play
top_international_teams = international_df.sort_values(by='win_loss_ratio', ascending=False).head(10)

# 4. Comparison of Regional and International Performance
# For this, we'll need to merge the international_df with the original df on team_id to get their local and international performances
merged_df = df.merge(international_df[['team_id', 'win_loss_ratio']], on='team_id', how='inner', suffixes=('_local', '_international'))
merged_df['performance_difference'] = merged_df['win_loss_ratio_international'] - merged_df['win_loss_ratio_local']

# Displaying the results
print(top_international_teams[['name', 'win_loss_ratio']])
print(merged_df[['name', 'win_loss_ratio_local', 'win_loss_ratio_international', 'performance_difference']])


                 name  win_loss_ratio
519       LEC Legends             1.0
408     LLA All-Stars             1.0
409   LCS Queue Kings             1.0
410  Bwipo Dream Team             1.0
403     PCS All-Stars             1.0
266     OPL All-Stars             1.0
404     LCL-All-Stars             1.0
498       LPL Legends             1.0
128   LCK Queue Kings             1.0
127     LJL All-Stars             1.0
                            name  win_loss_ratio_local  \
0           Papara SuperMassive               0.583012   
1           Papara SuperMassive               0.500000   
2    DenizBank İstanbul Wildcats              0.749077   
3    DenizBank İstanbul Wildcats              0.117647   
4                     LGD GAMING              0.125000   
..                           ...                   ...   
115                  LPL Legends              1.000000   
116                LCK All-Stars              0.333333   
117                LEC All-Stars              0.500000   
11

In [153]:
import numpy as np
import pandas as pd

teams = ['98767991954244555', '103877625775457850', '104367068120825486', '105913111502565010']

# lc = league_comparison(start_date)
team_data = recent_game_stats(teams)
# Assuming you've loaded the team_data
df = pd.DataFrame(team_data)

# 1. Compute Basic Metrics
df['nwin'] = df['nwin'].astype(int)
df['nloss'] = df['nloss'].astype(int)
df['win_loss_ratio'] = df.apply(lambda row: row['nwin'] if row['nloss'] == 0 else row['nwin'] / (row['nwin'] + row['nloss']), axis=1)
df['ntot'] = df['nwin'] + df['nloss']
df['team_code'] = df['acronym']

# Modify the weighted win-loss ratio formula with a logarithmic factor
df['weighted_win_loss_ratio'] = df['win_loss_ratio'] * (1 + np.log1p(df['ntot'] / max_games_played))

# 2. Regional Strength Metrics
grouped = df.groupby('region')
avg_win_loss_by_region = grouped.apply(lambda x: x.nlargest(top_teams_threshold, 'win_loss_ratio')['win_loss_ratio'].mean())
total_games_by_region = df.groupby('region')['ntot'].sum()
region_strength = 1.5 * avg_win_loss_by_region + 0.1 * (total_games_by_region / total_games_by_region.max())
region_strength_normalized = region_strength / region_strength.sum()

# 3. International Performance Metrics
international_df = df[df['region'] == 'INTERNATIONAL'].copy()
international_df['original_region'] = international_df['name']
regional_win_rates_international = international_df.groupby('original_region')['win_loss_ratio'].mean()

# 4. Comparison of Local vs. International Performance
merged_df = df.merge(international_df[['team_id', 'win_loss_ratio']], on='team_id', how='left', suffixes=('_local', '_international'))
merged_df['win_loss_ratio_international'].fillna(0, inplace=True)  # Fill NaN values with 0
merged_df['performance_difference'] = merged_df['win_loss_ratio_international'] - merged_df['win_loss_ratio_local']

min_games_threshold = 10
max_games_threshold = 50  # Teams with games more than this value get full reliability

# Calculate the reliability factor
df['reliability_factor'] = np.clip(df['ntot'] / max_games_threshold, 0, 1)

# 5. Final Score & Ranking
weight_wlr = 0.5  # Weight for weighted_win_loss_ratio
weight_pd = 0.2   # Weight for performance_difference

# Compute the score without reliability factor
df['raw_score'] = weight_wlr * df['weighted_win_loss_ratio'] + weight_pd * merged_df['performance_difference'].fillna(0)

# Adjust the score based on the reliability factor
df['final_score'] = df['raw_score'] * df['reliability_factor']
ranked_df = df.sort_values(by='final_score', ascending=False)
ranked_df['rank'] = ranked_df['final_score'].rank(ascending=False, method='min')

ranked_df[:25]


Unnamed: 0,region,slug,name,acronym,team_id,nwin,nloss,win_loss_ratio,ntot,team_code,weighted_win_loss_ratio,reliability_factor,raw_score,final_score,rank
150,KOREA,t1,T1,T1,98767991853197861,256,97,0.725212,353,T1,1.045391,1.0,0.517653,0.517653,1.0
407,KOREA,geng,Gen.G,GEN,100205573495116443,229,65,0.778912,294,GEN,1.074887,1.0,0.516087,0.516087,2.0
24,KOREA,dwg-kia,Dplus Kia,DK,100725845018863243,179,99,0.643885,278,DK,0.877378,1.0,0.464458,0.464458,3.0
277,"HONG KONG, MACAU, TAIWAN",psg-talon,PSG Talon,PSG,104367068120825486,159,36,0.815385,195,PSG,1.033444,1.0,0.454698,0.454698,4.0
257,CHINA,jd-gaming,Beijing JDG Intel Esports Club,JDG,99566404852189289,45,6,0.882353,51,JDG,0.950414,1.0,0.447308,0.447308,5.0
267,INTERNATIONAL,royal-never-give-up,Royal Never Give Up,RNG,98767991892579754,65,18,0.783133,83,RNG,0.879194,1.0,0.439597,0.439597,6.0
145,NORTH AMERICA,cloud9,Cloud9,C9,98767991877340524,226,97,0.69969,323,C9,0.987048,1.0,0.435165,0.435165,7.0
48,NORTH AMERICA,team-liquid,Team Liquid Honda,TL,98926509885559666,219,141,0.608333,360,TL,0.8812,1.0,0.430045,0.430045,8.0
144,INTERNATIONAL,dwg-kia,Dplus Kia,DK,100725845018863243,51,15,0.772727,66,DK,0.849022,1.0,0.424511,0.424511,9.0
288,EMEA,g2-esports,G2 Esports,G2,98767991926151025,162,65,0.713656,227,G2,0.931476,1.0,0.424499,0.424499,10.0


In [154]:
final_ranking = ranked_df.copy()

final_ranking = final_ranking[final_ranking['team_id'].isin(teams)]

In [155]:
final_ranking = final_ranking[final_ranking['region'] != 'INTERNATIONAL']
# Create a placeholder dataframe for missing teams
missing_teams_list = [team for team in teams if team not in final_ranking['team_id'].values]
missing_teams_df = pd.DataFrame({
    'team_id': missing_teams_list,
    'region': [None] * len(missing_teams_list),
    'slug': [None] * len(missing_teams_list),
    'name': [None] * len(missing_teams_list),
    'acronym': [None] * len(missing_teams_list),
    'nwin': [None] * len(missing_teams_list),
    'nloss': [None] * len(missing_teams_list),
    'win_loss_ratio': [None] * len(missing_teams_list),
    'ntot': [None] * len(missing_teams_list),
    'team_code': [None] * len(missing_teams_list),
    'weighted_win_loss_ratio': [None] * len(missing_teams_list),
    'reliability_factor': [None] * len(missing_teams_list),
    'raw_score': [None] * len(missing_teams_list),
    'final_score': [None] * len(missing_teams_list),
    'rank': [None] * len(missing_teams_list)
})

# Append the placeholder dataframe to the final ranking
final_ranking = pd.concat([final_ranking, missing_teams_df], ignore_index=True)

# Sort by the final_score and reset the index
final_ranking = final_ranking.sort_values(by='final_score', ascending=False).reset_index(drop=True)

# Assign numerical rank based on the final_score
final_ranking['rank'] = final_ranking['final_score'].rank(method='min', ascending=False, na_option='bottom')

final_ranking


Unnamed: 0,region,slug,name,acronym,team_id,nwin,nloss,win_loss_ratio,ntot,team_code,weighted_win_loss_ratio,reliability_factor,raw_score,final_score,rank
0,"HONG KONG, MACAU, TAIWAN",psg-talon,PSG Talon,PSG,104367068120825486,159.0,36.0,0.815385,195.0,PSG,1.033444,1.0,0.454698,0.454698,1.0
1,VIETNAM,gam-esports,GAM Esports,GAM,98767991954244555,56.0,11.0,0.835821,67.0,GAM,0.919535,1.0,0.317603,0.317603,2.0
2,EMEA,movistar-riders,Movistar Riders,MRS,103877625775457850,153.0,104.0,0.595331,257.0,MRS,0.797379,1.0,0.279623,0.279623,3.0
3,,,,,105913111502565010,,,,,,,,,,4.0


In [158]:
team_data = recent_game_stats(['98767991954244555', '103877625775457850', '104367068120825486', '105913111502565010'])

df = pd.DataFrame(team_data)

# 1. Compute Basic Metrics
df['nwin'] = df['nwin'].astype(int)
df['nloss'] = df['nloss'].astype(int)
df['win_loss_ratio'] = df.apply(lambda row: row['nwin'] if row['nloss'] == 0 else row['nwin'] / (row['nwin'] + row['nloss']), axis=1)
df['ntot'] = df['nwin'] + df['nloss']
df['team_code'] = df['acronym']

min_games_threshold = 10
max_games_played = 50  # Teams with games more than this value get full reliability

# Calculate the reliability factor
df['reliability_factor'] = np.clip(df['ntot'] / max_games_played, 0, 1)

# Modify the weighted win-loss ratio formula with a logarithmic factor
df['weighted_win_loss_ratio'] = df['win_loss_ratio'] * (1 + np.log1p(df['ntot'] / max_games_played))

# 2. Regional Strength Metrics
grouped = df.groupby('region')
top_teams_threshold = 5
avg_win_loss_by_region = grouped.apply(lambda x: x.nlargest(top_teams_threshold, 'win_loss_ratio')['win_loss_ratio'].mean())
total_games_by_region = df.groupby('region')['ntot'].sum()
region_strength = 1.5 * avg_win_loss_by_region + 0.1 * (total_games_by_region / total_games_by_region.max())
region_strength_normalized = region_strength / region_strength.sum()

# 3. International Performance Metrics
international_df = df[df['region'] == 'INTERNATIONAL'].copy()
international_df['original_region'] = international_df['name']
regional_win_rates_international = international_df.groupby('original_region')['win_loss_ratio'].mean()

# 4. Comparison of Local vs. International Performance
merged_df = df.merge(international_df[['team_id', 'win_loss_ratio']], on='team_id', how='left', suffixes=('_local', '_international'))
merged_df['win_loss_ratio_international'].fillna(0, inplace=True)  # Fill NaN values with 0
merged_df['performance_difference'] = merged_df['win_loss_ratio_international'] - merged_df['win_loss_ratio_local']

# 5. Final Score & Ranking
weight_wlr = 0.5  # Weight for weighted_win_loss_ratio
weight_pd = 0.2   # Weight for performance_difference

# Compute the score without reliability factor
df['raw_score'] = weight_wlr * df['weighted_win_loss_ratio'] + weight_pd * merged_df['performance_difference'].fillna(0)

# Adjust the score based on the reliability factor
df['final_score'] = df['raw_score'] * df['reliability_factor']
ranked_df = df.sort_values(by='final_score', ascending=False)
ranked_df['rank'] = ranked_df['final_score'].rank(ascending=False, method='min')

final_ranking = ranked_df.copy()

final_ranking = final_ranking[final_ranking['team_id'].isin(teams)]

final_ranking = final_ranking[final_ranking['region'] != 'INTERNATIONAL']
# Create a placeholder dataframe for missing teams
missing_teams_list = [team for team in teams if team not in final_ranking['team_id'].values]
missing_teams_df = pd.DataFrame({
    'team_id': missing_teams_list,
    'region': [None] * len(missing_teams_list),
    'slug': [None] * len(missing_teams_list),
    'name': [None] * len(missing_teams_list),
    'acronym': [None] * len(missing_teams_list),
    'nwin': [None] * len(missing_teams_list),
    'nloss': [None] * len(missing_teams_list),
    'win_loss_ratio': [None] * len(missing_teams_list),
    'ntot': [None] * len(missing_teams_list),
    'team_code': [None] * len(missing_teams_list),
    'weighted_win_loss_ratio': [None] * len(missing_teams_list),
    'reliability_factor': [None] * len(missing_teams_list),
    'raw_score': [None] * len(missing_teams_list),
    'final_score': [None] * len(missing_teams_list),
    'rank': [None] * len(missing_teams_list)
})

# Append the placeholder dataframe to the final ranking
final_ranking = pd.concat([final_ranking, missing_teams_df], ignore_index=True)

# Sort by the final_score and reset the index
final_ranking = final_ranking.sort_values(by='final_score', ascending=False).reset_index(drop=True)

# Assign numerical rank based on the final_score
final_ranking['rank'] = final_ranking['final_score'].rank(method='min', ascending=False, na_option='bottom')


In [160]:
final_ranking = ranked_df.copy()

final_ranking = final_ranking[final_ranking['team_id'].isin(teams)]

final_ranking = final_ranking[final_ranking['region'] != 'INTERNATIONAL']
# Create a placeholder dataframe for missing teams
missing_teams_list = [team for team in teams if team not in final_ranking['team_id'].values]
missing_teams_df = pd.DataFrame({
    'team_id': missing_teams_list,
    'region': [None] * len(missing_teams_list),
    'slug': [None] * len(missing_teams_list),
    'name': [None] * len(missing_teams_list),
    'acronym': [None] * len(missing_teams_list),
    'nwin': [None] * len(missing_teams_list),
    'nloss': [None] * len(missing_teams_list),
    'win_loss_ratio': [None] * len(missing_teams_list),
    'ntot': [None] * len(missing_teams_list),
    'team_code': [None] * len(missing_teams_list),
    'weighted_win_loss_ratio': [None] * len(missing_teams_list),
    'reliability_factor': [None] * len(missing_teams_list),
    'raw_score': [None] * len(missing_teams_list),
    'final_score': [None] * len(missing_teams_list),
    'rank': [None] * len(missing_teams_list)
})

# Append the placeholder dataframe to the final ranking
final_ranking = pd.concat([final_ranking, missing_teams_df], ignore_index=True)

# Sort by the final_score and reset the index
final_ranking = final_ranking.sort_values(by='final_score', ascending=False).reset_index(drop=True)

# Assign numerical rank based on the final_score
final_ranking['rank'] = final_ranking['final_score'].rank(method='min', ascending=False, na_option='bottom')

final_ranking

In [161]:
final_ranking = final_ranking[final_ranking['region'] != 'INTERNATIONAL']
# Create a placeholder dataframe for missing teams
missing_teams_list = [team for team in teams if team not in final_ranking['team_id'].values]
missing_teams_df = pd.DataFrame({
    'team_id': missing_teams_list,
    'region': [None] * len(missing_teams_list),
    'slug': [None] * len(missing_teams_list),
    'name': [None] * len(missing_teams_list),
    'acronym': [None] * len(missing_teams_list),
    'nwin': [None] * len(missing_teams_list),
    'nloss': [None] * len(missing_teams_list),
    'win_loss_ratio': [None] * len(missing_teams_list),
    'ntot': [None] * len(missing_teams_list),
    'team_code': [None] * len(missing_teams_list),
    'weighted_win_loss_ratio': [None] * len(missing_teams_list),
    'reliability_factor': [None] * len(missing_teams_list),
    'raw_score': [None] * len(missing_teams_list),
    'final_score': [None] * len(missing_teams_list),
    'rank': [None] * len(missing_teams_list)
})

# Append the placeholder dataframe to the final ranking
final_ranking = pd.concat([final_ranking, missing_teams_df], ignore_index=True)

# Sort by the final_score and reset the index
final_ranking = final_ranking.sort_values(by='final_score', ascending=False).reset_index(drop=True)

# Assign numerical rank based on the final_score
final_ranking['rank'] = final_ranking['final_score'].rank(method='min', ascending=False, na_option='bottom')

final_ranking

Unnamed: 0,region,slug,name,acronym,team_id,nwin,nloss,win_loss_ratio,ntot,team_code,reliability_factor,weighted_win_loss_ratio,raw_score,final_score,rank
0,"HONG KONG, MACAU, TAIWAN",psg-talon,PSG Talon,PSG,104367068120825486,159.0,36.0,0.815385,195.0,PSG,1.0,2.111223,0.993587,0.993587,1.0
1,EMEA,movistar-riders,Movistar Riders,MRS,103877625775457850,153.0,104.0,0.595331,257.0,MRS,1.0,1.675752,0.71881,0.71881,2.0
2,VIETNAM,gam-esports,GAM Esports,GAM,98767991954244555,56.0,11.0,0.835821,67.0,GAM,1.0,1.546395,0.631033,0.631033,3.0
3,,,,,105913111502565010,,,,,,,,,,4.0
