In [1]:
# import dependencies
from bs4 import BeautifulSoup
import requests
import re
import timeit
import pandas as pd

### Load Data

In [2]:
# Per-player, per-map stat lines
file_path = '../Data/database.csv'
stats_df = pd.read_csv(filepath_or_buffer=file_path)
stats_df.head()

Unnamed: 0,game_id,map_name,map_duration,player_name,agent,team_abrev,kills,deaths,assists,adr,fk,fd,clutches,aces,score_dif,round_total
0,0,Ascent,1:05:24,mazin,Omen,MIBR,20.0,15.0,10.0,161.0,3.0,1.0,1,0,-4,22
1,0,Ascent,1:05:24,artzin,Yoru,MIBR,18.0,15.0,9.0,139.0,7.0,0.0,0,0,-4,22
2,0,Ascent,1:05:24,ShahZaM,Sova,MIBR,12.0,14.0,9.0,122.0,2.0,1.0,0,0,-4,22
3,0,Ascent,1:05:24,liazzi,Killjoy,MIBR,13.0,14.0,5.0,126.0,0.0,2.0,0,0,-4,22
4,0,Ascent,1:05:24,Pa1nt,Iso,MIBR,11.0,17.0,6.0,98.0,2.0,4.0,0,0,-4,22


In [3]:
# Series-level results; every series gets a 1-based series_id in file row order
file_path = '../Data/series_df.csv'
series_df = pd.read_csv(filepath_or_buffer=file_path)
series_df = series_df.assign(series_id=range(1, len(series_df) + 1))
series_df.head()

Unnamed: 0,home_team,away_team,home_round_difference,away_round_difference,num_maps,series_id
0,MIBR,LEV,-10,10,2,1
1,SEN,NRG,7,-7,2,2
2,FUR,100T,4,-4,2,3
3,LOUD,EG,-3,3,3,4
4,G2,C9,-1,1,3,5


In [4]:
# One row per map played, with the final score for each side
file_path = '../Data/scores_data.csv'
games_df = pd.read_csv(filepath_or_buffer=file_path)
games_df.head(5)

Unnamed: 0,game_id,map_name,home_team,away_team,map_duration,home_score,away_score
0,0,Ascent,MIBR,LEV,1:05:24,9,13
1,1,Icebox,MIBR,LEV,45:04,7,13
2,2,Lotus,SEN,NRG,59:21,13,8
3,3,Sunset,SEN,NRG,1:00:54,14,12
4,4,Icebox,FUR,100T,1:12:51,14,12


In [5]:
# Player roster; every player gets a 1-based player_id in file row order
file_path = '../Data/player_data.csv'
player_df = pd.read_csv(filepath_or_buffer=file_path)
player_df = player_df.assign(player_id=range(1, len(player_df) + 1))
player_df.head(5)

Unnamed: 0,player_name,team_abrev,player_id
0,Apoth,EG,1
1,Asuna,100T,2
2,Boostio,100T,3
3,C0M,LEV,4
4,Cryocells,100T,5


## Create player_stats

##### player_id, series_id, game_id, agent, kills, deaths, assists, fk, fd, clutches, aces, adr, points

In [6]:
# Build the player_stats rows by looking up three things for every stat line.
# Each lookup is a default (inner) join, so stat lines with no match are dropped.
# validate='many_to_one' makes pandas raise a MergeError if the lookup side has
# repeated keys, instead of silently duplicating stat rows.

# 1) home/away teams of the map, keyed by (game_id, map_name)
merged_df = pd.merge(
    stats_df,
    games_df[['game_id', 'map_name', 'home_team', 'away_team']],
    on=['game_id', 'map_name'],
    validate='many_to_one',
)

# 2) player_id, keyed by (player_name, team_abrev)
merged_df = pd.merge(
    merged_df,
    player_df[['player_name', 'team_abrev', 'player_id']],
    on=['player_name', 'team_abrev'],
    validate='many_to_one',
)

# 3) series_id (plus the other series columns), keyed by the matchup
final_df = pd.merge(
    merged_df,
    series_df,
    on=['home_team', 'away_team'],
    validate='many_to_one',
)

In [7]:
# Remove the map- and series-level columns that are not wanted in player_stats
columns_to_drop = [
    'map_duration',
    'round_total',
    'home_round_difference',
    'away_round_difference',
    'num_maps',
]
final_df = final_df.drop(columns_to_drop, axis='columns')

In [8]:
# Ids first, then the per-map stats. Selecting by this list also discards
# every column that is not named in it.
new_order = [
    'player_id', 'series_id', 'game_id', 'agent',
    'kills', 'deaths', 'assists',
    'fk', 'fd', 'clutches', 'aces', 'adr',
]
final_df = final_df[new_order]

In [9]:
# Points awarded per unit of each stat (negative weights are penalties)
points_allocation = dict(
    kills=1,
    deaths=-0.5,
    assists=0.5,
    fk=2,
    fd=-1,
    clutches=2,
    aces=3,
    adr=0.1,
)


def calculate_points(row):
    """Return the weighted point total for a single stat line.

    ``row`` is anything indexable by stat name (a DataFrame row or a plain
    dict) that holds a value for every key of ``points_allocation``. Each
    value is multiplied by its weight and the products are added up in the
    order the stats appear in ``points_allocation``.
    """
    return sum(row[stat] * weight for stat, weight in points_allocation.items())

# Score every stat line at once with column arithmetic rather than a row-wise
# apply. The weighted columns are added in points_allocation order, so each
# value equals what calculate_points returns for that row. This form also
# works when final_df has no rows.
final_df['points'] = sum(
    final_df[stat] * weight for stat, weight in points_allocation.items()
)

In [10]:
# Shift game_id up by one (the ids in the source data start at 0).
# NOTE: not idempotent - every re-run of this cell adds another 1.
final_df['game_id'] = final_df['game_id'].add(1)

In [11]:
# Preview the finished player_stats rows
final_df.head()

Unnamed: 0,player_id,series_id,game_id,agent,kills,deaths,assists,fk,fd,clutches,aces,adr,points
0,41,1,1,Omen,20.0,15.0,10.0,3.0,1.0,1,0,161.0,40.6
1,41,1,2,Viper,15.0,14.0,5.0,3.0,1.0,1,0,150.0,32.5
2,26,1,1,Yoru,18.0,15.0,9.0,7.0,0.0,0,0,139.0,42.9
3,26,1,2,Gekko,16.0,15.0,4.0,0.0,3.0,0,0,150.0,22.5
4,20,1,1,Sova,12.0,14.0,9.0,2.0,1.0,0,0,122.0,24.7


In [12]:
# Export player_stats without the DataFrame index
final_df.to_csv(path_or_buf='../Data/import_tables/player_stats.csv', index=False)

## Create player table

##### player_name, team_abrev

In [13]:
# Preview the roster, still carrying the player_id column
player_df.head(5)

Unnamed: 0,player_name,team_abrev,player_id
0,Apoth,EG,1
1,Asuna,100T,2
2,Boostio,100T,3
3,C0M,LEV,4
4,Cryocells,100T,5


In [14]:
# player_id is removed here, so the exported player table does not contain it
player_df = player_df.drop('player_id', axis='columns')
player_df.head(n=3)

Unnamed: 0,player_name,team_abrev
0,Apoth,EG
1,Asuna,100T
2,Boostio,100T


In [15]:
# Export the player table without the DataFrame index
player_df.to_csv(path_or_buf='../Data/import_tables/player.csv', index=False)

## Create games table

##### series_id, map_name, home_team, away_team, map_duration, home_score, away_score

In [16]:
# Preview the raw per-map scores before the series join
games_df.head(n=3)

Unnamed: 0,game_id,map_name,home_team,away_team,map_duration,home_score,away_score
0,0,Ascent,MIBR,LEV,1:05:24,9,13
1,1,Icebox,MIBR,LEV,45:04,7,13
2,2,Lotus,SEN,NRG,59:21,13,8


In [17]:
# Tag every map with its series by joining on the matchup (home_team, away_team).
# validate='one_to_many' makes pandas raise a MergeError if a matchup appears
# more than once in series_df, which would otherwise duplicate game rows.
games_df = pd.merge(
    series_df,
    games_df[['map_name', 'home_team', 'away_team', 'map_duration', 'home_score', 'away_score']],
    on=['home_team', 'away_team'],
    validate='one_to_many',
)
games_df.head(5)

Unnamed: 0,home_team,away_team,home_round_difference,away_round_difference,num_maps,series_id,map_name,map_duration,home_score,away_score
0,MIBR,LEV,-10,10,2,1,Ascent,1:05:24,9,13
1,MIBR,LEV,-10,10,2,1,Icebox,45:04,7,13
2,SEN,NRG,7,-7,2,2,Lotus,59:21,13,8
3,SEN,NRG,7,-7,2,2,Sunset,1:00:54,14,12
4,FUR,100T,4,-4,2,3,Icebox,1:12:51,14,12


In [18]:
# The series-level summary columns are not wanted in the games table
games_df = games_df.drop(
    ['home_round_difference', 'away_round_difference', 'num_maps'],
    axis='columns',
)

In [19]:
# Column layout of the games table: series id, map, teams, duration, scores
new_order = [
    'series_id',
    'map_name',
    'home_team',
    'away_team',
    'map_duration',
    'home_score',
    'away_score',
]
games_df = games_df[new_order]

In [20]:
# Preview the finished games table
games_df.head(n=3)

Unnamed: 0,series_id,map_name,home_team,away_team,map_duration,home_score,away_score
0,1,Ascent,MIBR,LEV,1:05:24,9,13
1,1,Icebox,MIBR,LEV,45:04,7,13
2,2,Lotus,SEN,NRG,59:21,13,8


In [21]:
# Export the games table without the DataFrame index
games_df.to_csv(path_or_buf='../Data/import_tables/games.csv', index=False)

## Create series table

##### split, week, home_team, away_team, home_round_difference, away_round_difference, num_maps

In [22]:
# Every series gets split 2 and a placeholder week of 1
series_df = series_df.assign(split=2, week=1)

In [23]:
# Check the last rows to confirm the new split and week columns
series_df.tail(n=3)

Unnamed: 0,home_team,away_team,home_round_difference,away_round_difference,num_maps,series_id,split,week
17,MIBR,FUR,-9,9,2,18,2,1
18,NRG,C9,5,-5,3,19,2,1
19,EG,100T,-6,6,3,20,2,1


In [24]:
# Week number for each series_id: ids 1-5 -> week 1, 6-15 -> week 2, 16-20 -> week 3
series_week_dict = {
    series_id: week
    for week, series_ids in ((1, range(1, 6)), (2, range(6, 16)), (3, range(16, 21)))
    for series_id in series_ids
}

In [25]:
# Overwrite week with the value looked up from series_week_dict;
# a series_id missing from the dictionary ends up as NaN
series_df = series_df.assign(week=series_df['series_id'].map(series_week_dict))

In [26]:
# Column layout: id, split and week first, then teams and series summary
new_order = [
    'series_id',
    'split',
    'week',
    'home_team',
    'away_team',
    'home_round_difference',
    'away_round_difference',
    'num_maps',
]
series_df = series_df[new_order]

In [27]:
# series_id is removed here, so the exported series table does not contain it
series_df = series_df.drop('series_id', axis='columns')
series_df.head(n=3)

Unnamed: 0,split,week,home_team,away_team,home_round_difference,away_round_difference,num_maps
0,2,1,MIBR,LEV,-10,10,2
1,2,1,SEN,NRG,7,-7,2
2,2,1,FUR,100T,4,-4,2


In [28]:
# Export the series table without the DataFrame index
series_df.to_csv(path_or_buf='../Data/import_tables/series.csv', index=False)