# Valorant Champions Tour Statistics Web Scraper (Year)
#### The purpose of this program is to aggregate player statistics in the main events (closed qualifier events, group stage events, playoffs) of the VCT North American Circuit by year.

# Importing Packages

In [1]:
import numpy as np
import pandas as pd
import html5lib
from ipynb.fs.full.event_id_scraper import event_links

# Creating Links to Stat Sheets

Using the event IDs that were scraped previously, links to each stat sheet were generated for each event.
These links were then grouped by year.

In [2]:
# Will hold the stat-sheet URLs, one per event.
links = []
# Uses the list of event IDs scraped using event_id_scraper.
# Each entry is an [event_id, series_id] pair; leaving the name as the last
# expression displays the list as the cell output.
event_links

[['1130', '2190'],
 ['800', '1953'],
 ['800', '1561'],
 ['799', '1737'],
 ['799', '1559'],
 ['558', '1094'],
 ['578', '1195'],
 ['576', '1132'],
 ['520', '1131'],
 ['372', '770'],
 ['371', '769'],
 ['370', '767'],
 ['333', '690'],
 ['324', '664'],
 ['306', '618'],
 ['291', '592']]

In [3]:
# Converts each [event_id, series_id] pair into the URL of its stat sheet.
# The list is rebuilt from scratch (rather than appended to) so that re-running
# this cell can never leave duplicate links behind, which would shift the
# year split below and double-count events.
links = [
    "https://www.vlr.gg/stats/?event_group_id=all&event_id="+str(ids[0])+"&series_id="+str(ids[1])+"&subseries_id=all&region=all&country=all&min_rounds=&min_rating=&agent=all&map_id=all&timespan=all"
    for ids in event_links
]
# Splits events based on year: the first five links go to 2022, the rest to 2021.
links22 = links[:5]
links21 = links[5:]
links21

['https://www.vlr.gg/stats/?event_group_id=all&event_id=558&series_id=1094&subseries_id=all&region=all&country=all&min_rounds=&min_rating=&agent=all&map_id=all&timespan=all',
 'https://www.vlr.gg/stats/?event_group_id=all&event_id=578&series_id=1195&subseries_id=all&region=all&country=all&min_rounds=&min_rating=&agent=all&map_id=all&timespan=all',
 'https://www.vlr.gg/stats/?event_group_id=all&event_id=576&series_id=1132&subseries_id=all&region=all&country=all&min_rounds=&min_rating=&agent=all&map_id=all&timespan=all',
 'https://www.vlr.gg/stats/?event_group_id=all&event_id=520&series_id=1131&subseries_id=all&region=all&country=all&min_rounds=&min_rating=&agent=all&map_id=all&timespan=all',
 'https://www.vlr.gg/stats/?event_group_id=all&event_id=372&series_id=770&subseries_id=all&region=all&country=all&min_rounds=&min_rating=&agent=all&map_id=all&timespan=all',
 'https://www.vlr.gg/stats/?event_group_id=all&event_id=371&series_id=769&subseries_id=all&region=all&country=all&min_rounds=&

In [4]:
# Read every HTML table found on one of the 2021 stat sheets.
dfs = pd.read_html(links21[3])
# The first table on the page is the one used as the stat sheet.
df = dfs[0]
# Preview the first five rows.
df.head(5)

Unnamed: 0,Player,Agents,Rnd,ACS,K:D,KAST,ADR,KPR,APR,FKPR,FDPR,HS%,CL%,CL,KMax,K,D,A,FK,FD
0,TenZ SEN,(+1),354,275.9,1.4,,164.5,0.97,0.2,0.22,0.13,19%,16%,5/31,31,345,247,70,77,45
1,Asuna 100T,,211,245.7,1.22,,153.8,0.87,0.2,0.19,0.14,22%,43%,6/14,48,183,150,43,41,30
2,Oderus NYFU,,151,243.1,1.12,,165.0,0.83,0.23,0.13,0.17,28%,14%,1/7,26,125,112,34,20,26
3,PureR SQ,,276,225.8,1.14,,136.1,0.8,0.16,0.17,0.14,25%,36%,9/25,25,222,195,43,47,38
4,Shawn GEN,,165,220.4,0.95,,145.0,0.74,0.21,0.1,0.14,21%,20%,4/20,23,122,129,35,17,23


# Function: convert_percentage
Void function: it modifies the dataframe in place and returns nothing. Converts the KAST, HS% and CL% percentages, which are scraped as strings, into floats so that calculations may be performed.

In [5]:
#Converts percentages represented as strings into floats which can be calculated
def _percent_to_fraction(value, unrecorded=0):
    """Convert one scraped percentage cell into a fraction.

    Args:
        value: The raw cell. A string such as ``"19%"`` is parsed; anything
            else is treated as a missing value.
        unrecorded: Value returned for NumPy float cells, which only occur
            when the whole column was parsed as numeric (i.e. it held no
            percentage strings at all).

    Returns:
        ``float`` fraction for strings, ``unrecorded`` for NumPy floats and
        ``0`` for any other missing value (e.g. a plain ``nan``).
    """
    if isinstance(value, str):
        # float() rather than int() so that "19.5%" is accepted as well.
        return float(value.replace('%', '')) / 100
    if isinstance(value, np.floating):
        return unrecorded
    return 0


def convert_percentage(df):
    """Convert the percentage columns of a stat sheet into fractions in place.

    The columns are addressed by position: 3 (KAST), 9 (HS%) and 10 (CL%).
    Strings like ``"19%"`` become ``0.19``. A missing cell inside a column
    that otherwise holds strings becomes ``0``. When the KAST column holds no
    strings at all (KAST was not recorded for the event) every cell becomes
    ``-1`` so callers can tell "not recorded" apart from a real value; HS% and
    CL% use ``0`` in that situation.

    Args:
        df: Stat-sheet dataframe. Modified in place; the three columns are
            replaced by label, so column labels are assumed to be unique.

    Returns:
        None.
    """
    # (column position, value used when the column holds no strings)
    for position, unrecorded in ((3, -1), (9, 0), (10, 0)):
        # to_numpy() keeps NumPy scalars for numeric columns, which is what
        # distinguishes an all-missing column from a single missing cell.
        raw_values = df.iloc[:, position].to_numpy()
        # Assigning the whole column works for any row index and avoids
        # writing floats cell by cell into a string-typed column.
        df[df.columns[position]] = [
            _percent_to_fraction(value, unrecorded) for value in raw_values
        ]
    

# Class: Player
##### Stores all relevant stats of a player in a certain year.
#### set_stats: takes the stats from one dataframe row, passed in as an array, and adds them to the player's totals
#### get_stats: returns name, team, rounds played, average ACS, KPR, APR, K:D, average KAST, average ADR, average HS%, FKPR, FDPR, FK:FD, and average clutch % (averages are taken over the stat sheets the player appears in).

In [14]:
#Uses objects to store data before passing into database
class Player():
    """Running totals of one player's stats across several stat-sheet rows.

    Each row is passed as an array indexed by position:
    0 rounds, 1 ACS, 3 KAST, 4 ADR, 9 HS%, 10 clutch %, 13 kills, 14 deaths,
    15 assists, 16 first kills, 17 first deaths, 18 name, 19 team.
    """

    def __init__(self, arr):
        """Create the player from their first stat row.

        Args:
            arr: Stat row as described in the class docstring.
        """
        self.name = arr[18]
        self.team = arr[19]

        # All totals start at zero; the first row is then folded in through
        # the same code path as every later row.
        self.rounds = 0.0
        self.acs = 0.0
        self.k = 0.0
        self.d = 0.0
        self.a = 0.0
        self.adr = 0.0
        self.hsp = 0.0
        self.fk = 0.0
        self.fd = 0.0
        self.clutchp = 0.0
        self.games = 0

        self.kast = 0
        # first: True until the first row with a recorded KAST is seen.
        # wkast: number of rows with a recorded KAST, but never below 1 so
        # that kast / wkast cannot divide by zero.
        self.first = True
        self.wkast = 1

        self._accumulate(arr)

    def _accumulate(self, arr):
        """Add one stat row to the running totals and count it as a game."""
        self.games += 1
        self.rounds += float(arr[0])
        self.acs += float(arr[1])
        self.k += float(arr[13])
        self.d += float(arr[14])
        self.a += float(arr[15])

        # A negative KAST (-1) marks a row where KAST was not recorded; such
        # rows must not count towards the KAST average.
        kast = float(arr[3])
        if not kast < -0.1:
            self.kast += kast
            if self.first:
                # wkast already starts at 1 for the first recorded KAST.
                self.first = False
            else:
                self.wkast += 1

        self.adr += float(arr[4])
        self.hsp += float(arr[9])
        self.fk += float(arr[16])
        self.fd += float(arr[17])
        self.clutchp += float(arr[10])

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator, or NaN when the denominator is 0."""
        if denominator == 0:
            return float('nan')
        return numerator / denominator

    def set_stats(self, arr):
        """Add the stats of another row to this player's totals.

        Args:
            arr: Stat row as described in the class docstring.
        """
        self._accumulate(arr)

    def get_name(self):
        """Return the player's name."""
        return self.name

    def get_stats(self):
        """Return the aggregated stats as a list.

        Returns:
            ``[name, team, rounds, ACS, KPR, APR, K:D, KAST, ADR, HS%, FKPR,
            FDPR, FK:FD, clutch %]``. ACS, ADR, HS% and clutch % are averaged
            over the rows added; KAST is averaged over the rows that recorded
            it (0 if none did); per-round values divide by total rounds.
            A ratio whose denominator is 0 (e.g. K:D with no deaths) is NaN
            instead of raising ``ZeroDivisionError``.
        """
        ratio = self._ratio
        return [
            self.name,
            self.team,
            self.rounds,
            self.acs / self.games,
            ratio(self.k, self.rounds),
            ratio(self.a, self.rounds),
            ratio(self.k, self.d),
            self.kast / self.wkast,
            self.adr / self.games,
            self.hsp / self.games,
            ratio(self.fk, self.rounds),
            ratio(self.fd, self.rounds),
            ratio(self.fk, self.fd),
            self.clutchp / self.games,
        ]

# Function: clean_data
Takes in a list of stat sheet links. Returns a list of Player objects.

In [7]:
#Fully cleans data and creates a player instance for each player
def clean_data(links):
    """Scrape the given stat sheets and aggregate them into Player objects.

    Each link is read with ``pd.read_html`` and its first table is used. The
    ``Agents`` column is dropped, the ``Player`` column is split into ``Name``
    and ``Team`` columns appended at the end, and the percentage columns are
    converted by ``convert_percentage``. Rows are then folded into one
    ``Player`` per name: a name seen before updates the existing object, a new
    name creates one.

    Args:
        links: Iterable of stat-sheet URLs.

    Returns:
        List of ``Player`` objects in order of first appearance.
    """
    # Name -> Player. A dict replaces the parallel name/player lists, so each
    # lookup is O(1) and insertion order still gives first-appearance order.
    players = {}

    for link in links:
        # Reads stat sheet and converts it into Pandas dataframe
        df = pd.read_html(link)[0]

        del df['Agents']

        # The Player cell holds "<name> <team>". A cell without a team tag
        # gets an empty team instead of raising IndexError.
        names = []
        teams = []
        for entry in df['Player']:
            parts = entry.split()
            names.append(parts[0])
            teams.append(parts[1] if len(parts) > 1 else "")

        # Name and Team are appended last and Player is removed afterwards,
        # which yields the positional layout Player and convert_percentage
        # index into.
        df['Name'] = names
        df['Team'] = teams
        del df['Player']

        convert_percentage(df)

        for name, row in zip(names, df.to_numpy()):
            if name in players:
                # Player has appeared previously: add this sheet's stats.
                players[name].set_stats(row)
            else:
                players[name] = Player(row)

    # Returns list of player objects that played that year
    return list(players.values())

# Creating Dataframe
A dataframe containing each player's statistics for each year is created and then saved as an Excel sheet. K:D, KDA, FK:FD, KPR, DPR, and APR will be added to replace K, D, A, FK, and FD.

In [15]:
# Aggregates the 2022 stat sheets into one Player object per player.
player_list = clean_data(links22)

# Creates the dataframe with the stats from each player in a single step;
# appending one row at a time with .loc copies the frame on every append.
df22 = pd.DataFrame(
    [player.get_stats() for player in player_list],
    columns=['Name', 'Team', 'Rounds Played', 'ACS', 'KPR', 'APR', 'K:D', 'KAST', 'ADR', 'HS%', 'FKPR', 'FDPR', 'FK:FD', 'Clutch %'],
)

df22.to_excel('VCTNA_Stats22.xlsx', sheet_name='2022')

In [16]:
# Aggregates the 2021 stat sheets into one Player object per player.
player_list = clean_data(links21)

rows = []
for player in player_list:
    stats = player.get_stats()
    # Adjusts for the fact that KAST was introduced during the last event of
    # 2021: an average KAST (position 7) of 0 is exported as "N/A".
    # Doing this on the row, before the dataframe exists, avoids writing a
    # string into an already-typed column cell by cell.
    if stats[7] == 0:
        stats[7] = "N/A"
    rows.append(stats)

# Builds the dataframe in one step instead of appending row by row with .loc.
df21 = pd.DataFrame(
    rows,
    columns=['Name', 'Team', 'Rounds Played', 'ACS', 'KPR', 'APR', 'K:D', 'KAST', 'ADR', 'HS%', 'FKPR', 'FDPR', 'FK:FD', 'Clutch %'],
)

df21.to_excel('VCTNA_Stats21.xlsx', sheet_name='2021')