# Event Stat Scraper (places all stats into array)

### Scrapes statistics of players in each event and converts it into a graph.

In [1]:
import numpy as np
import pandas as pd
import html5lib
from bs4 import BeautifulSoup
import requests


In [11]:
event_links = ['/event/matches/1130/?series_id=2190',
 '/event/matches/800/?series_id=1953',
 '/event/matches/800/?series_id=1561',
 '/event/matches/799/?series_id=1737',
 '/event/matches/799/?series_id=1559',
 '/event/matches/558/?series_id=1094',
 '/event/matches/578/?series_id=1195',
 '/event/matches/576/?series_id=1132',
 '/event/matches/520/?series_id=1131',
 '/event/matches/372/?series_id=770',
 '/event/matches/371/?series_id=769',
 '/event/matches/370/?series_id=767',
 '/event/matches/333/?series_id=690',
 '/event/matches/324/?series_id=664',
 '/event/matches/306/?series_id=618',
 '/event/matches/291/?series_id=592']


In [3]:
for i in range(0, len(event_links)):
    event_links[i]= "http://vlr.gg"+event_links[i]
event_links.pop(0)
event_links

['http://vlr.gg/event/matches/800/?series_id=1953',
 'http://vlr.gg/event/matches/800/?series_id=1561',
 'http://vlr.gg/event/matches/799/?series_id=1737',
 'http://vlr.gg/event/matches/799/?series_id=1559',
 'http://vlr.gg/event/matches/558/?series_id=1094',
 'http://vlr.gg/event/matches/578/?series_id=1195',
 'http://vlr.gg/event/matches/576/?series_id=1132',
 'http://vlr.gg/event/matches/520/?series_id=1131',
 'http://vlr.gg/event/matches/372/?series_id=770',
 'http://vlr.gg/event/matches/371/?series_id=769',
 'http://vlr.gg/event/matches/370/?series_id=767',
 'http://vlr.gg/event/matches/333/?series_id=690',
 'http://vlr.gg/event/matches/324/?series_id=664',
 'http://vlr.gg/event/matches/306/?series_id=618',
 'http://vlr.gg/event/matches/291/?series_id=592']

In [4]:
all_links = []
for event in event_links:
    resp = requests.get(event)
    soup = BeautifulSoup(resp.text)
    first_match_list = soup.find_all('a', class_='wf-module-item match-item mod-color mod-left mod-bg-after-red mod-first', href = True)
    second_match_list = soup.find_all('a', class_='wf-module-item match-item mod-color mod-left mod-bg-after-red', href = True)
    match_list = first_match_list + second_match_list

    link_list = []

    for match in match_list:
        link_list.append(match['href'])

    for i, link in enumerate(link_list):
        link_list[i] = "https://vlr.gg"+link_list[i]+"?game=all&tab=overview"

    all_links += link_list
all_links

['https://vlr.gg/107143/faze-clan-vs-100-thieves-champions-tour-north-america-stage-2-challengers-ubr1?game=all&tab=overview',
 'https://vlr.gg/107142/ghost-gaming-vs-nrg-esports-champions-tour-north-america-stage-2-challengers-lbr1?game=all&tab=overview',
 'https://vlr.gg/107435/xset-vs-100-thieves-champions-tour-north-america-stage-2-challengers-ubsf?game=all&tab=overview',
 'https://vlr.gg/107739/evil-geniuses-vs-nrg-esports-champions-tour-north-america-stage-2-challengers-lbr2?game=all&tab=overview',
 'https://vlr.gg/107737/xset-vs-optic-gaming-champions-tour-north-america-stage-2-challengers-ubf?game=all&tab=overview',
 'https://vlr.gg/107742/xset-vs-faze-clan-champions-tour-north-america-stage-2-challengers-lbf?game=all&tab=overview',
 'https://vlr.gg/107743/optic-gaming-vs-xset-champions-tour-north-america-stage-2-challengers-gf?game=all&tab=overview',
 'https://vlr.gg/107144/ghost-gaming-vs-evil-geniuses-champions-tour-north-america-stage-2-challengers-ubr1?game=all&tab=overvie

In [5]:
def clean_data(df):
    df.drop('Unnamed: 1', axis=1, inplace=True)
    df['Name'] = 'x'
    df['Team'] = 'x'

    for i in df.index:
        s = df.at[i, 'Unnamed: 0'].split()
        df.at[i,'Name'] = s[0]
        df.at[i,'Team']= s[1]
    
    df.drop('Unnamed: 0', axis=1, inplace=True)
    df.drop('+/–', axis=1, inplace=True)
    df.drop('+/–.1', axis=1, inplace=True)

    
    for i in df.index:
        for col in df.columns:
            if(isinstance(df.at[i,col], np.float64)):
                df.at[i, col] = 0
            else:
                s = df.at[i, col].split(" ")
                df.at[i,col] = s[0]
                if col == 'D':
                    df.at[i,col] = s[2]




In [6]:
def convert_percentage(df):
    for i in df.index:
         #Cleaning KAST Percentages
        if(type(df.at[i, 'KAST'])==float):
            df.at[i, 'KAST'] = 0
        #Adjusts for if KAST was not recorded in the stats    
        elif(isinstance(df.at[i,'KAST'], np.floating)):
            df.at[i,'KAST'] = -1
        else:
            df.at[i,'KAST']= df.at[i,'KAST'].replace('%','')
        
        #Cleaning Headshot Percentages
        #Adjusts for if HS% was not recorded in the stats or player didn't headshot
        if(type(df.at[i,'HS%'])==float):
            df.at[i,'HS%'] = 0
        else:
            df.at[i,'HS%'] = df.at[i,'HS%'].replace('%','')
        
        #Cleaning Clutch Percentages
        #Adjusts for if KAST was not recorded in the stats or the player had no clutches  
        if 'CL%' in df.columns:
            if(type(df.at[i,'CL%'])==float):
                df.at[i,'CL%'] = 0
            else:
                df.at[i,'CL%'] = df.at[i,'CL%'].replace('%','')

In [7]:
dfs = pd.read_html(link_list[0])
df = dfs[3]
df1 = dfs[2]
df.head(5)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,ACS,K,D,A,+/–,KAST,ADR,HS%,FK,FD,+/–.1
0,PureR XSET,,242 133,67 32 35,/ 54 23 31 /,10 7 3,+13 +9 +4,,157 81,24% 31% 19%,8 4 4,7 3 4,+1 +1 0
1,BcJ XSET,,205 109,49 22 27,/ 54 24 30 /,35 17 18,-5 -2 -3,,137 70,21% 24% 18%,4 2 2,7 5 2,-3 -3 0
2,WeDid XSET,,180 82,52 31 21,/ 55 23 32 /,16 8 8,-3 +8 -11,,124 52,28% 30% 26%,12 5 7,6 2 4,+6 +3 +3
3,thwifo XSET,,178 105,43 21 22,/ 55 23 32 /,23 8 15,-12 -2 -10,,118 73,29% 34% 26%,11 4 7,10 3 7,+1 +1 0
4,AYRIN XSET,,170 103,45 19 26,/ 59 29 30 /,19 7 12,-14 -10 -4,,115 71,17% 22% 15%,4 2 2,6 1 5,-2 +1 -3


In [8]:
clean_data(df)
df.head(5)

Unnamed: 0,ACS,K,D,A,KAST,ADR,HS%,FK,FD,Name,Team
0,242,67,54,10,0.0,157,24%,8,7,PureR,XSET
1,205,49,54,35,0.0,137,21%,4,7,BcJ,XSET
2,180,52,55,16,0.0,124,28%,12,6,WeDid,XSET
3,178,43,55,23,0.0,118,29%,11,10,thwifo,XSET
4,170,45,59,19,0.0,115,17%,4,6,AYRIN,XSET


In [32]:
class Player():
    def __init__(self, arr):
        self.name = arr[9]
        self.team = arr[10]
        self.acs = [float(arr[0])]
        self.k =[float(arr[1])] 
        self.d =[float(arr[2])]  
        self.a =[float(arr[3])] 
        self.kast =[float(arr[4])] 
        self.adr =[float(arr[5])] 
        self.hsp =[float(arr[6])] 
        self.fk =[float(arr[7])] 
        self.fd =[float(arr[8])] 
        self.games = []
    
    def set_stats(self, arr):
        self.acs.append(float(arr[0]))
        self.k.append(float(arr[1]) )
        self.d.append(float(arr[2]))  
        self.a.append(float(arr[3])) 
        self.kast.append(float(arr[4])) 
        self.adr.append(float(arr[5])) 
        self.hsp.append(float(arr[6])) 
        self.fk.append(float(arr[7]))
        self.fd.append(float(arr[8])) 
    def get_name(self):
        return self.name
    
    def get_stats(self):
        return [self.name, self.team, self.acs, self.k, self.d, self.a, self.kast, self.adr, self.hsp, self.fk, self.fd]

In [33]:
player_names =[]
player_list =[]

In [39]:
for link in all_links:
    dfs = pd.read_html(link)
    df = dfs[2]
    df1 = dfs[3]
    clean_data(df)
    clean_data(df1)
    convert_percentage(df)
    convert_percentage(df1)
    for x, i in enumerate(df.index):
        if df.iloc[i]['Name'] in player_names:
            index = player_names.index(df.iloc[i]['Name'])
            player_list[index].set_stats(df.iloc[i].to_numpy())
        else:
            player_list.append(Player(df.iloc[i].to_numpy()))
            player_names.append(df.iloc[i]['Name'])
    for x, i in enumerate(df1.index):
        if df1.iloc[i]['Name'] in player_names:

            index = player_names.index(df1.iloc[i]['Name'])
            player_list[index].set_stats(df1.iloc[i].to_numpy())
        else:
            player_list.append(Player(df1.iloc[i].to_numpy()))
            player_names.append(df1.iloc[i]['Name'])

In [44]:
player_list[6].__dict__

{'name': 'BcJ',
 'team': 'XSET',
 'acs': [205.0,
  282.0,
  213.0,
  258.0,
  197.0,
  153.0,
  231.0,
  195.0,
  217.0,
  173.0,
  228.0,
  169.0,
  148.0,
  131.0,
  158.0,
  175.0,
  163.0,
  208.0,
  192.0,
  211.0,
  202.0,
  131.0,
  187.0,
  181.0,
  226.0,
  251.0,
  249.0,
  162.0,
  220.0,
  199.0,
  181.0,
  237.0,
  130.0,
  274.0,
  193.0,
  193.0,
  241.0,
  252.0,
  159.0,
  232.0,
  226.0,
  203.0,
  215.0,
  231.0,
  238.0,
  302.0,
  255.0,
  240.0,
  205.0,
  282.0,
  213.0,
  258.0],
 'k': [49.0,
  48.0,
  50.0,
  36.0,
  28.0,
  43.0,
  72.0,
  61.0,
  49.0,
  23.0,
  34.0,
  27.0,
  17.0,
  19.0,
  19.0,
  37.0,
  29.0,
  40.0,
  40.0,
  30.0,
  38.0,
  18.0,
  24.0,
  39.0,
  51.0,
  35.0,
  30.0,
  21.0,
  50.0,
  28.0,
  36.0,
  66.0,
  22.0,
  59.0,
  28.0,
  46.0,
  80.0,
  27.0,
  18.0,
  39.0,
  42.0,
  50.0,
  32.0,
  26.0,
  27.0,
  44.0,
  34.0,
  29.0,
  49.0,
  48.0,
  50.0,
  36.0],
 'd': [54.0,
  33.0,
  41.0,
  33.0,
  26.0,
  58.0,
  61.0,
  49.0,
