In [None]:
#creates a python virtual environment in a folder named "venv" inside the current working directory
#("py" is the Python launcher command, so this line presumably expects a Windows machine - confirm)
#NOTE(review): nothing visible in this notebook activates this environment or points the kernel at it,
#so the cells below may still run with a different interpreter - confirm this is intended
!py -m venv venv

In [None]:
!py -m pip install requests
!py -m pip install beautifulsoup4

Defines a function that scrapes the tournament stats pages on vlr.gg and saves the results as a CSV file.

In [None]:
#imports necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [None]:
#column order of the scraped table, one entry per value produced by _parse_vlr_stat_row
#(the 'Succesful_Clutches' spelling is kept on purpose so previously written csv files keep the same header)
_VLR_STAT_COLUMNS = ['Player', 'Country', 'Team', 'Agent_1', 'Agent_2', 'Agent_3', 'Agent_4', 'Agent_5', 'Rounds',
                     'ACS', 'KD', 'KAST', 'ADR', 'KPR', 'APR', 'FKPR', 'FDPR', 'HS_Rate', 'Clutch_Rate', 'Succesful_Clutches',
                     'Clutches_Attempted', 'Max_Kills', 'Kills', 'Deaths', 'Assists', 'FK', 'FD']


#helper: converts a single <tr> of the stats table into a list of values ordered like _VLR_STAT_COLUMNS
def _parse_vlr_stat_row(tr):
    '''Extracts the values of one player row of the stats table.
    tr: a beautifulsoup <tr> element taken from the <tbody> of the stats table.
    Returns a list holding player, country code, team, five agent slots, rounds,
    the colored stat cells as floats, clutches won / attempted, max kills and
    the last five cells of the row as ints.
    Raises ValueError when a numeric cell does not contain a number.'''
    row = []
    #gets the player name
    row.append(tr.find('div', class_='text-of').text.strip())
    #gets the country abbreviation: second css class of the first <i> tag in the row (presumably the flag icon),
    #keeping the part after the '-' (a class such as 'mod-br' gives 'br')
    row.append(tr.find('i')['class'][1].split('-')[1])
    #gets the team name from text
    row.append(tr.find('div', class_='stats-player-country').text.strip())
    #get the agents played: file name of each img src without its last 4 characters
    #(assumes a 4 character extension such as '.png'), padded with 'None' up to 5 slots
    agent_imgs = tr.find('td', class_='mod-agents').find('div').find_all('img')
    agents = [img['src'].split('/')[-1][:-4] for img in agent_imgs[:5]]
    agents += ['None'] * (5 - len(agents))
    row += agents
    #get number of rounds played
    row.append(int(tr.find('td', class_='mod-rnd').text))
    #get the next few stats (acs to clutch%)
    for cell in tr.find_all('td', class_='mod-color-sq'):
        text = cell.find('div').find('span').text.strip()
        if text == '':
            #a blank cell is stored as 0
            row.append(0.0)
        elif '%' in text:
            #percentages are stored as fractions, e.g. '25%' -> 0.25
            row.append(float(text.replace('%', '')) / 100)
        else:
            row.append(float(text))
    #get the succesful clutches and the attempted clutches, written as 'won/attempted' on the page
    clutches = tr.find('td', class_='mod-cl').text.strip()
    if clutches != '':
        parts = clutches.split('/')
        row += [int(parts[0]), int(parts[1])]
    else:
        row += [0, 0]
    #get the max kills in a map
    row.append(int(tr.find('td', class_='mod-a mod-kmax').text.strip()))
    #get the rest of the stats kills - FD (the last five cells of the row)
    for cell in tr.find_all('td')[-5:]:
        row.append(int(cell.text))
    return row


#function to be applied for every vlr tournament, takes the url of the webpage and the name of the file that you want to save it as
def vlr_tournament_webscrape(URL, csv_filename):
    '''Function to scrape through the stats page of a valorant tournament on vlr.gg.
    Downloads the page with requests, parses the first <table> on it with beautifulsoup,
    builds one dataframe row per <tr> of its <tbody>, prints the dataframe and
    writes it to csv_filename + '.csv' (header included, no index column).
    URL: address of the tournament stats page.
    csv_filename: name of the output file without the '.csv' extension.
    Raises requests.HTTPError when the server answers with an error status and
    ValueError when the page contains no stats table.
    example code to run to collect VCT Champs 2022 stats:
    vlr_tournament_webscrape('https://www.vlr.gg/event/stats/1015/valorant-champions-2022', 'vctchamps22')'''
    #a timeout keeps the cell from hanging forever if the site does not answer
    page = requests.get(URL, timeout=30)
    #fail here on 4xx/5xx instead of trying to parse an error page
    page.raise_for_status()

    soup = BeautifulSoup(page.content, "html.parser")
    table = soup.find('table')
    table_body = table.find('tbody') if table is not None else None
    if table_body is None:
        raise ValueError('no stats table found at ' + URL)

    #parsing the web scraped data; rows are collected first and the dataframe is built once
    rows = [_parse_vlr_stat_row(tr) for tr in table_body.find_all('tr')]
    df = pd.DataFrame(rows, columns=_VLR_STAT_COLUMNS)

    print(df)
    df.to_csv(csv_filename+'.csv', header=True, index=False)
    

    

In [None]:
#cleans up the data, i.e. replaces the two letter country code with the full country name
def clean_vlr_data(csv_filename, new_csv_filename):
    '''A method that will clean up the webscraped vlr tournament data.
    The main point of this method is to convert the two letter country codes to
    their corresponding countries, as the codes are not always intuitive.
    Reads csv_filename + '.csv', replaces the values of its 'Country' column,
    prints the dataframe and writes it to new_csv_filename + '.csv'.
    Values that are not in the lookup table (unmapped codes, or names that were
    already converted by an earlier run) are left unchanged, so the function can
    safely be run again on its own output.
    csv_filename: name of the input file without the '.csv' extension.
    new_csv_filename: name of the output file without the '.csv' extension (may equal csv_filename).
    Example code for how to run:
    clean_vlr_data('vctchamps22', 'vctchamps22')
    '''
    #dictionary of country codes to the country names, more to be added later
    #('es' is the code for Spain, Estonia is 'ee')
    country = {'cl':'Chile', 'us':'United States', 'fi':'Finland', 'cn':'China', 'ca':'Canada', 'jp':'Japan', 'tr':'Turkey',
              'sg':'Singapore', 'lv':'Latvia', 'kr':'Korea', 'se':'Sweden', 'br':'Brazil', 'be':'Belgium', 'id':'Indonesia',
              'un':'Not Representing', 'ru':'Russia', 'th':'Thailand', 'gb':'United Kingdom', 'ar':'Argentina', 'ua':'Ukraine',
              'kz':'Kazakhstan', 'my':'Malaysia', 'fr':'France', 'cz':'Czech Republic', 'hr':'Croatia', 'lt':'Lithuania', 'pl':'Poland',
              'es':'Spain', 'ee':'Estonia', 'dk':'Denmark', 'ph':'Philippines'}
    df = pd.read_csv(csv_filename+'.csv')
    #converts the country codes to its corresponding country, keeping any value without an entry as it is
    df['Country'] = df['Country'].apply(lambda x: country.get(x, x))

    print(df)
    df.to_csv(new_csv_filename+'.csv', header=True, index=False)

In [None]:
#scrape the stats page of one event, then convert the country codes of the saved csv in place
event_url = 'https://www.vlr.gg/event/stats/1162/game-changers-brazil-series-2'
event_csv = 'gamechangers_brazil_22'
vlr_tournament_webscrape(event_url, event_csv)
clean_vlr_data(event_csv, event_csv)

In [None]:
#attempts to deactivate the python environment
#NOTE(review): "!" runs the command in a separate shell process, so this probably does not change
#the environment of the running kernel; the environment created at the top of this notebook was also
#made with "venv" rather than conda - confirm whether this cell is still needed
!conda deactivate