# Counter-Strikle Web Scraper

> The following code scrapes Liquipedia (https://liquipedia.net/counterstrike/Main_Page) to make solving Counter-Strikle (https://blast.tv/counter-strikle) puzzles easier.

# Libs

In [1]:
import re
import requests
import random
import json
import os
import csv
import time

from random import randint
from time import sleep
from bs4 import BeautifulSoup
from datetime import datetime
from collections import defaultdict

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

from tqdm.notebook import tqdm

# Constants

> URLs of the target website, together with the regions that will be scraped.

In [2]:
# Region sub-pages of the player portal, written as URL path segments
# ("%26" is the percent-encoded "&").
REGIONS = [
    "Europe",
    "CIS",
    "Americas",
    "Oceania",
    "Eastern_%26_Southern_Asia",
]

# Base address of the wiki and of the player portal underneath it.
URL = "https://liquipedia.net/counterstrike"
REGION_URL = f"{URL}/Portal:Players"

# Output file and the column layout of the player table stored in it.
CSV_FILE = "players.csv"
COLUMNS = [
    "NAME",
    "REAL NAME",
    "REGION",
    "NATIONALITY",
    "TEAM",
    "AGE",
    "WEAPON",
    "MAJOR APPEARANCES",
    "EARNINGS",
    "LAST UPDATED",
]

# Major aggregation

> To calculate Major appearances, all Majors are scraped in advance.

In [3]:
# Download the Majors overview page once. The timeout keeps the cell from
# hanging forever, and raise_for_status() stops an error page from being
# parsed as if it were the tournament list.
page = requests.get(URL + "/Majors", timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")
event_elements = soup.find_all("div", {"class": "divRow tournament-card-premier"})


def _event_href(event):
    """Return the href of the link inside the card's <b> tag, or None if absent."""
    bold = event.find("b")
    link = bold.find("a") if bold is not None else None
    return link.get("href") if link is not None else None


# Site-relative links of every Major; cards without a usable link are skipped
# instead of aborting the cell. fetch_player() later checks membership in
# this list.
major_list = [href for href in map(_event_href, event_elements) if href is not None]
major_list

['/counterstrike/Intel_Extreme_Masters/2022/Rio',
 '/counterstrike/PGL/2022/Antwerp',
 '/counterstrike/PGL/2021/Stockholm',
 '/counterstrike/ESL/One/2020/Rio',
 '/counterstrike/StarLadder/2019/Major',
 '/counterstrike/Intel_Extreme_Masters/Season_XIII/World_Championship',
 '/counterstrike/FACEIT/2018/Major',
 '/counterstrike/ELEAGUE/2018/Major',
 '/counterstrike/PGL/2017/Krakow',
 '/counterstrike/ELEAGUE/2017/Major',
 '/counterstrike/ESL/One/2016/Cologne',
 '/counterstrike/MLG/2016/Columbus',
 '/counterstrike/DreamHack/2015/Cluj-Napoca',
 '/counterstrike/ESL/One/2015/Cologne',
 '/counterstrike/ESL/One/2015/Katowice',
 '/counterstrike/DreamHack/2014/Winter',
 '/counterstrike/ESL/One/2014/Cologne',
 '/counterstrike/ESL/Major_Series_One/2014/Katowice',
 '/counterstrike/DreamHack/2013/Winter']

## Obtaining All Player Names

> To reach the player profiles, the gamertags need to be scraped first, since they are used to link to the player-specific pages.

In [4]:
def fetch_region_players(region):
    """Return the player page titles listed on one regional portal page.

    Args:
        region: Path segment of the regional page, appended to ``REGION_URL``.

    Returns:
        list of str: The ``title`` attribute of the second link in every
        table cell that has one. Cells with fewer than two links, or whose
        second link carries no ``title``, are skipped.

    Raises:
        requests.HTTPError: If the portal page answers with an error status.
        requests.Timeout: If the server does not respond within 30 seconds.
    """
    page = requests.get(REGION_URL + "/" + region, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")

    names = []
    for cell in soup.find_all("td"):
        links = cell.find_all("a")
        # NOTE(review): the second anchor is taken to be the player link
        # (the first is presumably the flag) -- confirm against the page.
        if len(links) > 1 and links[1].has_attr("title"):
            names.append(links[1]["title"])
    return names

# Player page titles per region, keyed by the region's URL path segment.
regional_data = {}
for region in REGIONS:
    regional_data.update({region: fetch_region_players(region)})

## Obtaining Player Specific Data

> Using the gamertags, it is now possible to fetch the data required for Counter-Strikle.

In [5]:
def _parse_infobox(soup):
    """Return the profile infobox as a ``{label: value}`` dict of strings."""
    cells = soup.find_all("div", {"class": "infobox-cell-2"})
    # Cells alternate label, value. zip() ignores a trailing unpaired cell
    # instead of raising IndexError. The last character of each label is
    # dropped (presumably a trailing colon -- confirm against the page).
    return {label.text[:-1]: value.text for label, value in zip(cells[::2], cells[1::2])}


def _parse_birth_date(born):
    """Return the birth date in *born* as ``dd/mm/YYYY``, or None if unparseable."""
    # Keep only the text before the first "(" so any parenthesised suffix
    # (or none at all) is tolerated, rather than slicing off a fixed number
    # of characters.
    date_text = born.split("(")[0].replace(",", "").strip()
    try:
        return datetime.strptime(date_text, "%B %d %Y").strftime("%d/%m/%Y")
    except ValueError:
        return None


def _weapon_from_roles(info):
    """Return "AWP"/"AK47" from the "Role" (preferred) or "Roles" entry, else None."""
    roles = info.get("Role", info.get("Roles"))
    if roles is None:
        return None
    return "AWP" if "AWPer" in roles else "AK47"


def _count_major_appearances(soup):
    """Count highlighted result rows whose event link is listed in ``major_list``."""
    count = 0
    for row in soup.find_all("tr", {"class": "valvemajor-highlighted"}):
        cell = row.find("td", {"style": "text-align:left"})
        link = cell.find("a") if cell is not None else None
        # Rows without the expected cell or link are ignored, not fatal.
        if link is not None and link.get("href") in major_list:
            count += 1
    return count


def fetch_player(player_name, region, ratelimit=60, debug=False):
    """Scrape one player's profile into a row for the player table.

    Two pages are requested: the profile page (for the infobox) and its
    ``/Results`` sub-page (for counting Major appearances).

    Args:
        player_name: Page title of the player, appended to ``URL``.
        region: Region label, stored unchanged in the ``REGION`` field.
        ratelimit: Seconds to sleep after the profile request.
        debug: When true, print the parsed infobox dict.

    Returns:
        dict: Always contains ``NAME``, ``REGION``, ``REAL NAME`` (None when
        the infobox has neither "Romanized Name" nor "Name"),
        ``MAJOR APPEARANCES``, ``EARNINGS`` and ``LAST UPDATED``.
        ``NATIONALITY``, ``TEAM``, ``AGE`` and ``WEAPON`` are present only
        when the infobox provides them. ``AGE`` holds the birth date as
        ``dd/mm/YYYY``, not a number of years.

    Raises:
        requests.HTTPError: If either page answers with an error status.
        requests.Timeout: If a request gets no response within 30 seconds.
    """
    # pull player profile
    page = requests.get(URL + "/" + player_name, timeout=30)
    # Throttle before anything can raise, so a failed request is still
    # followed by the pause.
    time.sleep(ratelimit)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")

    # parse player information
    info_dict = _parse_infobox(soup)
    if debug:
        print(info_dict)

    output_dict = {"NAME": player_name, "REGION": region}

    # Prefer the romanised name; fall back to "Name", or None when the
    # infobox has neither, so one odd page does not abort a long scrape.
    output_dict["REAL NAME"] = info_dict.get("Romanized Name", info_dict.get("Name"))

    if "Nationality" in info_dict:
        # First character is dropped (presumably leading whitespace left by
        # the flag icon -- confirm against the page).
        output_dict["NATIONALITY"] = info_dict["Nationality"][1:]

    if "Team" in info_dict:
        output_dict["TEAM"] = info_dict["Team"]

    if "Born" in info_dict:
        birth_date = _parse_birth_date(info_dict["Born"])
        if birth_date is not None:
            output_dict["AGE"] = birth_date

    weapon = _weapon_from_roles(info_dict)
    if weapon is not None:
        output_dict["WEAPON"] = weapon

    # Get major appearances
    page = requests.get(URL + "/" + player_name + "/Results", timeout=30)
    page.raise_for_status()
    results_soup = BeautifulSoup(page.content, "html.parser")
    output_dict["MAJOR APPEARANCES"] = _count_major_appearances(results_soup)

    output_dict["EARNINGS"] = info_dict.get("Approx. Total Winnings", "0$")
    output_dict["LAST UPDATED"] = datetime.now().strftime("%d/%m/%Y")
    return output_dict
    
#fetch_player("MingSir", "Eastern_%26_Southern_Asia", ratelimit=0, debug=True)

## Build csv data

In [8]:
# Resume from an earlier run when the CSV already exists; otherwise start
# with an empty table that has the expected columns.
if os.path.exists(CSV_FILE):
    playerdata = pd.read_csv(CSV_FILE)
else:
    playerdata = pd.DataFrame(columns=COLUMNS)

# Names already stored, so each player is fetched at most once and the
# membership test does not rescan the whole column per player.
known_players = set(playerdata["NAME"])
new_rows = []

try:
    for region in tqdm(REGIONS):
        for player in tqdm(regional_data[region], leave=False):
            if player in known_players:
                continue
            new_rows.append(fetch_player(player, region))
            known_players.add(player)
finally:
    # Runs on success and on failure, so rows fetched before a crash or a
    # manual interrupt are saved and the next run continues from there.
    if new_rows:
        # DataFrame.append was removed in pandas 2.0; collect the rows and
        # concatenate once instead of growing the frame row by row.
        frames = [] if playerdata.empty else [playerdata]
        frames.append(pd.DataFrame(new_rows, columns=COLUMNS))
        playerdata = pd.concat(frames, ignore_index=True)
    playerdata.to_csv(CSV_FILE, index=False)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=935.0), HTML(value='')))




OSError: [Errno 28] No space left on device

In [10]:
# Write the player table to CSV_FILE, omitting the DataFrame index.
# NOTE(review): presumably re-run by hand after the build cell above aborted
# with the OSError shown in its output -- confirm.
playerdata.to_csv(CSV_FILE, index=False)