# Counter Strikle Webscraper

# Notebook Description

__Author:__ Daniël Vermaas

This notebook scrapes Liquipedia (https://liquipedia.net/counterstrike/Main_Page) to make solving Counter Strikle (https://blast.tv/counter-strikle) puzzles easier. Before using the notebook, please read the Liquipedia Terms of Use for the API: https://liquipedia.net/api-terms-of-use.

# Libraries & Constants

In [58]:
import os
import csv
import time
from time import sleep
from bs4 import BeautifulSoup
from datetime import datetime

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import requests
#from urllib.request import quote

#!jupyter nbextension enable --py widgetsnbextension
import ipywidgets as widgets
from ipywidgets import interact

In [59]:
# Headers intended for the Liquipedia API requests made by fetch_page.
HEADERS = {
    "User-Agent": "Counter-Strikle-Bot",
    "Accept-Encoding": "gzip",
}
# API endpoint; the query string is appended directly after the "?".
BASE_URL = "https://liquipedia.net/counterstrike/api.php?"
# Region label stored per player -> "Portal:Players/<name>" sub-pages to scrape.
REGIONS = {
    "Europe": ["Europe", "CIS"],
    "Americas": ["Americas"],
    "Asia-Pacific": ["Oceania", "Eastern_%26_Southern_Asia"],
}
# File the scraped player table is read from and written to.
CSV_FILE = "players.csv"
# Column layout used when the player table is created from scratch.
COLUMNS = [
    "NAME",
    "REAL NAME",
    "REGION",
    "NATIONALITY",
    "TEAM",
    "AGE",
    "WEAPON",
    "MAJOR APPEARANCES",
    "EARNINGS",
    "LAST UPDATED",
]
# Nationalities whose REGION is overwritten with "Europe" during post-processing.
MISLABELS = ["Kazakhstan", "Turkey"]

# API Calls

In [3]:
# API call function
def fetch_page(page):
    """Fetch one Liquipedia page through the ``action=parse`` API.

    Args:
        page: Page title, appended verbatim to the query string (callers
            pass it already URL-encoded where needed).

    Returns:
        A BeautifulSoup tree built from the rendered page HTML.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the JSON reply has no ``parse``/``text``/``*`` entry.
    """
    url = BASE_URL + "action=parse&format=json&page=" + page
    # The second positional argument of requests.get is ``params``; the
    # headers have to be passed by keyword or they end up in the query
    # string and the custom User-Agent is never sent.
    response = requests.get(url, headers=HEADERS, timeout=30)
    # Throttle right after the request (same pause as before) so that a
    # failure while checking or parsing the reply cannot skip the rate limit.
    time.sleep(50)
    response.raise_for_status()
    page_html = response.json()['parse']['text']['*']
    return BeautifulSoup(page_html, features="lxml")

# Fetches list of all majors
def fetch_majors():
    """Return the ``href`` of every premier tournament card on the "Majors" page."""
    card_selector = {"class": "divRow tournament-card-premier"}
    cards = fetch_page("Majors").find_all("div", card_selector)
    # The event link is the anchor inside the first bold element of each card.
    return [card.find("b").find("a")["href"] for card in cards]

# Fetches all player ids/subdirectories for a given region
def fetch_ids(region):
    """Return the player page ids listed on ``Portal:Players/<region>``.

    The second anchor of every table cell is used; its ``title`` with spaces
    replaced by underscores is the id.
    """
    portal = fetch_page("Portal:Players/" + region)
    cells = portal.find_all("td")
    player_links = [cell.find_all("a")[1] for cell in cells]
    player_ids = [link["title"].replace(" ", "_") for link in player_links]
    return player_ids

# Player-Specific Information

In [64]:
def fetch_player(player_name, region, major_list, debug=False):
    """Scrape one player's profile and return it as a row for the player table.

    Args:
        player_name: Liquipedia page title of the player; stored as ``NAME``.
        region: Region label stored unchanged as ``REGION``.
        major_list: Event hrefs that count as Majors (as returned by
            ``fetch_majors``).
        debug: When true, print the raw infobox mapping.

    Returns:
        A dict that always holds ``NAME``, ``REGION``, ``REAL NAME``,
        ``MAJOR APPEARANCES``, ``EARNINGS`` and ``LAST UPDATED``, and holds
        ``NATIONALITY``, ``TEAM``, ``AGE`` and ``WEAPON`` only when they could
        be extracted. ``AGE`` is the birth date formatted as ``dd/mm/YYYY``.

    Raises:
        KeyError: If the infobox has neither "Romanized Name" nor "Name".
    """
    # pull player profile
    soup = fetch_page(player_name)
    output_dict = dict()

    # parse player information: infobox cells alternate label, value.
    # zip() ignores a trailing label without a value instead of raising.
    info_list = soup.find_all("div", {"class": "infobox-cell-2"})
    info_dict = {
        label.text[:-1]: value.text  # [:-1] drops the label's last character
        for label, value in zip(info_list[::2], info_list[1::2])
    }
    if debug:
        print(info_dict)
    # add name to dict
    output_dict["NAME"] = player_name
    output_dict["REGION"] = region

    # replace name with romanised name if needed
    if "Romanized Name" in info_dict:
        output_dict["REAL NAME"] = info_dict["Romanized Name"]
    else:
        output_dict["REAL NAME"] = info_dict["Name"]

    # get nationality: the value starts with a non-breaking space and may list
    # several countries separated by one; keep the first
    if "Nationality" in info_dict:
        output_dict["NATIONALITY"] = info_dict["Nationality"][1:].split("\xa0")[0]

    # get team
    if "Team" in info_dict:
        output_dict["TEAM"] = info_dict["Team"]

    # Get birth date (stored under AGE): strip the 9-character " (age NN)"
    # suffix before parsing
    if "Born" in info_dict:
        try:
            born = datetime.strptime(info_dict["Born"][:-9].replace(",", ""), "%B %d %Y")
        except ValueError:
            # Unparseable birth date: leave AGE unset.
            pass
        else:
            output_dict["AGE"] = born.strftime("%d/%m/%Y")

    # Get role ("Role" takes precedence over "Roles")
    role_description = info_dict.get("Role", info_dict.get("Roles"))

    # Map the role onto the puzzle's weapon; the AWPer check comes first
    if role_description is not None:
        if "AWPer" in role_description:
            output_dict["WEAPON"] = "AWP"
        elif "Rifler" in role_description:
            output_dict["WEAPON"] = "AK47"

    # Get major appearances. Best effort: any failure while fetching or
    # parsing the results page counts as zero appearances. `Exception` (not a
    # bare except) keeps Ctrl-C working during the long throttled fetch.
    try:
        results = fetch_page(player_name + "/Results")
        event_elements = results.find_all("tr", {"class": "valvemajor-highlighted"})
        event_name_list = [
            event.find("td", {"style": "text-align:left"}).find("a")["href"]
            for event in event_elements
        ]
        output_dict["MAJOR APPEARANCES"] = sum(
            1 for event in event_name_list if event in major_list
        )
    except Exception:
        output_dict["MAJOR APPEARANCES"] = 0

    if "Approx. Total Winnings" in info_dict:
        output_dict["EARNINGS"] = int(info_dict["Approx. Total Winnings"].replace(",", "").replace("$", ""))
    else:
        output_dict["EARNINGS"] = 0

    output_dict["LAST UPDATED"] = datetime.now().strftime("%d/%m/%Y")
    return output_dict
    
# Manual smoke tests. Each call queries the live API several times (the Majors
# page via fetch_majors, then the player's profile and results pages).
#fetch_player("MingSir", "Eastern_%26_Southern_Asia", fetch_majors(), debug=True)
#fetch_player("AntO_oNNN", "CIS", fetch_majors(), debug=True)
fetch_player("XANTARES", "Europe", fetch_majors(), debug=True)

{'Name': 'İsmailсan Dörtkardeş', 'Nationality': '\xa0Turkey\xa0North Macedonia', 'Born': 'August  7, 1995 (age\xa027)', 'Status': 'Active', 'Years Active (Player)': '2012 – Present', 'Role': 'Rifler', 'Team': 'Eternal Fire', 'Approx. Total Winnings': '$400,962', 'Games': 'Global Offensive'}


{'NAME': 'XANTARES',
 'REGION': 'Europe',
 'REAL NAME': 'İsmailсan Dörtkardeş',
 'NATIONALITY': 'Turkey',
 'TEAM': 'Eternal Fire',
 'AGE': '07/08/1995',
 'WEAPON': 'AK47',
 'MAJOR APPEARANCES': 4,
 'EARNINGS': 400962,
 'LAST UPDATED': '18/10/2022'}

In [5]:
def build_df(players_per_region=1):
    """Scrape players that are not yet in ``CSV_FILE`` and save the table.

    Args:
        players_per_region: Maximum number of players taken from each
            sub-region listing. Defaults to 1, the previously hard-coded cap;
            pass ``None`` to scrape every listed player.

    Returns:
        None. The (possibly extended) table is written to ``CSV_FILE``, also
        when scraping stops early because of an error or an interrupt.
    """
    # Get dataframe to write to
    if os.path.exists(CSV_FILE):
        playerdata = pd.read_csv(CSV_FILE)
    else:
        playerdata = pd.DataFrame(columns=COLUMNS)

    # Get list of all majors
    major_list = fetch_majors()

    # Track known names in a set and collect new rows in a list, so the table
    # is concatenated once instead of once per player.
    known_names = set(playerdata["NAME"])
    new_rows = []

    try:
        for major_region, sub_regions in tqdm(REGIONS.items()):
            for sub_region in tqdm(sub_regions, leave=False):
                region_ids = fetch_ids(sub_region)[:players_per_region]
                for player in tqdm(region_ids, leave=False):
                    if player in known_names:
                        continue
                    new_rows.append(fetch_player(player, major_region, major_list))
                    known_names.add(player)
    except Exception as e:
        print("Error:", e)
    finally:
        # Persist whatever was scraped, even on Ctrl-C: every player costs
        # long throttled requests, so partial progress is worth keeping.
        if new_rows:
            playerdata = pd.concat([playerdata, pd.DataFrame(new_rows)], ignore_index=True)
        playerdata.to_csv(CSV_FILE, index=False)

#build_df()

In [67]:
def age(birthdate):
    """Return the age in whole years today for a ``dd/mm/YYYY`` birth date string."""
    born = datetime.strptime(birthdate, "%d/%m/%Y")
    now = datetime.now()
    years = now.year - born.year
    # One year less while this year's birthday is still ahead.
    if (now.month, now.day) < (born.month, born.day):
        years -= 1
    return years

def playerdata_postprocess(df):
    """Clean a raw player table in place and return it.

    Steps: drop rows missing any column other than ``TEAM``, sort by
    ``EARNINGS`` (highest first), replace the birth dates in ``AGE`` by the
    age in years, and set ``REGION`` to "Europe" for nationalities listed in
    ``MISLABELS``.

    Args:
        df: Player table with at least the ``EARNINGS``, ``AGE``,
            ``NATIONALITY`` and ``REGION`` columns; it is modified in place.

    Returns:
        The same DataFrame object.
    """
    # Take the required columns from the frame itself rather than re-reading
    # the CSV; a frame without a TEAM column is handled as well.
    required = [column for column in df.columns if column != "TEAM"]
    df.dropna(inplace=True, subset=required)
    df.sort_values(by=["EARNINGS"], ascending=False, inplace=True)
    df["AGE"] = [age(date) for date in df["AGE"]]
    df["REGION"] = np.where(df["NATIONALITY"].isin(MISLABELS), "Europe", df["REGION"])
    return df

# Cleaned player table, loaded from CSV_FILE; read as a global by the filter function f.
playerdata_post = playerdata_postprocess(pd.read_csv(CSV_FILE))

In [69]:
def f(sregion, sage, sweapon, smajors):
    """Print the chosen filters and return the matching rows of ``playerdata_post``.

    ``sregion`` and ``sweapon`` are collections of accepted values; ``sage``
    and ``smajors`` are (min, max) pairs with both bounds inclusive.
    """
    print("Region:", sregion, "\n",
          "Age:", sage, "\n",
          "Weapon:", sweapon, "\n",
          "Majors", smajors, "\n")
    players = playerdata_post
    region_ok = players["REGION"].isin(sregion)
    age_ok = (players["AGE"] >= sage[0]) & (players["AGE"] <= sage[1])
    weapon_ok = players["WEAPON"].isin(sweapon)
    appearances = players["MAJOR APPEARANCES"]
    majors_ok = (appearances >= smajors[0]) & (appearances <= smajors[1])
    return players.loc[region_ok & age_ok & weapon_ok & majors_ok]
    
# Render the filter widgets and re-run f whenever a selection changes.
# NOTE(review): region_slide, age_slider, weapon_slide and majors_slider are not
# defined in the visible part of the notebook; presumably a separate cell
# creates them -- confirm it runs before this one.
interact(f, sregion = region_slide, sage = age_slider, sweapon = weapon_slide, smajors = majors_slider)

interactive(children=(SelectMultiple(description='REGION', index=(2,), options=('Europe', 'Americas', 'Asia-Pa…

<function __main__.f(sregion, sage, sweapon, smajors)>