In [11]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
from datetime import date, timedelta

In [2]:
# Read the lookup table of team names and their IDs; the "Name" column
# becomes the index so that a team can be looked up directly by its name.
team_ids_csv = "../data/team_IDs.csv"
df_team_ids = pd.read_csv(team_ids_csv, index_col="Name")

In [3]:
def get_team_id(team):
    """Return the value of the "ID" column for ``team`` in ``df_team_ids``.

    The given name is lowercased before the lookup, so callers may pass it
    in any capitalisation.
    """
    return df_team_ids.loc[team.lower(), "ID"]


def current_rank(team, timeout=10):
    """Return the current HLTV ranking of ``team`` as a string.

    The team's profile page is downloaded from hltv.org and the ranking is
    read from the link inside the first ``profile-team-stat`` element, which
    is assumed to be the ranking entry of the profile.

    Parameters
    ----------
    team : str
        Team name as listed in ``df_team_ids`` (any capitalisation).
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    str or None
        The ranking without the leading "#" (e.g. ``"25"``), or ``None``
        when the profile page contains no ranking link.

    Raises
    ------
    KeyError
        If the team name is not present in ``df_team_ids``.
    requests.HTTPError
        If the server answers with an error status; raised here so that an
        error page is not mistaken for a profile without a ranking.
    """
    team = team.lower()
    team_id = get_team_id(team)

    # The profile URL uses the team name with spaces replaced by dashes
    slug = "-".join(team.split())
    response = requests.get(
        "https://www.hltv.org/team/{}/{}".format(team_id, slug),
        timeout=timeout,
    )
    response.raise_for_status()

    profile_soup = BeautifulSoup(response.text, "lxml")
    rank_stat = profile_soup.find("div", class_="profile-team-stat")
    rank_link = rank_stat.a if rank_stat is not None else None
    if rank_link is None:
        # No ranking link on the page: report "no rank" instead of crashing
        return None

    # The link text looks like "#25"; drop the "#" and surrounding whitespace
    return rank_link.text.strip().lstrip("#")

In [None]:
# Team to analyse and how many days back to look for results
entered_name = "North"
LOOKBACK_DAYS = 100

team_id = get_team_id(entered_name)
team_rnk_now = current_rank(entered_name)

# Read "today" only once so both ends of the window refer to the same day
end_date = date.today()
start_date = end_date - timedelta(days=LOOKBACK_DAYS)

# date objects are rendered as YYYY-MM-DD when formatted into the URL
params = [start_date, end_date, team_id]
results_url = "https://www.hltv.org/results?startDate={}&endDate={}&team={}".format(*params)

# Fail loudly on an error response instead of parsing an error page, which
# would silently produce an empty list of results further down
response = requests.get(results_url, timeout=10)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, "lxml")

In [32]:
results_list = []
# Opponent name -> current rank. The rank is "as of now", so it is the same
# for every match against that opponent and only needs to be fetched once.
opponent_rank_cache = {}

for day_block in soup.find_all("div", class_="results-sublist"):  # one block per day
    # The headline reads e.g. "Results for May 21st 2020": drop the first two
    # words and the ordinal suffix of the day, leaving "May 21 2020".
    # The result is kept in `match_date` (not `date`) so that the `date` class
    # imported from datetime is not overwritten and earlier cells stay re-runnable.
    headline_words = day_block.span.text.split()
    headline_words[3] = headline_words[3][:-2]
    match_date = " ".join(headline_words[2:])

    for match in day_block.find_all("div", class_="result-con"):  # matches of that day
        # Names of the two teams (team of interest is always at index 0)
        teams = [team.text for team in match.find_all("div", class_="team")]
        team_name, opponent_name = teams[0], teams[1]

        # Score text looks like "0 - 2"; int() tolerates the surrounding spaces
        score_text = match.find("td", class_="result-score").text
        team_score, opponent_score = (int(part) for part in score_text.split("-"))

        if team_score > opponent_score:
            outcome = "Win"
        elif team_score < opponent_score:
            outcome = "Loss"
        else:
            outcome = "Draw"

        # Text of the "map-text" element (a map name, or a format such as "bo3")
        map_name = match.find("div", class_="map-text").text

        # Current ranking of the opponent, fetched at most once per opponent
        if opponent_name not in opponent_rank_cache:
            opponent_rank_cache[opponent_name] = current_rank(opponent_name)
        opponent_rnk_now = opponent_rank_cache[opponent_name]

        # Collect all relevant information about this match
        results_list.append({
            "Date": match_date,
            "Team_Name": team_name,
            "Outcome": outcome,
            "Opponent_Name": opponent_name,
            "Team_Score": team_score,
            "Opponent_Score": opponent_score,
            "Map": map_name,
            "Team_rnk_now": team_rnk_now,
            "Opponent_rnk_now": opponent_rnk_now
        })

In [33]:
# Build the results table from the collected match dicts and parse the
# "Date" strings (e.g. "May 21 2020") into proper datetime values
df = pd.DataFrame(results_list).assign(
    Date=lambda frame: pd.to_datetime(frame["Date"], format="%B %d %Y")
)
df

Unnamed: 0,Date,Team_Name,Outcome,Opponent_Name,Team_Score,Opponent_Score,Map,Team_rnk_now,Opponent_rnk_now
0,2020-05-21,North,Loss,Heroic,0,2,bo3,25,30
1,2020-05-20,North,Loss,Astralis,0,2,bo3,25,1
2,2020-05-19,North,Win,G2,2,1,bo3,25,5
3,2020-05-16,North,Loss,GODSENT,1,2,bo3,25,20
4,2020-05-12,North,Loss,Heretics,0,2,bo3,25,22
5,2020-05-10,North,Win,G2,2,1,bo3,25,5
6,2020-05-07,North,Win,Movistar Riders,2,0,bo3,25,31
7,2020-05-05,North,Win,c0ntact,2,1,bo3,25,42
8,2020-05-03,North,Loss,Dignitas,0,2,bo3,25,38
9,2020-05-01,North,Win,Nordavind,19,17,nuke,25,43
