In [1]:
# Sample veto log: six removed/picked steps followed by the left-over map
l = [
    '1. Doing Fun removed Ancient',
    '2. ATK removed Nuke',
    '3. Doing Fun picked Inferno',
    '4. ATK picked Vertigo',
    '5. Doing Fun removed Dust2',
    '6. ATK removed Mirage',
    '7. Overpass was left over',
]

In [7]:
import numpy as np
import regex as re
import datetime as dt
from dateutil.relativedelta import relativedelta
import selenium as se
from selenium import webdriver
from selenium.webdriver.common.by import By

# Names of the maps that get an entry in the statistics dictionaries
MAP_POOL = [
    'Overpass',
    'Mirage',
    'Vertigo',
    'Dust2',
    'Nuke',
    'Ancient',
    'Inferno',
]

# Numeric code -> map name (presumably HLTV's own map ids -- TODO confirm)
MAP_CODE = {
    31: 'Dust2',
    32: 'Mirage',
    33: 'Inferno',
    34: 'Nuke',
    40: 'Overpass',
    46: 'Vertigo',
    47: 'Ancient',
}

class Team:
    '''
    A team identified by its HLTV profile link

    Parameters
    ----------
    link: str
        something like 'https://www.hltv.org/team/9455/imperial'

    Attributes
    ----------
    link: str
        the link given at construction
    number: str
        the digits found after '/team/' in the link (kept as a string)
    name: str
        the last part of the link, after the number
    '''
    def __init__(self, link):
        self.link = link
        self.number, self.name = self.get_number_name()

    def get_number_name(self):
        '''
        Extracts the team number and the team name from `self.link`

        Returns
        -------
        number: str
            team number, as it appears in the link
        name: str
            team name, as it appears in the link

        Raises
        ------
        ValueError
            if the link is not of the form 'https://www.hltv.org/team/<number>/<name>'
        '''
        # dots are escaped so they only match a literal '.', and both the
        # number and the name must have at least one character
        rgx = r"https://www\.hltv\.org/team/(\d+)/(\S+)\Z"
        match = re.match(rgx, self.link)
        if match is None:
            raise ValueError(
                f"not a valid HLTV team link: {self.link!r} "
                "(expected 'https://www.hltv.org/team/<number>/<name>')"
            )
        return match.group(1), match.group(2)

def get_team_name_from_veto(string: str) -> str:
    '''
    Using a regex pattern, returns the name of a team given a string of veto/pick

    Parameters
    ----------
    string: str
        one step of the veto, something like '3. Doing Fun picked Inferno'

    Returns
    -------
    team_name: str
        the text between the step number and the word 'removed' or 'picked'

    Raises
    ------
    ValueError
        if the string is not a removed/picked step (e.g. '7. Overpass was left over')
    '''
    # \d+\.                  step number with any amount of digits and a literal dot
    # (.*?)                  the team name, lazy so it stops at the first action word
    # \s+(?:removed|picked)\b  the action as a whole word, so a name that merely
    #                        contains 'picked' (e.g. 'Handpicked') is not cut short
    rgx = r"\d+\.\s+(.*?)\s+(?:removed|picked)\b"
    match = re.match(rgx, string)
    if match is None:
        raise ValueError(f"not a removed/picked veto step: {string!r}")
    # strip: remove the leading and trailing spaces
    return match.group(1).strip()


def parse_time(time_str: str):
    '''
    Auxiliary function that transforms a given time string pattern into a relativedelta
    object

    Parameters
    ----------
    time_str: str
        something like '28d', '3m', '2y' or a combination of them '1y4m37d'
        (years first, then months, then days)

    Returns
    -------
    delta: dateutil.relativedelta.relativedelta or None
        relativedelta object, or None when `time_str` is empty or does not
        follow the pattern above
    '''
    pattern = re.compile(r'(?:(?P<years>\d+)y)?(?:(?P<months>\d+)m)?(?:(?P<days>\d+)d)?')
    # every group is optional, so a plain `match` succeeds on any input and
    # silently ignores whatever it could not parse; `fullmatch` requires the
    # whole string to follow the pattern
    parts = pattern.fullmatch(time_str)
    if not time_str or parts is None:
        return None
    # keep only the units that are present in the string
    time_params = {name: int(value) for name, value in parts.groupdict().items() if value}
    return relativedelta(**time_params)

    
def _percent_to_fraction(text: str) -> float:
    '''
    Turns a percentage string such as '66.7%' into a fraction (0.667)
    '''
    # swapping '%' for 'e-2' lets float() apply the division by 100
    return float(text.replace('%', 'e-2'))


def retrieve_maps_statistics(team, end_date, interval='3m') -> dict:
    '''
    Retrieve the relevant statistics of a given team for all maps in the map pool
    starting at an end date (last date to be retrieved) and going backwards for the
    interval

    Parameters
    ----------
    team: Team
        team to be analysed
    end_date: str
        something like '2022-06-13'
    interval: str (default='3m')
        interval to be analysed, in the format accepted by `parse_time`

    Returns
    -------
    dic: dict
        the keys are the names in MAP_POOL and each value is the list
        [win rate, # maps played, % of rounds won after 1st kill,
        % of rounds won after 1st death] (percentages as fractions).
        Maps with no data on the page keep [0, 0, 0, 0]; maps shown on the
        page that are not in MAP_POOL are ignored

    Raises
    ------
    ValueError
        if `interval` cannot be parsed, or `end_date` is not '%Y-%m-%d'
    '''
    delta = parse_time(interval)
    if delta is None:
        raise ValueError(f"could not parse interval: {interval!r}")
    end = dt.datetime.strptime(end_date, '%Y-%m-%d')
    start_date = (end - delta).strftime("%Y-%m-%d")
    team_number = team.number
    team_name   = team.name
    # stats page for the team
    link = rf"https://www.hltv.org/stats/teams/maps/{team_number}/{team_name}?startDate={start_date}&endDate={end_date}"
    # in the dic: the keys are the maps' names and the values are the win rate, # maps played
    # the % of rounds won after 1st kill, and % of rounds won after 1st death
    dic = {mapa: [0, 0, 0, 0] for mapa in MAP_POOL}

    # local, needed to reboot the computer to get the PATH updated, must change later
    # NOTE(review): hard-coded, machine-specific chromedriver path; recent Selenium
    # releases appear to expect a Service object instead of executable_path -- confirm
    driver = webdriver.Chrome(executable_path=r"C:\Users\rodri\Downloads\chromedriver_win32\chromedriver.exe")
    try:
        driver.get(link)
        # win rate per map: each element's text is '<map> - <win rate>%'
        for holder in driver.find_elements(by=By.CLASS_NAME, value='map-pool-map-holder.map-stats'):
            m, p = holder.text.replace(' ', '').split('-')
            # the page may list maps outside of MAP_POOL; those are skipped
            if m in dic:
                dic[m][0] = _percent_to_fraction(p)

        # We are looking for this table
        table = driver.find_element(by=By.CLASS_NAME, value='stats-section.stats-team.stats-team-maps')
        # On this table, there's a subtable ('two-grid') with the information about every map played
        # We want all the direct children (XPATH='*') of this class
        # Each element of `maps` is another mini table with statistics about a map
        maps = table.find_element(by=By.CLASS_NAME, value='two-grid').find_elements(by=By.XPATH, value='*')
        for map_box in maps:
            map_name = map_box.find_element(by=By.CLASS_NAME, value='map-pool-map-holder').text
            if map_name not in dic:
                continue
            stats = map_box.find_element(by=By.CLASS_NAME, value='stats-rows.standard-box').text.split('\n')
            # stats[1] holds counts separated by ' / ' (presumably wins / draws /
            # losses -- confirm); their sum is used as the number of maps played
            map_counts = sum(int(count) for count in stats[1].split(' / '))
            win_first_kill = _percent_to_fraction(stats[7])
            win_first_death = _percent_to_fraction(stats[9])
            dic[map_name][1:] = [map_counts, win_first_kill, win_first_death]
    finally:
        # always end the browser session, even when the scraping above fails
        driver.quit()

    return dic

def check_standard_veto(vetoes: list) -> bool:
    '''
    Checks if the given veto process was of the standard form
    (i.e. B1, B2, P1, P2, B1, B2)

    The process is considered standard when, ignoring the last line (the left-over
    map), there are exactly six steps, two different teams alternate (team 1,
    team 2, team 1, ...), and the actions are, in order: removed, removed,
    picked, picked, removed, removed

    Parameters
    ----------
    vetoes: list of str
        lines of the veto process, something like
        ['1. Doing Fun removed Ancient', ..., '7. Overpass was left over']

    Returns
    -------
    standard: bool
    '''
    expected_actions = ('removed', 'removed', 'picked', 'picked', 'removed', 'removed')
    # vetoes[:-1]: last line is the left over, always
    steps = vetoes[:-1]
    if len(steps) != len(expected_actions):
        return False

    try:
        order = [get_team_name_from_veto(step) for step in steps]
    except (AttributeError, ValueError):
        # a line that is not a removed/picked step: not a standard veto
        return False

    first, second = order[0], order[1]
    if not first or not second or first == second:
        return False
    # the two teams must alternate through the six steps
    if order != [first, second] * 3:
        return False

    for step, team, action in zip(steps, order, expected_actions):
        # the first word after the team name is the action taken in this step
        after_team = step.partition(team)[2].split()
        if not after_team or after_team[0] != action:
            return False
    return True

In [12]:
# Team to be analysed, built from its HLTV profile link
masonic = Team(
    'https://www.hltv.org/team/9455/imperial'
)

In [13]:
# Last date to be retrieved; the interval is left at its default ('3m')
end_date = '2022-06-13'
retrieve_maps_statistics(team=masonic, end_date=end_date)

  driver = webdriver.Chrome(executable_path=r"C:\Users\rodri\Downloads\chromedriver_win32\chromedriver.exe")


{'Overpass': [0.667, 9, 0.746, 0.298],
 'Mirage': [0.5, 8, 0.764, 0.283],
 'Vertigo': [0.125, 8, 0.686, 0.202],
 'Dust2': [0.6, 10, 0.729, 0.317],
 'Nuke': [0.333, 9, 0.674, 0.252],
 'Ancient': [0, 0, 0, 0],
 'Inferno': [0.786, 14, 0.737, 0.31]}