In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [27]:
def getMatches(year, timeout=30):
    """Scrape one year's The International page on Liquipedia into three tables.

    Parameters
    ----------
    year : int or str
        Tournament year; it is interpolated into the page URL and stored in
        the ``Year`` column of every returned table.
    timeout : float, optional
        Seconds handed to ``requests.get`` so that a stalled connection cannot
        block the notebook forever. Defaults to 30.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(team_performance, team_members, matches)``:

        * ``team_performance`` -- columns ``Team_Name``, ``Games_Won``, ``Year``;
          one row per (team-name element, score element) pair.
        * ``team_members`` -- columns ``Team_Name``, ``Member1`` .. ``Member5``,
          ``Year``; one row per ``teamcard`` element.
        * ``matches`` -- columns ``Team1``, ``Team1_Score``, ``Team2_Score``,
          ``Team2``, ``Year``; even-positioned pairs fill the ``Team1`` side
          and odd-positioned pairs the ``Team2`` side.

    Notes
    -----
    The HTTP status code is deliberately not checked: whatever HTML comes back
    is parsed, and a page without the expected elements yields empty tables
    (``len(matches) == 0``), which callers can use to detect a missing year.
    """
    member_columns = ['Team_Name', 'Member1', 'Member2', 'Member3', 'Member4', 'Member5']

    url = f'https://liquipedia.net/dota2/The_International/{str(year)}'
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")

    team_name = soup.find_all("span", class_="name hidden-xs")
    team_composition = soup.find_all('div', class_='teamcard')
    score_list = soup.find_all("div", class_="brkts-opponent-score-inner")

    team_members = []
    for team in team_composition:
        # Every second link among the first twelve is kept.
        # NOTE(review): presumably the skipped links are flag/icon anchors and
        # the first kept link is the team name -- confirm against the page markup.
        members = [link.text for link in team.find_all('a')[0:12:2]]
        # Pad short rosters so each row always matches the six named columns;
        # otherwise the DataFrame constructor rejects the column list.
        members += [None] * (len(member_columns) - len(members))
        team_members.append(members)

    team_performance = []
    team1_score = []
    team2_score = []
    # Names and scores are paired purely by position. zip() stops at the
    # shorter sequence, so a page with fewer score elements than name elements
    # no longer raises IndexError.
    for i, (team, score) in enumerate(zip(team_name, score_list)):
        team_performance.append({"Team_Name": team.text,
                                 "Games_Won": score.text})
        if i % 2 == 0:
            team1_score.append({"Team1": team.text,
                                "Team1_Score": score.text})
        else:
            team2_score.append({"Team2_Score": score.text,
                                "Team2": team.text})

    # Explicit column lists keep the schema stable even when nothing was scraped.
    team_performance = pd.DataFrame(team_performance, columns=["Team_Name", "Games_Won"])
    team_performance['Year'] = year

    team_members = pd.DataFrame(team_members, columns=member_columns)
    team_members['Year'] = year

    matches = pd.concat(
        [pd.DataFrame(team1_score, columns=["Team1", "Team1_Score"]),
         pd.DataFrame(team2_score, columns=["Team2_Score", "Team2"])],
        axis=1,
    )
    matches['Year'] = year
    return team_performance, team_members, matches


In [28]:
# Scrape every year from the first tournament onwards and stop at the first
# year whose page yields no matches.
FIRST_TI_YEAR = 2011
# Exclusive safety bound only; the loop normally ends via the `break` below.
YEAR_SEARCH_LIMIT = 3000

# Collect the per-year tables in plain lists and concatenate once at the end.
# Growing a DataFrame with pd.concat inside the loop re-copies all earlier
# rows on every iteration, and concatenating onto an empty seed DataFrame
# triggers a deprecation warning in recent pandas releases.
team_performance_frames = []
team_members_frames = []
matches_frames = []

print("Getting Dota 2 The International Main Event Matches for year")
for year in range(FIRST_TI_YEAR, YEAR_SEARCH_LIMIT):
    team_performance, team_members, matches = getMatches(year)
    if len(matches) == 0:
        print(f'{year} (no entry)')
        break
    print(f'{year}...')
    team_performance_frames.append(team_performance)
    team_members_frames.append(team_members)
    matches_frames.append(matches)

# ignore_index=True gives each combined table one continuous row index instead
# of repeating 0..n for every year. Fall back to an empty frame when no year
# produced data, because pd.concat rejects an empty list.
team_performance_list = (
    pd.concat(team_performance_frames, ignore_index=True)
    if team_performance_frames else pd.DataFrame()
)
team_members_list = (
    pd.concat(team_members_frames, ignore_index=True)
    if team_members_frames else pd.DataFrame()
)
matches_list = (
    pd.concat(matches_frames, ignore_index=True)
    if matches_frames else pd.DataFrame()
)

Getting Dota 2 The International Main Event Matches for year
2011...
2012...
2013...
2014...
2015...
2016...
2017...
2018...
2019...
2020...
2021...
2022...
2023 (no entry)


In [29]:
# Preview the first five rows of the combined per-team score table.
team_performance_list.head(n=5)

Unnamed: 0,Team_Name,Games_Won,Year
0,EHOME,1,2011
1,TYLOO,0,2011
2,Meet Your Makers,0,2011
3,Scythe Gaming,1,2011
4,EHOME,0,2011


In [30]:
# Preview the first five rows of the combined roster table.
team_members_list.head(n=5)

Unnamed: 0,Team_Name,Member1,Member2,Member3,Member4,Member5,Year
0,Meet Your Makers,MiSeRy,MaNia,PlaymatE,Maelk,DeMoN,2011
1,MiTH.Trust,l3nu,TnK,LaKelz,Lookball,aabBAA,2011
2,Natus Vincere,Artstyle,Dendi,XBOCT,Puppey,LighTofHeaveN,2011
3,Virus Gaming,Maldejambes,VladTepes,Sockshka,Pseudo,Ph0eniX,2011
4,MUFC,SilverCross,Net,Sharky,xiaoling,kYxY,2011


In [31]:
# Preview the first five rows of the combined match table.
matches_list.head(n=5)

Unnamed: 0,Team1,Team1_Score,Team2_Score,Team2,Year
0,EHOME,1,0,TYLOO,2011
1,Meet Your Makers,0,1,Scythe Gaming,2011
2,EHOME,0,1,Scythe Gaming,2011
3,OK.Nirvana.int,0,1,Invictus Gaming,2011
4,Natus Vincere,1,0,Moscow Five,2011


In [32]:
# Write each combined table to its own CSV file, leaving out the row index.
csv_exports = (
    (team_performance_list, 'dota2_TI_team_performances.csv'),
    (team_members_list, 'dota2_TI_team_members.csv'),
    (matches_list, 'dota2_TI_matches.csv'),
)
for frame, csv_path in csv_exports:
    frame.to_csv(csv_path, index=False)