# Getting info on eSports teams from Liquipedia

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import html5lib

from time import sleep

In [2]:
continents_list = ['Americas', 'Europe', 'China']

In [3]:
teams_links = []

for cont in continents_list:
    res = requests.get('https://liquipedia.net/dota2/Portal:Teams/{cont}'.format(cont=cont))
    tree = BeautifulSoup(res.text, 'html5lib')
    
    for link in tree.find_all('span', class_='team-template-text'):
        teams_links.append(link.a['href'])

In [4]:
len(teams_links)

49

In [5]:
teams_links[0]

'/dota2/CompLexity_Gaming'

In [68]:
teams_data = {}

for team in teams_links:
    team_num = teams_links.index(team)
    teams_data[team_num] = {}
    
    res = requests.get('https://liquipedia.net{team}'.format(team=team))
    tree = BeautifulSoup(res.text, 'html5lib')
    
    name_raw = tree.find('h1', class_='firstHeading')
    name = name_raw.text.replace('\n\t\t\t\t\t\t\t\t','').replace('\n\t\t\t\t\t\t\t','')
    teams_data[team_num].update({'name':name})
    
    # get meta data on teams
    descr_box = tree.find('div', class_='fo-nttax-infobox wiki-bordercolor-light')
    
    # location
    if descr_box.find(text='Location:') is not None:
        loc_raw = descr_box.find(text='Location:')
        loc = loc_raw.parent.parent.findNext('div').findNext('div').a.get('title')
        teams_data[team_num].update({'location':loc})
    else:
        continue
        
    # region
    if descr_box.find(text='Region:') is not None:
        reg_raw = descr_box.find(text='Region:')
        reg = reg_raw.parent.parent.findNext('div').findNext('div').a.get('title')
        teams_data[team_num].update({'region':reg})
    else:
        continue
        
    # manager_from
    if descr_box.find(text='Manager:') is not None:
        manag_from_raw = descr_box.find(text='Manager:')
        manag_from = manag_from_raw.parent.parent.findNext('div').findNext('div').a.get('title')
        teams_data[team_num].update({'manager_from':manag_from})
    else: 
        continue
        
    # manager
    if descr_box.find(text='Manager:') is not None:
        manag_raw = descr_box.find(text='Manager:')
        manag = manag_raw.parent.parent.findNext('div').findNext('div').text
        teams_data[team_num].update({'manager':manag})
    else:
        continue
        
    # earnings
    if descr_box.find(text='Total Earnings:') is not None:
        earn_raw = descr_box.find(text='Total Earnings:')
        earn = earn_raw.parent.parent.findNext('div').findNext('div').text
        earn = int(earn.replace('$', '').replace(',', ''))
        teams_data[team_num].update({'total_earnings':earn})
    else:
        continue
    
    # rank
    if descr_box.find(text='Pro Circuit Rank:') is not None: 
        rank_raw = descr_box.find(text='Pro Circuit Rank:')
        rank_raw = rank_raw.parent.parent.findNext('div').findNext('div').a.text
        sep = '\xa0('
        rank = int(rank_raw.split(sep, 1)[0].replace('#','').replace('–','0'))
        points = int(rank_raw.split(sep, 1)[1].replace(' points)','').replace(' point)',''))
        teams_data[team_num].update({'rank':rank, 'points':points})
    else:
        continue
    
    # created
    if descr_box.find(text='Created:') is not None:
        create_raw = descr_box.find(text='Created:')
        create = create_raw.parent.parent.findNext('div').findNext('div').text
        teams_data[team_num].update({'created_year':create})
    else:
        continue
    

In [69]:
teams_df = pd.DataFrame.from_dict(teams_data).T

In [71]:
teams_df.head()

Unnamed: 0,created_year,location,manager,manager_from,name,points,rank,region,total_earnings
0,Organization: 2003Dota 2: 2012-02-16,USA,Joshua Schmidt,USA,compLexity Gaming,135.0,19.0,North America,825959
1,Organization: 1999Dota 2: 2011-10-24,USA,Phillip Aram,USA,Evil Geniuses,1335.0,11.0,North America,15144669
2,Organization: 2015-10-07Dota 2: 2017-09-13,USA,Josh Arkin,USA,Immortals,90.0,21.0,North America,47500
3,Organization: 2006Dota 2: 2017-09-26,USA,Kodiak Shroyer,USA,OpTic Gaming,1800.0,9.0,North America,495800
4,,USA,KBBQ,USA,VGJ.Storm,,,North America,342000
