In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# import the required libraries for web scraping
from bs4 import BeautifulSoup
import requests
import re
import time
import random

In [None]:
# Raise pandas' display limits to 30 columns and 200 rows so wider/longer
# frames are shown in cell outputs before being truncated.
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 200)

In [None]:
# Load the player, team and hero tables from CSV files under data/
# (paths are relative to the notebook's working directory).
dota_players = pd.read_csv('data/dota_players.csv')
dota_teams = pd.read_csv('data/dota_teams.csv')
dota_heroes = pd.read_csv('data/dota_heroes.csv')

In [None]:
# Load the per-player match performances; later cells read the columns
# 'Kills', 'Last Hits', 'GPM', 'XPM', 'LVL' and 'Hero' from this frame.
single_performances = pd.read_csv('data/datdota_singleperformances.csv')

In [None]:
# Show the loaded performances table.
single_performances

In [None]:
# Mean GPM / XPM for every distinct kill count, last-hit count and level.
by_kills = single_performances.groupby('Kills')
by_last_hits = single_performances.groupby('Last Hits')
gpm_kills = by_kills['GPM'].mean()
gpm_lh = by_last_hits['GPM'].mean()
xpm_kills = by_kills['XPM'].mean()
xpm_lh = by_last_hits['XPM'].mean()
xpm_level = single_performances.groupby('LVL')['XPM'].mean()

# One figure per GPM series: mean GPM against kills, and against last hits.
fig, ax = plt.subplots(figsize=(10, 6))
fig2, ax2 = plt.subplots(figsize=(10, 6))
for axes, series, x_label, legend_label in (
    (ax, gpm_kills, 'Kills', 'GPM per kill'),
    (ax2, gpm_lh, 'Last Hits', 'GPM per last hit'),
):
    axes.plot(series.index, series.values, label=legend_label)
    axes.set_xlabel(x_label)
    axes.set_ylabel('GPM')
    axes.legend()
plt.show()

In [None]:
# Drop the major outliers: keep only performances with fewer than 30 kills.
# The frame is rebound to the filtered result, so later cells see the
# reduced table.
single_performances = single_performances.loc[single_performances['Kills'].lt(30)]


In [None]:
# Per-hero min / max / mean of every remaining column, after dropping
# Match, Player, Result, End Game Items and KDA. The result is indexed by
# the 'Hero' column and has two-level (column, statistic) column labels.
hero_stats = (
    single_performances
    .drop(columns=['Match', 'Player', 'Result', 'End Game Items', 'KDA'])
    .groupby('Hero')
    .agg(['min', 'max', 'mean'])
)


In [None]:
# Overall [min, max] and mean of each column of single_performances, keyed
# by column name. Match, Hero, Player, Result and End Game Items are skipped.
skipped_columns = ('Match', 'Hero', 'Player', 'Result', 'End Game Items')
value_ranges = {}
value_averages = {}
for col in single_performances.columns:
    if col in skipped_columns:
        continue
    column_values = single_performances[col]
    value_ranges[col] = [min(column_values), max(column_values)]
    value_averages[col] = np.mean(column_values)
value_ranges

In [None]:
# Distinct values of the players' 'ID' column.
player_options = dota_players['ID'].unique()

In [41]:
# a class to store single performance data
class SinglePerformance:
    """One player's stat line in a single match.

    The constructor stores every argument unchanged on the instance under
    the attribute of the same name; nothing is validated or converted.
    """

    def __init__(
        self,
        match_id,
        player_id,
        hero_id,
        kills,
        deaths,
        assists,
        last_hits,
        denies,
        gpm,
        xpm,
        hero_damage,
        tower_damage,
        hero_healing,
        level,
    ):
        # Which match / player / hero this record belongs to.
        self.match_id, self.player_id, self.hero_id = match_id, player_id, hero_id
        # Kill / death / assist counts.
        self.kills, self.deaths, self.assists = kills, deaths, assists
        # Creep score and per-minute rates.
        self.last_hits, self.denies = last_hits, denies
        self.gpm, self.xpm = gpm, xpm
        # Damage, healing and final level.
        self.hero_damage, self.tower_damage = hero_damage, tower_damage
        self.hero_healing, self.level = hero_healing, level

    def get_info(self):
        """Return the stored values as a dict, in constructor-argument order.

        The keys are "Match", "Player", "Hero", "Kills", "Deaths", "Assists",
        "Last_Hits", "Denies", "GPM", "XPM", "Hero_Damage", "Tower_Damage",
        "Hero_Healing" and "Level".
        """
        return dict(
            Match=self.match_id,
            Player=self.player_id,
            Hero=self.hero_id,
            Kills=self.kills,
            Deaths=self.deaths,
            Assists=self.assists,
            Last_Hits=self.last_hits,
            Denies=self.denies,
            GPM=self.gpm,
            XPM=self.xpm,
            Hero_Damage=self.hero_damage,
            Tower_Damage=self.tower_damage,
            Hero_Healing=self.hero_healing,
            Level=self.level,
        )

In [77]:
# Show the per-hero stat summary.
hero_stats

Unnamed: 0_level_0,Hero,Kills,Kills,Kills,Deaths,Deaths,Deaths,Assists,Assists,Assists,GPM,GPM,GPM,XPM,XPM,...,LVL,LVL,HD,HD,HD,TD,TD,TD,HH,HH,HH,GS,GS,GS,HERO_ID
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,mean,min,max,mean,min,max,mean,min,max,mean,min,max,...,max,mean,min,max,mean,min,max,mean,min,max,mean,min,max,mean,Unnamed: 31_level_1
0,Abaddon,0,2,1.0,2,5,3.5,4,22,13.0,214,298,256.0,251,579,...,23,18.5,4645,7268,5956.5,131,354,242.5,596,12983,6789.5,9455,13435,11445.0,0
1,Ancient Apparition,3,3,3.0,6,6,6.0,16,16,16.0,320,320,320.0,484,484,...,20,20.0,12624,12624,12624.0,589,589,589.0,0,0,0.0,12035,12035,12035.0,1
2,Anti-Mage,10,10,10.0,0,0,0.0,9,9,9.0,866,866,866.0,997,997,...,24,24.0,20489,20489,20489.0,16618,16618,16618.0,0,0,0.0,29100,29100,29100.0,2
3,Arc Warden,3,10,5.75,0,5,2.25,7,17,12.25,603,910,798.0,606,856,...,27,23.0,21184,57266,39504.75,601,7521,4387.25,0,0,0.0,19515,38665,30241.25,3
4,Axe,5,13,9.333333,4,11,8.333333,2,10,7.0,299,579,471.666667,322,926,...,26,21.333333,20286,44131,29495.0,315,1271,770.666667,0,0,0.0,10330,24110,18076.666667,4
5,Bane,0,3,1.5,5,10,7.5,6,22,12.0,196,269,233.0,214,361,...,17,14.25,3026,11634,6504.75,0,214,91.25,0,0,0.0,5575,10200,8050.0,5
6,Batrider,1,16,7.980769,0,11,4.288462,1,30,13.884615,300,745,526.557692,349,982,...,30,22.403846,8837,66647,25457.865385,0,3730,350.326923,0,228,16.211538,6065,38300,18835.0,6
7,Beastmaster,0,9,3.348837,0,10,4.395349,1,36,13.0,335,2063,548.790698,345,34400,...,27,20.046512,5763,32255,14797.27907,0,16267,6493.906977,0,14017,3384.860465,4580,33785,17253.604651,7
8,Bloodseeker,0,16,5.266667,0,9,4.333333,0,17,7.333333,391,7552,1031.733333,373,34400,...,30,21.133333,3027,71068,20107.333333,0,13110,3195.2,0,195,13.0,6860,38645,19487.0,8
9,Bounty Hunter,0,4,1.666667,3,8,5.333333,4,9,5.666667,185,555,344.666667,211,540,...,18,14.666667,4192,19602,9655.333333,0,874,480.0,0,0,0.0,5015,18305,11866.666667,9


In [72]:
# a function to create random match data for a list of player ids
def create_random_match(players):
    """Create one random match with a stat line for every player.

    Each player is given a distinct hero id drawn from
    ``dota_heroes['HERO_ID']``. Every stat is a uniform random integer
    between that hero's ``min`` and ``max`` for the stat in ``hero_stats``.

    NOTE(review): ``hero_stats`` must be indexed by hero id for the
    ``.loc[hero_id]`` lookup to work, whereas the ``groupby('Hero')`` that
    builds it in this notebook indexes it by the 'Hero' column. Confirm the
    re-indexing step exists.

    Args:
        players: iterable of player ids; one row is generated per id.

    Returns:
        pandas.DataFrame with one row per player, in input order, and the
        columns produced by ``SinglePerformance.get_info``. An empty
        ``players`` gives an empty frame.

    Raises:
        ValueError: if there are more players than distinct hero ids.
        KeyError: if a drawn hero id is not in the ``hero_stats`` index.
    """
    # hero_stats column for each SinglePerformance stat, in the positional
    # order SinglePerformance expects after (match_id, player_id, hero_id).
    stat_columns = (
        'Kills', 'Deaths', 'Assists', 'Last Hits', 'Denies',
        'GPM', 'XPM', 'HD', 'TD', 'HH', 'LVL',
    )

    def roll(hero_row, stat):
        # int() lets randint accept numpy scalars or whole-number floats.
        return random.randint(int(hero_row[stat]['min']), int(hero_row[stat]['max']))

    match_id = random.randint(100000, 999999)
    players = list(players)

    # Draw from a plain list of ids, not the Series: random.choice on a
    # Series indexes by label and breaks on a non-default index.
    # random.sample also guarantees distinct heroes without a retry loop,
    # which never ended when there were more players than heroes.
    hero_pool = dota_heroes['HERO_ID'].unique().tolist()
    hero_ids = random.sample(hero_pool, len(players))

    records = []
    for player, hero_id in zip(players, hero_ids):
        hero_row = hero_stats.loc[hero_id]  # looked up once per player
        rolled = [roll(hero_row, stat) for stat in stat_columns]
        performance = SinglePerformance(match_id, player, hero_id, *rolled)
        records.append(performance.get_info())
    return pd.DataFrame(records)

In [73]:
# Generate one random match for ten placeholder player ids "A".."J".
randommatch = create_random_match(["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"])

In [74]:
# Show the generated match.
randommatch

Unnamed: 0,Match,Player,Hero,Kills,Deaths,Assists,Last_Hits,Denies,GPM,XPM,Hero_Damage,Tower_Damage,Hero_Healing,Level
0,2951473646,A,112,4,5,5,181,17,378,549,13169,274,59,17
1,2951473646,B,31,2,7,22,132,4,479,541,13137,8264,5182,19
2,2951473646,C,52,8,1,10,349,13,482,628,19142,3401,5588,18
3,2951473646,D,65,1,6,16,21,2,228,284,4503,60,0,16
4,2951473646,E,99,11,4,12,573,3,426,545,41086,2345,8939,29
5,2951473646,F,49,3,6,10,34,6,284,594,6336,532,120,13
6,2951473646,G,21,8,7,7,269,14,442,760,16489,1328,2580,27
7,2951473646,H,56,5,4,3,809,23,552,714,99474,7407,0,17
8,2951473646,I,10,0,2,5,134,9,471,561,16426,532,249,26
9,2951473646,J,118,6,1,6,358,5,655,762,18654,10738,0,22


In [18]:
# Build a dict mapping each distinct team_name in dota_players to the
# sub-frame holding that team's rows.
team_dataframes = {
    team_name: dota_players[dota_players['team_name'] == team_name]
    for team_name in dota_players['team_name'].unique()
}

In [22]:
# Show the player rows stored under the "Alliance" team name.
team_dataframes["Alliance"]

Unnamed: 0.1,Unnamed: 0,ID,name,country,Region,team_name,role
6,6,charlie,Charlie Arat,Sweden,Europe,Alliance,Carry
7,7,Loda,Jonathan Emanuel Berg,Sweden,Europe,Alliance,Coach
8,8,s4,Gustav Magnusson,Sweden,Europe,Alliance,Offlaner
9,9,ChYuan,Ng Kee Chyuan,Malaysia,Southeast Asia,Alliance,Solo Middle
10,11,ponlo,Remus Goh Zhi Xian,Singapore,Southeast Asia,Alliance,Support
11,12,Handsken,Simon Rasmus Haag,Sweden,Europe,Alliance,Support


In [None]:
# Show the [min, max] pair recorded for each column.
value_ranges

In [None]:
# Region labels used for players. assign_region can additionally emit
# 'Other' for a country that is in none of its lists.
regions = ['North America', 'Europe', 'China', 'Southeast Asia', 'South America', 'CIS']

In [None]:
# function that takes a pandas dataframe and assigns a region based on each player's country
def assign_region(df):
    """Add a 'Region' column derived from the 'country' column.

    Each country name is looked up in a fixed country -> region table.
    Countries that are not listed, including missing values, get 'Other'.

    Russia, Ukraine, Belarus, Kazakhstan, Azerbaijan, Armenia, Georgia and
    Moldova map to 'CIS'. The previous nested conditional listed them under
    both 'Europe' and 'CIS' and tested 'Europe' first, so the 'CIS' branch
    could never be reached for them.

    Args:
        df: pandas.DataFrame with a 'country' column of country names.

    Returns:
        The same DataFrame object. 'Region' is added or overwritten in place.
    """
    region_countries = {
        'North America': ('United States', 'Canada'),
        'CIS': (
            'Russia', 'Ukraine', 'Belarus', 'Kazakhstan', 'Azerbaijan',
            'Armenia', 'Georgia', 'Kyrgyzstan', 'Moldova', 'Tajikistan',
            'Turkmenistan', 'Uzbekistan', 'Kosovo', 'Abkhazia', 'South Ossetia',
        ),
        'Europe': (
            'United Kingdom', 'Germany', 'France', 'Spain', 'Italy',
            'Netherlands', 'Poland', 'Sweden', 'Denmark', 'Norway', 'Finland',
            'Belgium', 'Switzerland', 'Austria', 'Portugal', 'Greece',
            'Czech Republic', 'Hungary', 'Romania', 'Bulgaria', 'Serbia',
            'Slovenia', 'Bosnia and Herzegovina', 'Croatia', 'Ireland',
            'Luxembourg', 'Slovakia', 'Estonia', 'Latvia', 'Lithuania',
            'Montenegro', 'Albania', 'Cyprus', 'Macedonia', 'Malta', 'Turkey',
            'Iceland', 'Liechtenstein', 'Monaco', 'San Marino', 'Vatican City',
        ),
        'China': ('China', 'Hong Kong', 'Taiwan'),
        'Southeast Asia': (
            'Singapore', 'Malaysia', 'Philippines', 'Indonesia', 'Thailand',
            'Vietnam', 'Cambodia', 'Laos', 'Myanmar', 'Brunei',
        ),
        'South America': (
            'Brazil', 'Argentina', 'Chile', 'Colombia', 'Ecuador', 'Peru',
            'Venezuela', 'Uruguay', 'Bolivia', 'Paraguay', 'Guyana',
            'Suriname', 'French Guiana', 'Easter Island', 'Falkland Islands',
            'South Georgia and the South Sandwich Islands',
            'South Sandwich Islands', 'Antarctica',
        ),
    }
    # Invert to country -> region; each country appears in exactly one tuple.
    country_to_region = {
        country: region
        for region, countries in region_countries.items()
        for country in countries
    }
    df['Region'] = df['country'].apply(
        lambda country: country_to_region.get(country, 'Other')
    )
    return df

In [None]:
# Add the 'Region' column. assign_region returns the frame it was given, so
# dota_players ends up bound to the same object as dota_active_players.
# NOTE(review): dota_active_players is not defined anywhere in the visible
# notebook; confirm where it comes from, or this cell fails on a fresh kernel.
dota_players = assign_region(dota_active_players)

In [None]:
# function that detects first occurrence of char
def find_first(string, char):
    """Return the index of the first occurrence of ``char`` in ``string``.

    Delegates to ``str.find`` instead of a hand-written scan, so a
    multi-character ``char`` is now located as a substring.

    Args:
        string: the text to search.
        char: the character (or substring) to look for.

    Returns:
        The zero-based index of the first match, or -1 when there is no
        match or ``char`` is empty.
    """
    # str.find("") would return 0; keep the previous "not found" result.
    if not char:
        return -1
    return string.find(char)

# function that detects last occurrence of char
def find_last(string, char):
    """Return the index of the last occurrence of ``char`` in ``string``.

    Delegates to ``str.rfind``, which avoids building a reversed copy of
    the string. A multi-character ``char`` is now located as a substring.

    Args:
        string: the text to search.
        char: the character (or substring) to look for.

    Returns:
        The zero-based index of the last match, or -1 when there is no
        match or ``char`` is empty.
    """
    # str.rfind("") would return len(string); keep the previous -1 result.
    if not char:
        return -1
    return string.rfind(char)

In [None]:
# split the team column into team name and role
# Entries are expected to look like "Team Name (Role)". The pattern takes
# everything before the first "(" as the team name and everything between
# that "(" and the last ")" as the role.
# An entry with no parenthesised part keeps its full text as team_name and
# gets a missing role. The previous index-based slicing silently cut off
# its last character, because the "not found" index -1 was used as a slice
# bound.
team_parts = dota_players['team'].str.extract(r'^([^(]*)(?:\((.*)\))?')
dota_players = (
    dota_players
    .assign(team_name=team_parts[0].str.strip(), role=team_parts[1])
    .drop(columns='team')
)

In [None]:
# Role labels that the role filter on dota_players keeps.
roles = ['Support', 'Offlaner', 'Solo Middle', 'Carry', 'Coach']

In [None]:
# filter the roles: keep only the players whose 'role' is one of the labels
# in `roles`; every other row (including a missing role) is dropped.
dota_players = dota_players[dota_players['role'].isin(roles)]

### Below is for scraping dota data from liquipedia

In [None]:
# scrape the data from the website
url = "https://liquipedia.net/dota2/Players_(all)"
# Time out instead of hanging forever on a stalled connection, and fail
# loudly on an HTTP error rather than parsing an error page into no rows.
page = requests.get(url, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")
# Every table row on the page. find_all is the current spelling of the
# deprecated findAll alias and matches the other cells in this notebook.
rows = soup.find_all("tr")

In [None]:
# Inspect the second <tr>; the next cell reads the column headers from it.
rows[1]

In [None]:
# Column labels come from the <th> cells of the second table row. Only
# trailing whitespace is removed, so any leading space stays in the label.
indexes = rows[1]
index_values = [header.get_text().rstrip() for header in indexes.find_all("th")]
# Filled by the row-parsing cell, one dict per player.
players = []

In [None]:
# Show the extracted header labels.
index_values

In [None]:
# Turn each table row into a dict keyed by the header labels and collect
# the non-empty ones in `players`.
for row in rows:
    # Rows with three or fewer child nodes are skipped.
    if len(row) <= 3:
        continue
    cells = row.find_all("td")
    player = {}
    for position, cell in enumerate(cells):
        key = index_values[position]
        if key == " Links":
            # The " Links" column is replaced by a "country" entry, taken
            # from the title of the first link in the row's first cell.
            key = "country"
            value = cells[0].find("a").get("title")
        else:
            value = cell.get_text().rstrip()
        player[key] = value
    # Rows without any <td> cells produce an empty dict and are not kept.
    if player:
        players.append(player)

In [None]:
# Build a table with one row per scraped player dict.
player_df = pd.DataFrame(players)

In [None]:
# Replace the scraped header labels (which carry a leading space) with
# short column names.
scraped_to_clean = {" Real Name": "name", " Team": "team", " ID": "ID"}
player_df = player_df.rename(columns=scraped_to_clean)

In [None]:
# Role / job labels that can appear as the entire value of the "team"
# column. Rows whose team is exactly one of these labels are dropped.
# NOTE(review): presumably these are players without a team, where the
# scraped Team cell held only a role; confirm against the source page.
filter_roles = {
    "Carry", "Hard Carry", "Soft Carry",
    "Mid", "Solo Middle",
    "Offlane", "Offlaner",
    "Support", "Hard Support", "Soft Support", "Hard", "Soft",
    "Analyst", "Caster", "Host",
}
team_is_role = player_df["team"].isin(filter_roles)
player_df = player_df[~team_is_role]

In [None]:
# Save the scraped player table to CSV without the index column.
# NOTE(review): this runs before the whitespace-stripping cell that follows,
# so the saved file still contains any leading/trailing spaces. Confirm the
# cell order is intended.
player_df.to_csv("liquipedia_dota_players.csv", index=False)

In [None]:
# strip all leading and trailing spaces
# Only object-dtype columns are stripped; all other columns pass through
# unchanged.
player_df = player_df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

In [None]:
# Fetch and parse the Liquipedia teams portal page.
# NOTE(review): get_teams() below repeats these steps itself, and `teams` and
# `divs` are not read again in the visible part of the notebook. This cell
# looks like leftover exploration.
url = "https://liquipedia.net/dota2/Portal:Teams"
page = requests.get(url)
soup = BeautifulSoup(page.content, "html.parser")
teams = []
divs = soup.find_all('div',class_="panel-box")

In [None]:


def get_teams(url="https://liquipedia.net/dota2/Portal:Teams", timeout=30):
    """Scrape team names from a Liquipedia teams portal page.

    A name is read from the ``title`` attribute of the first link inside
    each ``<span class="team-template-team-standard">`` element, in page
    order. Duplicates are kept.

    Args:
        url: page to fetch; defaults to the Dota 2 teams portal.
        timeout: seconds to wait for the server before giving up.

    Returns:
        list of str: the team names found, or an empty list if there are none.

    Raises:
        requests.RequestException: on a connection problem, a timeout, or
            an HTTP error status.
    """
    page = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty list.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")

    teams = []
    for template in soup.find_all('span', class_="team-template-team-standard"):
        link = template.a
        # Skip a template with no titled link rather than crashing on it.
        if link is not None and link.has_attr('title'):
            teams.append(link['title'])
    return teams

In [None]:
# Scrape the team names and put them in a single-column frame.
teamdf = pd.DataFrame(get_teams())

In [None]:
# Save the team names to CSV. No index=False is passed, so the row index is
# written as the first column.
teamdf.to_csv("activeteams.csv")

In [None]:
# Print the smallest and largest GPM value.
# NOTE(review): dota_performances is not defined anywhere in the visible
# notebook; confirm where it is created.
print(f'Min: {dota_performances["GPM"].min()}, Max: {dota_performances["GPM"].max()}')