# Dota 2 - Regression Analysis

* Trying to predict the combined total score of the two teams based on full match data

In [1]:
# API IMPORTS
from __future__ import print_function
import time
from pprint import pprint
import json
import ast

# BASIC IMPORTS
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import datetime as dt
import seaborn as sns
import matplotlib.pyplot as plt
from math import sqrt
from statistics import mean, stdev

### Data Dropping and Cleaning

* Removing all bulk columns, keeping only the team scores, the human player count, and the draft information (`picks_bans`)

In [2]:
def drop_bulk(df):
    """Strip the columns not used by the regression and discard incomplete rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw match table. Every column named in ``unwanted`` must be present,
        otherwise ``DataFrame.drop`` raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        A new frame without the listed columns and without any row that has
        a missing value in one of the remaining columns.
    """
    unwanted = [
        'Unnamed: 0', 'Unnamed: 0.1',
        'barracks_status_dire', 'barracks_status_radiant',
        'cluster', 'cosmetics', 'duration', 'radiant_xp_adv',
        'region', 'series_id', 'series_type', 'skill', 'start_time',
        'throw', 'tower_status_dire', 'tower_status_radiant',
        'version', 'engine', 'first_blood_time', 'game_mode',
        'league', 'leagueid', 'lobby_type', 'loss',
        'match_id', 'match_seq_num', 'objectives', 'patch',
        'draft_timings', 'dire_team', 'radiant_gold_adv',
        'radiant_team', 'radiant_win',
    ]
    # Columns go first so that gaps in discarded columns do not cost us rows.
    trimmed = df.drop(columns=unwanted)
    return trimmed.dropna(axis=0)

In [3]:
def create_total(df):
    """Add a ``total_score`` column holding both teams' scores combined.

    The new column is written onto ``df`` itself, so the caller's frame is
    modified; that same frame object is returned for chaining.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``dire_score`` and ``radiant_score`` columns.

    Returns
    -------
    pandas.DataFrame
        ``df``, now carrying ``total_score``.
    """
    combined = df["dire_score"].add(df["radiant_score"])
    df["total_score"] = combined
    return df

In [4]:
def drop_bots(df):
    """Keep only matches with a non-zero total score and ten human players.

    A total score of 0 is treated as an invalid match (extremely unlikely,
    arguably impossible), and anything other than 10 human players means
    bots took part, which we do not want in the data set.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``total_score`` and ``human_players`` columns.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` that pass both checks, with their original index.
    """
    has_score = df.total_score.ne(0)
    all_human = df.human_players.eq(10)
    return df[has_score & all_human]

## Collect all "picks" from the `picks_bans` data structure and store the list of their hero ids as a column

In [5]:
def collect_picks(row):
    """Return the hero ids that were picked (not banned) in one match.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``row["picks_bans"]`` as a string containing a Python
        literal: a sequence of dicts that each have an ``'is_pick'`` flag
        and a ``'hero_id'``.

    Returns
    -------
    list
        The ``hero_id`` of every entry whose ``is_pick`` flag is set, in the
        order the entries appear in the draft.

    Raises
    ------
    ValueError, SyntaxError
        If the ``picks_bans`` text is not a valid Python literal.
    KeyError
        If an entry lacks ``'is_pick'`` or ``'hero_id'``.
    """
    # literal_eval already accepts single- and double-quoted strings, so the
    # text is parsed as-is. Blindly swapping ' for " would corrupt any value
    # that contains an apostrophe, and a JSON dump/load round trip adds
    # nothing once the literal has been parsed.
    draft = ast.literal_eval(row["picks_bans"])
    return [entry['hero_id'] for entry in draft if entry['is_pick']]

In [6]:
def picks_wrapper(df):
    """Replace the raw ``picks_bans`` column with a ``heroes`` list column.

    Each row is passed to ``collect_picks`` and the resulting list of hero
    ids is stored under ``heroes``.

    Note that ``heroes`` is assigned on the frame that was passed in, so the
    caller's object gains that column; only the returned frame has
    ``picks_bans`` removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``picks_bans`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``heroes`` added and ``picks_bans`` dropped.
    """
    df['heroes'] = [collect_picks(match) for _, match in df.iterrows()]
    return df.drop(columns=['picks_bans'])

## Dropping original score columns, human players column and separating out heroes list into 10 columns

In [7]:
def columnize_heroes(df):
    """Expand the ``heroes`` list column into ``h1`` .. ``h10``.

    The original score columns and the human player count are dropped first,
    then each position of the ``heroes`` list becomes its own column and the
    list column itself is removed. The input frame is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``dire_score``, ``human_players``, ``radiant_score`` and a
        ``heroes`` column of lists.
        NOTE(review): presumably every list holds exactly ten hero ids (five
        picks per team) - confirm against the data.

    Returns
    -------
    pandas.DataFrame
        New frame with the remaining columns followed by ``h1`` .. ``h10``.
    """
    hero_cols = ['h' + str(slot) for slot in range(1, 11)]

    slim = df.drop(columns=['dire_score', 'human_players', 'radiant_score'])
    # Reusing the frame's index keeps every expanded row aligned with its match.
    expanded = pd.DataFrame(slim.heroes.tolist(), index=slim.index)
    slim[hero_cols] = expanded

    return slim.drop(columns=['heroes'])

In [8]:
def clean(df):
    """Run the complete cleaning pipeline on a raw match frame.

    Prints the frame's shape before and after so the effect of the cleaning
    is visible in the notebook output.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw match table as loaded from the CSV.

    Returns
    -------
    pandas.DataFrame
        The frame after all cleaning steps have been applied in order.
    """
    print(df.shape)
    # Order matters: each step relies on columns produced or kept by the
    # previous one.
    steps = (drop_bulk, create_total, drop_bots, picks_wrapper, columnize_heroes)
    for step in steps:
        df = step(df)
    print(df.shape)
    return df

In [9]:
# International matches: download the raw CSV, clean it, and save the result.
df_int = pd.read_csv(
    "https://dota-match-ids.s3.amazonaws.com/raw_match_csvs/international_raw_match_full.csv"
).pipe(clean)
df_int.to_csv("../../data/processed/Dalton/reg_int_full.csv")

(2941, 37)
(1797, 11)


In [10]:
# Premier matches: download the raw CSV, clean it, and save the result.
df_prem = pd.read_csv(
    "https://dota-match-ids.s3.amazonaws.com/raw_match_csvs/premier_raw_match_full.csv"
).pipe(clean)
df_prem.to_csv("../../data/processed/Dalton/reg_prem_full.csv")

(17622, 37)
(9485, 11)


In [11]:
# Professional matches: download the raw CSV, clean it, and save the result.
# NOTE(review): the recorded output for this cell shows 14 columns after
# cleaning, versus 11 for the international and premier data, so this file
# presumably carries three extra columns that drop_bulk does not remove -
# confirm whether they should be dropped too.
df_prof = pd.read_csv(
    "https://dota-match-ids.s3.amazonaws.com/raw_match_csvs/professional_raw_match_full.csv"
).pipe(clean)
df_prof.to_csv("../../data/processed/Dalton/reg_prof_full.csv")

(74339, 40)
(39606, 14)
