# DSC 190 Final Project - Predicting the Result of the League of Legends games based on the champion composition and team statistics

##### by Tat Hei Tsin and Dennis Wu

## 1. Introduction

League of Legends (LoL) is a 5v5 games with players choosing different champions to combat the enemy team. There are over a hundred champions for the players to choose, and different combination will render As huge LoL fans, we wonder what are the important factors that can determine the result of the LoL games. Firstly, we parse the data of Riot Api to build the baseline model to get a sense of the features 

## Import the packages

In [17]:
# Required packages
import numpy as np
import pandas as pd
from riotwatcher import LolWatcher, ApiError
import requests
from bs4 import BeautifulSoup

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import FunctionTransformer
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

import xgboost as xgb

import time

## Import the data without parsing (high ranked players)

In [5]:
challenger_games = pd.read_csv("data/Challenger_Ranked_Games.csv")
grandmaster_games = pd.read_csv("data/GrandMaster_Ranked_Games.csv")
master_games = pd.read_csv("data/Master_Ranked_Games.csv")

## 2. Create the basedline model

### Get the usernames from the requests and BeautifulSoup in a website with a list of summoner names

In [6]:
r = requests.get("https://lolnames.gg/en/highscores/na/1000/")

In [7]:
soup = BeautifulSoup(r.text, "html.parser")

In [8]:
summoners = soup.findAll("td", {"class": "align-middle h5"})

In [9]:
names = []
for summoner in summoners:
    names.append(summoner.a.text)

In [10]:
names[:10]

['Blueberry Dango',
 'NyxWulf',
 'Yi Gè Rén',
 'BlaoThorne RJ',
 'IPMagazine ',
 'Ewokcore',
 'A Legendary Crab',
 'MelroseR',
 'UwU BigStrongMan',
 'PÊÑNŸWÏŠË']

### Retreive the match by getting the most recent ten games of each user

#### Since the data are the most active users in North American server, we deem that the most recent ten games will provide us with the up to date data

In [19]:
test = names[:1]

In [20]:
match_id = []

#### Create a Riot Api Watchher

In [21]:
# global variables

key = 'RGAPI-284145d4-7ea3-4026-ae90-b7b0b3ba07b1'
watcher = LolWatcher(key)
my_region = 'na1'

In [22]:
test_match_id = []
for pid in test:
    me = watcher.summoner.by_name(my_region, pid)
    my_matches = watcher.match.matchlist_by_account(my_region, me['accountId'])
    for i in range(10):
        test_match_id.append(my_matches["matches"][i]['gameId'])
        time.sleep(2)

In [30]:
test_match_id

[3454669418,
 3454589227,
 3454196621,
 3454165361,
 3454182373,
 3452992811,
 3452242089,
 3452128070,
 3452130121,
 3451944686]

In [24]:
# check league's latest version
latest = watcher.data_dragon.versions_for_region(my_region)['n']['champion']
# Lets get some champions static information
static_champ_list = watcher.data_dragon.champions(latest, False, 'en_US')

# champ static list data to dict for looking up
champ_dict = {}
for key in static_champ_list['data']:
    row = static_champ_list['data'][key]
    champ_dict[row['key']] = row['id']

In [25]:
champ_dict

{'266': 'Aatrox',
 '103': 'Ahri',
 '84': 'Akali',
 '12': 'Alistar',
 '32': 'Amumu',
 '34': 'Anivia',
 '1': 'Annie',
 '523': 'Aphelios',
 '22': 'Ashe',
 '136': 'AurelionSol',
 '268': 'Azir',
 '432': 'Bard',
 '53': 'Blitzcrank',
 '63': 'Brand',
 '201': 'Braum',
 '51': 'Caitlyn',
 '164': 'Camille',
 '69': 'Cassiopeia',
 '31': 'Chogath',
 '42': 'Corki',
 '122': 'Darius',
 '131': 'Diana',
 '119': 'Draven',
 '36': 'DrMundo',
 '245': 'Ekko',
 '60': 'Elise',
 '28': 'Evelynn',
 '81': 'Ezreal',
 '9': 'Fiddlesticks',
 '114': 'Fiora',
 '105': 'Fizz',
 '3': 'Galio',
 '41': 'Gangplank',
 '86': 'Garen',
 '150': 'Gnar',
 '79': 'Gragas',
 '104': 'Graves',
 '120': 'Hecarim',
 '74': 'Heimerdinger',
 '420': 'Illaoi',
 '39': 'Irelia',
 '427': 'Ivern',
 '40': 'Janna',
 '59': 'JarvanIV',
 '24': 'Jax',
 '126': 'Jayce',
 '202': 'Jhin',
 '222': 'Jinx',
 '145': 'Kaisa',
 '429': 'Kalista',
 '43': 'Karma',
 '30': 'Karthus',
 '38': 'Kassadin',
 '55': 'Katarina',
 '10': 'Kayle',
 '141': 'Kayn',
 '85': 'Kennen',
 '12

### Retrieve the champion score from www.metasrc.com, create a DataFrame of the champions with their scores

In [28]:
# Scraping individual champion score
r1 = requests.get("https://www.metasrc.com/5v5/na/tierlist")
soup1 = BeautifulSoup(r1.text, "html.parser")
power_level = soup1.findAll("div", {"class": "_9581uw"})
names = soup1.findAll("div", {"class": "_q8ue62"})
champ = []
for name in names:
    champ.append(name.getText())
rates = []
for rate in power_level:
    rates.append(rate.getText())
tierlist = pd.DataFrame().assign(
    champ = champ,
    score = rates
)
tierlist.sort_values(by = ["champ"])

# Getting individual Champion IDs
IDs = pd.read_csv("data/new.txt", sep = ":", header = None)
IDs.columns = ["ID", "champ"]

champion_list = tierlist.sort_values(by = ["champ"])["champ"].unique()

# Merging Ids and individual champion score
new_tierlist = []

# Averaging similar champions scores into one
for champ in champion_list:
    row = {}
    champ_list = tierlist[tierlist["champ"] == champ]
    champ_list_avg = champ_list["score"].astype(float).mean()
    row["champ"] = champ
    row["score"] = champ_list_avg
    new_tierlist.append(row)

tierlist_pd = pd.DataFrame(new_tierlist)
tierlist_pd = tierlist_pd.sort_values(by = ["champ"])
champion_score = IDs.merge(tierlist_pd, on = champion_list)
champion_score = champion_score.drop(columns = ["champ_x", "champ_y"])
champion_score.columns = ["champion", "champion_id", "score"]
champion_score = champion_score.set_index(["champion_id"])

champion_score

Unnamed: 0_level_0,champion,score
champion_id,Unnamed: 1_level_1,Unnamed: 2_level_1
266,Aatrox,48.115
103,Ahri,62.550
84,Akali,37.505
12,Alistar,51.060
32,Amumu,51.160
...,...,...
238,Zed,67.050
115,Ziggs,39.900
26,Zilean,47.055
142,Zoe,51.010


### Generate the input matches

In [33]:
input_matches = []

for ID in test_match_id:
    match_info = watcher.match.by_id(my_region, ID)
    input_matches.append(match_info)

### Transform the match data to a DataFrame of useful information

In [36]:
# from matchID to matches

# Inputting data frame
output_detail = []
y_result = []

difference_list = ["totalMinionsKilled", "longestTimeSpentLiving", "totalHeal", "totalDamageDealt", "magicDamageDealt", "physicalDamageDealt", "totalDamageDealtToChampions", "magicDamageDealtToChampions", "trueDamageDealtToChampions", "goldEarned"]
tower_list = ["towerKills", "inhibitorKills", "riftHeraldKills"]

#Creating individual player scores
for match_detail in input_matches:
    participants = []
    team = []
    scores = []
    
#     print(match_detail["gameMode"])
    # Calculate Individual statistics
    for row in match_detail['participants']:
        participants_row = {}
        participants_row['champion_Id'] = row['championId']
        participants_row['team'] = row['teamId']
        participants_row['kills'] = row['stats']['kills']
        participants_row['deaths'] = row['stats']['deaths']
        participants_row['assists'] = row['stats']['assists']
        participants_row['totalDamageDealt'] = row['stats']['totalDamageDealt']
        participants_row['magicDamageDealt'] = row['stats']['magicDamageDealt']
        participants_row['physicalDamageDealt'] = row['stats']['physicalDamageDealt']
        participants_row['physicalDamageDealt'] = row['stats']['physicalDamageDealt']
        participants_row['totalDamageDealtToChampions'] = row['stats']['totalDamageDealtToChampions']
        participants_row['magicDamageDealtToChampions'] = row['stats']['magicDamageDealtToChampions']
        participants_row['physicalDamageDealtToChampions'] = row['stats']['physicalDamageDealtToChampions']
        participants_row['trueDamageDealtToChampions'] = row['stats']['trueDamageDealtToChampions']    
        participants_row['goldEarned'] = row['stats']['goldEarned']
        participants_row['champLevel'] = row['stats']['champLevel'] / 18
        participants_row['totalMinionsKilled'] = row['stats']['totalMinionsKilled']
        participants_row['largestMultiKill'] = row['stats']['largestMultiKill']
        participants_row['killingSprees'] = row['stats']['killingSprees']
        participants_row['doubleKills'] = row['stats']['doubleKills']
        participants_row['tripleKills'] = row['stats']['tripleKills']
        participants_row['quadraKills'] = row['stats']['quadraKills']
        participants_row['pentaKills'] = row['stats']['pentaKills']
        participants_row['longestTimeSpentLiving'] = row['stats']['longestTimeSpentLiving']
        participants_row['totalHeal'] = row['stats']['totalHeal']
        participants_row['damageDealtToObjectives'] = row['stats']['damageDealtToObjectives']
        participants_row['damageDealtToObjectives'] = row['stats']['damageDealtToObjectives']
        participants_row['damageDealtToTurrets'] = row['stats']['damageDealtToTurrets']
        participants_row['visionScore'] = row['stats']['visionScore']
        participants_row['timeCCingOthers'] = row['stats']['timeCCingOthers']
        participants_row['totalDamageTaken'] = row['stats']['totalDamageTaken']
        participants_row['magicalDamageTaken'] = row['stats']['magicalDamageTaken']
        participants_row['physicalDamageTaken'] = row['stats']['physicalDamageTaken']   
        participants_row['trueDamageTaken'] = row['stats']['trueDamageTaken']   
        participants.append(participants_row)

    # Champion scores conversion
    match = pd.DataFrame(participants)
    champions = match["champion_Id"]
    for champ in champions:
        scores.append(champion_score.loc[champ].get("score"))
    scores_pd = pd.DataFrame(scores)
    scores_pd.columns = ["score"]
    match = match.join(scores_pd)
    grouped = match.groupby(["team"]).sum()
    grouped = grouped.drop(columns = ['champion_Id'])
        
    # Calculate Team statistsics
    for row in match_detail['teams']:
        team_row = {}
        team_row['team'] = row['teamId']
        team_row['firstBlood'] = int(row['firstBlood'] == True)
        team_row['firstTower'] = int(row['firstTower'] == True)
        team_row['firstBaron'] = int(row['firstBaron'] == True)
        team_row['firstDragon'] = int(row['firstDragon'] == True)
        team_row['firstBlood'] = int(row['firstBlood'] == True) 
        team_row['firstRiftHerald'] = int(row['firstRiftHerald'] == True)
        team_row['towerKills'] = row['towerKills'] / 11
        team_row['inhibitorKills'] = row['inhibitorKills'] / 3
        team_row['baronKills'] = row['baronKills'] 
        team_row['dragonKills'] = row['dragonKills']    
        team_row['riftHeraldKills'] = row['riftHeraldKills'] / 2
        result = row["win"]
        if (result == "Win"):
            y_result.append("1")
        elif (result == "Fail"):
            y_result.append("0")
        team.append(team_row)

    team_df = pd.DataFrame(team)
    final_df = team_df.merge(grouped, on = "team")
    final_df = final_df.drop(columns = ["team"])
    #print(final_df)

    #for column in difference_list:
        
    #break;
    for index, row in final_df.iterrows():
        output_detail.append(pd.DataFrame(row).T)

In [45]:
df = pd.DataFrame()

for i in output_detail:
    df = pd.concat([df, i])
    
df["result"] = pd.Series(y_result).astype(int)
df = df.reset_index(drop = True)

In [46]:
df

Unnamed: 0,firstBlood,firstTower,firstBaron,firstDragon,firstRiftHerald,towerKills,inhibitorKills,baronKills,dragonKills,riftHeraldKills,...,damageDealtToObjectives,damageDealtToTurrets,visionScore,timeCCingOthers,totalDamageTaken,magicalDamageTaken,physicalDamageTaken,trueDamageTaken,score,result
0,1.0,1.0,0.0,1.0,1.0,0.363636,0.0,0.0,1.0,0.5,...,28425.0,11622.0,52.0,41.0,44263.0,9014.0,33746.0,1500.0,226.221667,1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1991.0,849.0,35.0,41.0,43136.0,9299.0,31964.0,1870.0,309.783333,0
2,1.0,1.0,0.0,1.0,0.0,0.727273,0.333333,0.0,4.0,0.0,...,70285.0,27201.0,96.0,91.0,96687.0,38601.0,53508.0,4572.0,274.625,1
3,0.0,0.0,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,...,11769.0,10174.0,86.0,88.0,103166.0,50866.0,51769.0,526.0,275.14,0
4,1.0,1.0,0.0,0.0,0.0,0.363636,0.666667,0.0,0.0,0.0,...,8577.0,8577.0,0.0,145.0,57133.0,30004.0,23093.0,4032.0,259.825,1
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,158.0,91216.0,44301.0,43497.0,3414.0,268.48,0
6,1.0,0.0,0.0,0.0,0.0,0.363636,0.333333,0.0,0.0,0.0,...,9865.0,9865.0,0.0,152.0,125733.0,82176.0,41321.0,2232.0,245.688333,1
7,0.0,1.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,...,3220.0,3220.0,0.0,179.0,118878.0,70151.0,44868.0,3852.0,254.46,0
8,1.0,1.0,0.0,0.0,0.0,0.363636,0.666667,0.0,0.0,0.0,...,5083.0,5083.0,0.0,192.0,147247.0,79613.0,53218.0,14411.0,245.583333,1
9,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,...,2645.0,2645.0,0.0,201.0,180354.0,75705.0,92690.0,11956.0,242.7,0


### Model preprocessing

In [47]:
X = df.drop(["result"], axis = 1)
y = df.result

### Train test split

In [48]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [49]:
cat_feat = [
    "firstBlood",
    "firstTower",
    "firstBaron",
    "firstDragon",
    "firstDragon"
]
cat_transformer = Pipeline(steps = [
    ("onehot", OneHotEncoder())
])


log_feat = ['towerKills', 'inhibitorKills', 'baronKills',
       'dragonKills', 'riftHeraldKills', 'kills', 'deaths', 'assists',
       'totalDamageDealt', 'magicDamageDealt', 'physicalDamageDealt',
       'totalDamageDealtToChampions', 'magicDamageDealtToChampions',
       'physicalDamageDealtToChampions', 'trueDamageDealtToChampions',
       'goldEarned', 'champLevel', 'totalMinionsKilled', 'largestMultiKill',
       'killingSprees', 'doubleKills', 'tripleKills', 'quadraKills',
       'pentaKills', 'longestTimeSpentLiving', 'totalHeal',
       'damageDealtToObjectives', 'damageDealtToTurrets', 'visionScore',
       'timeCCingOthers', 'totalDamageTaken', 'magicalDamageTaken',
       'physicalDamageTaken', 'trueDamageTaken', 'score']
log_tranformer = Pipeline(steps = [
    ("log", FunctionTransformer(lambda x: x))
])



preproc = ColumnTransformer(transformers = [("cat", cat_transformer, cat_feat), 
                                            ("log", log_tranformer, log_feat)])

pl = Pipeline(steps = [("preprocessor", preproc), ("xgbclassifier", xgb.XGBClassifier())])

pl.fit(X_train, y_train)

preds = pl.predict(X_test)

In [50]:
preds

array([1, 1, 1, 0])

In [52]:
y_test

0     1
17    0
15    0
1     0
Name: result, dtype: int32

In [54]:
accuracy_score(preds, y_test)

0.5

We found out that the model is not accurate enough to predict the result of the game. Hence we dive deeper into the preprocessing.

## 3. Refine the model

#### As we developed our model, Riot Api didn't give us the optimal data. Instead, we choose to use the data parsed by previous users on kaggle.

dataset names:
Challenger_Ranked_Games.csv
GrandMaster_Ranked_Games.csv
high_diamond_ranked_10min.csv

source: kaggle.com/gyejr95/league-of-legends-challenger-ranked-games2020?fbclid=IwAR3Mo5lKWOWSEUSFJWHQ_Gn45opGB5GI_5LJoWpiRISw3OdFXEFCbiTpYLU

description:
The matches of high-ranked players. Includes columns containing similar information to the previous DataFrame parsed.

#### EDA

In [65]:
len(challenger_games)

26904

In [57]:
challenger_games.columns

Index(['gameId', 'gameDuraton', 'blueWins', 'blueFirstBlood', 'blueFirstTower',
       'blueFirstBaron', 'blueFirstDragon', 'blueFirstInhibitor',
       'blueDragonKills', 'blueBaronKills', 'blueTowerKills',
       'blueInhibitorKills', 'blueWardPlaced', 'blueWardkills', 'blueKills',
       'blueDeath', 'blueAssist', 'blueChampionDamageDealt', 'blueTotalGold',
       'blueTotalMinionKills', 'blueTotalLevel', 'blueAvgLevel',
       'blueJungleMinionKills', 'blueKillingSpree', 'blueTotalHeal',
       'blueObjectDamageDealt', 'redWins', 'redFirstBlood', 'redFirstTower',
       'redFirstBaron', 'redFirstDragon', 'redFirstInhibitor',
       'redDragonKills', 'redBaronKills', 'redTowerKills', 'redInhibitorKills',
       'redWardPlaced', 'redWardkills', 'redKills', 'redDeath', 'redAssist',
       'redChampionDamageDealt', 'redTotalGold', 'redTotalMinionKills',
       'redTotalLevel', 'redAvgLevel', 'redJungleMinionKills',
       'redKillingSpree', 'redTotalHeal', 'redObjectDamageDealt'],
    

In [66]:
len(grandmaster_games)

65896

In [58]:
grandmaster_games.columns

Index(['gameId', 'gameDuraton', 'blueWins', 'blueFirstBlood', 'blueFirstTower',
       'blueFirstBaron', 'blueFirstDragon', 'blueFirstInhibitor',
       'blueDragonKills', 'blueBaronKills', 'blueTowerKills',
       'blueInhibitorKills', 'blueWardPlaced', 'blueWardkills', 'blueKills',
       'blueDeath', 'blueAssist', 'blueChampionDamageDealt', 'blueTotalGold',
       'blueTotalMinionKills', 'blueTotalLevel', 'blueAvgLevel',
       'blueJungleMinionKills', 'blueKillingSpree', 'blueTotalHeal',
       'blueObjectDamageDealt', 'redWins', 'redFirstBlood', 'redFirstTower',
       'redFirstBaron', 'redFirstDragon', 'redFirstInhibitor',
       'redDragonKills', 'redBaronKills', 'redTowerKills', 'redInhibitorKills',
       'redWardPlaced', 'redWardkills', 'redKills', 'redDeath', 'redAssist',
       'redChampionDamageDealt', 'redTotalGold', 'redTotalMinionKills',
       'redTotalLevel', 'redAvgLevel', 'redJungleMinionKills',
       'redKillingSpree', 'redTotalHeal', 'redObjectDamageDealt'],
    

In [67]:
len(master_games)

107125

In [59]:
 master_games.columns

Index(['gameId', 'gameDuraton', 'blueWins', 'blueFirstBlood', 'blueFirstTower',
       'blueFirstBaron', 'blueFirstDragon', 'blueFirstInhibitor',
       'blueDragonKills', 'blueBaronKills', 'blueTowerKills',
       'blueInhibitorKills', 'blueWardPlaced', 'blueWardkills', 'blueKills',
       'blueDeath', 'blueAssist', 'blueChampionDamageDealt', 'blueTotalGold',
       'blueTotalMinionKills', 'blueTotalLevel', 'blueAvgLevel',
       'blueJungleMinionKills', 'blueKillingSpree', 'blueTotalHeal',
       'blueObjectDamageDealt', 'redWins', 'redFirstBlood', 'redFirstTower',
       'redFirstBaron', 'redFirstDragon', 'redFirstInhibitor',
       'redDragonKills', 'redBaronKills', 'redTowerKills', 'redInhibitorKills',
       'redWardPlaced', 'redWardkills', 'redKills', 'redDeath', 'redAssist',
       'redChampionDamageDealt', 'redTotalGold', 'redTotalMinionKills',
       'redTotalLevel', 'redAvgLevel', 'redJungleMinionKills',
       'redKillingSpree', 'redTotalHeal', 'redObjectDamageDealt'],
    

#### Further preprocessing the data

Normalizing the statistics

In [61]:
def per_minute(raw_df):
    properties = ["WardPlaced", "Wardkills", "Kills", "ChampionDamageDealt", "TotalGold", "TotalMinionKills", "JungleMinionKills", "TotalHeal", "ObjectDamageDealt"]
    raw_df["duration"] = raw_df["duration"] / 60
    game_duration = raw_df["duration"]
    for column in properties:
        raw_df[column] = raw_df[column] / game_duration
    return raw_df

In [62]:
def difference(blue_df, red_df):
    properties = ["WardPlaced", "Wardkills", "Kills", "ChampionDamageDealt", "TotalGold", "TotalMinionKills", "JungleMinionKills", "TotalHeal", "ObjectDamageDealt"]
    for column in properties:
        blue_columns_values = []
        red_columns_values = []
        for index, row in blue_df.iterrows():
            blue_columns_values.append(blue_df.loc[index][column] - red_df.loc[index][column])
            red_columns_values.append(red_df.loc[index][column] - blue_df.loc[index][column])
        blue_df[column] = blue_columns_values
        red_df[column] = red_columns_values
        blue_columns_values = []
        red_columns_values = []        
    return([blue_df, red_df])

In [68]:
def preprocessing(raw_df):
    column_list = []
    blue_df = raw_df.loc[:, raw_df.columns.str.startswith('blue')]
    for column in blue_df.columns:
        column_list.append(column[4:])
    
    blue_df.columns = column_list
    blue_df["duration"] = raw_df.get("gameDuraton")
    blue_df["team"] = "b"
    blue_df = per_minute(blue_df)
    
    column_list = []
    red_df = raw_df.loc[:, raw_df.columns.str.startswith('red')]
    for column in red_df.columns:
        column_list.append(column[3:])
    
    red_df.columns = column_list
    red_df["duration"]  = raw_df.get("gameDuraton")
    red_df["team"] = "r"
    red_df = per_minute(red_df)
    
    [blue_df, red_df] = difference(blue_df, red_df)
    
    complete_df = blue_df.append(red_df)
    complete_df = complete_df.drop(columns = ["TotalLevel"])
    complete_df["AvgLevel"] = complete_df["AvgLevel"] / 18
    complete_df["TowerKills"] = complete_df["TowerKills"] / 11
    complete_df = complete_df.reset_index()
    return complete_df

#### We run our model based upon the previous 5000 instances of three tables accordingly

challenger_model

In [69]:
output = preprocessing(challenger_games[:5000])

X = output.drop(["Wins"], axis = 1)
y = output.Wins

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

cat_feat = [
    "FirstBlood",
    "FirstTower",
    "FirstBaron",
    "FirstDragon",
    "FirstInhibitor"
]
cat_transformer = Pipeline(steps = [
    ("onehot", OneHotEncoder())
])


log_feat = ['DragonKills', 'BaronKills',
       'TowerKills', 'InhibitorKills', 'WardPlaced', 'Wardkills', 'Kills',
       'Death', 'Assist', 'ChampionDamageDealt', 'TotalGold',
       'TotalMinionKills', 'AvgLevel', 'JungleMinionKills', 'KillingSpree',
       'TotalHeal', 'ObjectDamageDealt', 'duration']
log_tranformer = Pipeline(steps = [
    ("log", FunctionTransformer(lambda x: x))
])



preproc = ColumnTransformer(transformers = [("cat", cat_transformer, cat_feat), 
                                            ("log", log_tranformer, log_feat)])

pl = Pipeline(steps = [("preprocessor", preproc), ("xgbclassifier", xgb.XGBClassifier())])

pl.fit(X_train, y_train)

preds = pl.predict(X_test)

print("The accuracy of the challenger model is", accuracy_score(preds, y_test))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_in

The accuracy of the challenger model is 0.986


grandmaster_model

In [72]:
output = preprocessing(grandmaster_games[:5000])

X = output.drop(["Wins"], axis = 1)
y = output.Wins

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

cat_feat = [
    "FirstBlood",
    "FirstTower",
    "FirstBaron",
    "FirstDragon",
    "FirstInhibitor"
]
cat_transformer = Pipeline(steps = [
    ("onehot", OneHotEncoder())
])


log_feat = ['DragonKills', 'BaronKills',
       'TowerKills', 'InhibitorKills', 'WardPlaced', 'Wardkills', 'Kills',
       'Death', 'Assist', 'ChampionDamageDealt', 'TotalGold',
       'TotalMinionKills', 'AvgLevel', 'JungleMinionKills', 'KillingSpree',
       'TotalHeal', 'ObjectDamageDealt', 'duration']
log_tranformer = Pipeline(steps = [
    ("log", FunctionTransformer(lambda x: x))
])



preproc = ColumnTransformer(transformers = [("cat", cat_transformer, cat_feat), 
                                            ("log", log_tranformer, log_feat)])

pl = Pipeline(steps = [("preprocessor", preproc), ("xgbclassifier", xgb.XGBClassifier())])

pl.fit(X_train, y_train)

preds = pl.predict(X_test)

print("The accuracy of the grandmaster model is", accuracy_score(preds, y_test))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_in

The accuracy of the grandmaster model is 0.988


master_model

In [74]:
output = preprocessing(master_games[:5000])

X = output.drop(["Wins"], axis = 1)
y = output.Wins

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

cat_feat = [
    "FirstBlood",
    "FirstTower",
    "FirstBaron",
    "FirstDragon",
    "FirstInhibitor"
]
cat_transformer = Pipeline(steps = [
    ("onehot", OneHotEncoder())
])


log_feat = ['DragonKills', 'BaronKills',
       'TowerKills', 'InhibitorKills', 'WardPlaced', 'Wardkills', 'Kills',
       'Death', 'Assist', 'ChampionDamageDealt', 'TotalGold',
       'TotalMinionKills', 'AvgLevel', 'JungleMinionKills', 'KillingSpree',
       'TotalHeal', 'ObjectDamageDealt', 'duration']
log_tranformer = Pipeline(steps = [
    ("log", FunctionTransformer(lambda x: x))
])



preproc = ColumnTransformer(transformers = [("cat", cat_transformer, cat_feat), 
                                            ("log", log_tranformer, log_feat)])

pl = Pipeline(steps = [("preprocessor", preproc), ("xgbclassifier", xgb.XGBClassifier())])

pl.fit(X_train, y_train)

preds = pl.predict(X_test)

print("The accuracy of the master model is", accuracy_score(preds, y_test))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_in

The accuracy of the master model is 0.993
