In [1]:
import json
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

In [None]:
# Read the raw match dump. The text is wrapped in brackets before parsing, so
# the file presumably holds comma-separated JSON objects without an enclosing
# array -- TODO confirm against the data producer.
with open('data_40games.json', encoding="utf8") as match_file:
    tmp = '[' + match_file.read() + ']'

# Parsing raises json.JSONDecodeError if the bracketed text is not valid JSON.
match_json = json.loads(tmp)

# Creating a DataFrame of just Champion Names and running models on that

In [96]:
# Populating all the data from the matches
#
# Every accepted match adds exactly one entry to each list below, which keeps
# the lists index-aligned for the DataFrame that is built from them. A match is
# skipped when the queried summoner cannot be placed on team A or B, or when
# its 'champsPlayed' entries do not fill each of the ten team/role slots
# exactly once.
teamA_top = []
teamA_jg = []
teamA_mid = []
teamA_adc = []
teamA_supp = []
teamB_top = []
teamB_jg = []
teamB_mid = []
teamB_adc = []
teamB_supp = []
match_id = []
teamA_won = []

# (team, role code) -> destination list. Role codes found in 'champsPlayed'
# entries ("team,champID,role"): 1 = jungle, 2 = support, 3 = bot (ADC),
# 4 = top, 5 = mid.
champ_lists = {
    ('A', 1): teamA_jg,
    ('A', 2): teamA_supp,
    ('A', 3): teamA_adc,
    ('A', 4): teamA_top,
    ('A', 5): teamA_mid,
    ('B', 1): teamB_jg,
    ('B', 2): teamB_supp,
    ('B', 3): teamB_adc,
    ('B', 4): teamB_top,
    ('B', 5): teamB_mid,
}

counter = 0  # number of matches accepted so far
for match in match_json:

    # Figuring out what team the queried player is on.
    # All whitespace is removed from names before comparing since people like to use it.
    queried_name = match['summoner'].replace(' ', '')
    queried_team = None
    for player in match['otherSummoners']:
        info_arr = player.replace(' ', '').split(',')
        team = info_arr[0]
        name = info_arr[1]
        if name == queried_name and team in ('A', 'B'):
            queried_team = team
            # Stop at the first hit so a repeated name cannot record the result twice.
            break

    # If we can't figure out who won, just don't append to any list.
    if queried_team is None:
        continue

    # Stage this match's picks first; they are committed only if complete.
    picks = {}
    has_duplicate_slot = False
    for player in match['champsPlayed']:
        info_arr = player.split(',')
        team = info_arr[0]
        champID = int(info_arr[1])
        role = int(info_arr[2])
        slot = (team, role)
        if slot not in champ_lists:
            # Unknown team/role combinations are ignored.
            continue
        if slot in picks:
            has_duplicate_slot = True
        picks[slot] = champID

    # A missing or doubled slot would shift every later row of some column.
    if has_duplicate_slot or len(picks) != len(champ_lists):
        continue

    # 'didWin' describes the queried player's team; convert it to team A's result.
    if queried_team == 'A':
        team_a_result = 1 if match['didWin'] else 0
    else:
        team_a_result = 0 if match['didWin'] else 1
    current_match_id = match['matchID']

    # Commit: one entry per list.
    for slot, champID in picks.items():
        champ_lists[slot].append(champID)
    teamA_won.append(team_a_result)
    match_id.append(current_match_id)

    counter+=1

                

In [40]:
# Dataframe of all basic match information: one row per match, one column per
# team/role holding the champion ID. Rows repeating an already-seen
# 'Match ID' are dropped (the first occurrence is kept).
match_df = pd.DataFrame({
    'Match ID': match_id,
    'Team A Won?': teamA_won,
    'A Top': teamA_top,
    'A Jungle': teamA_jg,
    'A Mid': teamA_mid,
    'A Bot': teamA_adc,
    'A Support': teamA_supp,
    'B Top': teamB_top,
    'B Jungle': teamB_jg,
    'B Mid': teamB_mid,
    'B Bot': teamB_adc,
    'B Support': teamB_supp,
}).drop_duplicates(subset='Match ID')
match_df

Unnamed: 0,Match ID,Team A Won?,A Top,A Jungle,A Mid,A Bot,A Support,B Top,B Jungle,B Mid,B Bot,B Support
0,4848223288,1,24,20,134,67,35,84,154,246,29,497
1,4848131748,1,777,78,4,119,902,31,62,517,429,888
2,4848087528,1,24,234,61,429,37,98,77,518,222,412
3,4847265470,0,268,141,8,202,12,86,56,238,145,497
4,4848043757,1,68,203,142,51,235,23,121,8,15,12
...,...,...,...,...,...,...,...,...,...,...,...,...
324818,4814345455,1,266,56,517,74,235,86,57,45,29,412
324820,4813002074,0,58,19,115,29,12,897,233,101,18,147
324822,4819650090,0,24,104,75,74,432,31,48,39,67,902
324823,4812981843,1,86,154,268,22,117,240,104,245,18,267


In [41]:
# Converting all Champ IDs to their real name
with open('championIDs.json', encoding="utf8") as f:
    tmp = f.read()

champ_json = json.loads(tmp)  # raises if the file is not valid JSON

# https://ddragon.leagueoflegends.com/api/versions.json is where I found the latest version of this champ information
# Lookup table: integer champion key -> champion name (the key under 'data').
champ_dict = {
    int(champ_info['key']): champ
    for champ, champ_info in champ_json['data'].items()
}

In [42]:
# Converting all champ IDs to names in dataframe
#
# The conversion overwrites match_df's columns in place. Only columns that
# still hold numeric IDs are mapped, so re-running this cell is a no-op instead
# of mapping the already-converted names through champ_dict (which would turn
# every value into NaN).
for column in match_df.columns:
    if column in ('Match ID', 'Team A Won?'):
        continue
    if pd.api.types.is_numeric_dtype(match_df[column]):
        # IDs missing from champ_dict become NaN.
        match_df[column] = match_df[column].map(champ_dict)

match_df

Unnamed: 0,Match ID,Team A Won?,A Top,A Jungle,A Mid,A Bot,A Support,B Top,B Jungle,B Mid,B Bot,B Support
0,4848223288,1,Jax,Nunu,Syndra,Vayne,Shaco,Akali,Zac,Qiyana,Twitch,Rakan
1,4848131748,1,Yone,Poppy,TwistedFate,Draven,Milio,Chogath,MonkeyKing,Sylas,Kalista,Renata
2,4848087528,1,Jax,Viego,Orianna,Kalista,Sona,Shen,Udyr,Neeko,Jinx,Thresh
3,4847265470,0,Azir,Kayn,Vladimir,Jhin,Alistar,Garen,Nocturne,Zed,Kaisa,Rakan
4,4848043757,1,Rumble,Kindred,Zoe,Caitlyn,Senna,Tryndamere,Khazix,Vladimir,Sivir,Alistar
...,...,...,...,...,...,...,...,...,...,...,...,...
324818,4814345455,1,Aatrox,Nocturne,Sylas,Heimerdinger,Senna,Garen,Maokai,Veigar,Twitch,Thresh
324820,4813002074,0,Renekton,Warwick,Ziggs,Twitch,Alistar,KSante,Briar,Xerath,Tristana,Seraphine
324822,4819650090,0,Jax,Graves,Nasus,Heimerdinger,Bard,Chogath,Trundle,Irelia,Vayne,Milio
324823,4812981843,1,Garen,Zac,Azir,Ashe,Lulu,Kled,Graves,Ekko,Tristana,Nami


In [43]:
# Trying MLP

# One-hot encode match_df; the identifier and label columns are then split off
# from the feature matrix.
dummied = pd.get_dummies(match_df)
X = dummied.drop(['Match ID', 'Team A Won?'], axis=1)
y = dummied['Team A Won?']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .1, random_state=42)

# Seed the network like every other model in this notebook so the reported
# score is reproducible across re-runs.
mlp = MLPClassifier(random_state=42).fit(X_train, y_train)
mlp.score(X_test, y_test)



0.5052713783678251

In [44]:
# Trying Random Forest Ensemble
# Unlimited tree depth, fixed seed; the last line reports test-split accuracy.
clf = RandomForestClassifier(random_state=42, max_depth=None)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.5228426395939086

# Creating a DataFrame of the Champion's Winrate in that Role and running models on that

In [93]:
# This cell creates all the winrates for every champ/role combination
#
# NOTE(review): the rates are computed from every row of match_df, including
# each row's own 'Team A Won?' label and rows that a later train/test split
# puts in the test set, so features looked up from win_rate_dict leak the
# target. Consider computing them from the training rows only.

games_in_dict = {}    # 'Champion Role' -> match_df index labels, one per appearance
games_won_dict = {}   # 'Champion Role' -> appearances on the winning team
win_rate_dict = {}    # 'Champion Role' -> games_won / appearances

# Single pass: record each appearance and whether that side won.
for index, row in match_df.iterrows():
    team_a_result = row['Team A Won?']
    for column in ['A Top', 'A Jungle', 'A Mid', 'A Bot', 'A Support', 'B Top', 'B Jungle', 'B Mid', 'B Bot', 'B Support']:
        team, role = column.split(' ')
        # row[column] will spit out a champion name
        champ_role = row[column] + ' ' + role
        # A pick wins when it is on team A and A won, or on team B and A lost.
        # Counting per appearance also gives a mirror matchup one win and one
        # loss rather than two wins.
        side_won = (team_a_result == 1) if team == 'A' else (team_a_result == 0)
        games_in_dict.setdefault(champ_role, []).append(index)
        games_won_dict[champ_role] = games_won_dict.get(champ_role, 0) + int(side_won)

# Getting the win rate based on the games that champ/role was in
for champ_role, game_indexes in games_in_dict.items():
    win_rate_dict[champ_role] = games_won_dict[champ_role] / len(game_indexes)

# Look up a champion's win rate in a given role
def win_rate(champion, role, dict):
    """Return the win rate stored for ``champion`` played in ``role``.

    ``dict`` maps 'Champion Role' keys (single space between the two parts) to
    win rates; note that the parameter name shadows the builtin inside this
    function. Raises KeyError when the combination is not present.
    """
    lookup_key = ' '.join((champion, role))
    return dict[lookup_key]

In [94]:
# Build the win-rate feature table from a deep copy of match_df: every champion
# column is replaced by that champion's win rate in the column's role, while
# 'Match ID' and 'Team A Won?' are left untouched.
winrate_df = match_df.copy(deep=True)
champion_columns = [name for name in winrate_df.columns if name not in ('Match ID', 'Team A Won?')]
for column in champion_columns:
    # Column names look like 'A Top'; the second word is the role.
    role = column.split(' ')[1]
    winrate_df[column] = winrate_df[column].transform(lambda x: win_rate(x, role, win_rate_dict))

In [95]:
# Show the per-role win-rate table as this cell's output.
winrate_df

Unnamed: 0,Match ID,Team A Won?,A Top,A Jungle,A Mid,A Bot,A Support,B Top,B Jungle,B Mid,B Bot,B Support
0,4848223288,1,0.521313,0.503173,0.503341,0.516784,0.472906,0.497863,0.505829,0.502770,0.522730,0.506754
1,4848131748,1,0.479153,0.483957,0.505706,0.519829,0.494923,0.502588,0.466134,0.520461,0.510774,0.506617
2,4848087528,1,0.521313,0.485958,0.493604,0.510774,0.512271,0.497370,0.508568,0.506992,0.507348,0.514714
3,4847265470,0,0.549865,0.503162,0.506785,0.494525,0.509598,0.490364,0.486515,0.503803,0.480917,0.506754
4,4848043757,1,0.493161,0.528064,0.514953,0.483096,0.517835,0.523136,0.495181,0.506785,0.482210,0.509598
...,...,...,...,...,...,...,...,...,...,...,...,...
324818,4814345455,1,0.487486,0.486515,0.520461,0.472028,0.517835,0.490364,0.477379,0.464943,0.522730,0.514714
324820,4813002074,0,0.489415,0.470032,0.485156,0.522730,0.509598,0.510396,0.489342,0.471320,0.521180,0.478411
324822,4819650090,0,0.521313,0.517958,0.503480,0.472028,0.521975,0.502588,0.484444,0.496939,0.516784,0.494923
324823,4812981843,1,0.490364,0.505829,0.488907,0.527675,0.483742,0.518345,0.517958,0.505618,0.521180,0.483875


In [48]:
# MLP on the per-role win-rate features.
# Features are every column except the match identifier and the label.
y = winrate_df['Team A Won?']
X = winrate_df.drop(columns=['Match ID', 'Team A Won?'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

mlp = MLPClassifier(random_state=42)
mlp.fit(X_train, y_train)
mlp.score(X_test, y_test)

0.5508004685669661

In [49]:
# Trying Random Forest Ensemble
# Depth capped at 5, fixed seed; the last line reports test-split accuracy.
clf = RandomForestClassifier(random_state=42, max_depth=5)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.5457243264349864

In [50]:
# Trying KNN
# Default k-nearest-neighbours settings; the last line reports test-split accuracy.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn.score(X_test, y_test)

0.51308082780164

# Adding Bot/Support Duo Winrate to existing Winrate DataFrame and running models on that

In [98]:
# This cell creates all the winrates for every bot lane duo combination
#
# NOTE(review): the rates are computed from every row of match_df, including
# each row's own 'Team A Won?' label and rows that a later train/test split
# puts in the test set, so features looked up from duo_win_rate_dict leak the
# target. Consider computing them from the training rows only.

duo_games_in_dict = {}    # 'Bot Support' -> match_df index labels, one per appearance
duo_games_won_dict = {}   # 'Bot Support' -> appearances on the winning team
duo_win_rate_dict = {}    # 'Bot Support' -> games_won / appearances

# Single pass: record each duo appearance together with whether that side won,
# so the names never have to be recovered by splitting the key again.
for index, row in match_df.iterrows():
    team_a_result = row['Team A Won?']
    # Team A's duo wins when A won; team B's duo wins when A lost.
    for team, side_won in (('A', team_a_result == 1), ('B', team_a_result == 0)):
        duo = row[team + ' Bot'] + ' ' + row[team + ' Support']
        duo_games_in_dict.setdefault(duo, []).append(index)
        duo_games_won_dict[duo] = duo_games_won_dict.get(duo, 0) + int(side_won)

# Getting the win rate based on the games that duo was in
for duo, game_indexes in duo_games_in_dict.items():
    duo_win_rate_dict[duo] = duo_games_won_dict[duo] / len(game_indexes)

# Look up a bot/support duo's win rate
def duo_win_rate(bot, supp, dict):
    """Return the win rate stored for the duo ``bot`` + ``supp``.

    ``dict`` maps 'Bot Support' keys (single space between the names) to win
    rates; the parameter name shadows the builtin inside this function.
    Raises KeyError when the duo is not present.
    """
    return dict[' '.join((bot, supp))]

In [173]:
# Getting the duo winrates for both teams
# Champion names are read from match_df (winrate_df already holds numbers in
# those columns) and the looked-up rate is stored as '<team> Duo'.
for team in ('A', 'B'):
    winrate_df[team + ' Duo'] = match_df.apply(
        lambda x: duo_win_rate(x[team + ' Bot'], x[team + ' Support'], duo_win_rate_dict),
        axis=1,
    )
winrate_df

Unnamed: 0,Match ID,Team A Won?,A Top,A Jungle,A Mid,A Bot,A Support,B Top,B Jungle,B Mid,B Bot,B Support,A Duo,B Duo
0,4848223288,1,0.521313,0.503173,0.503341,0.516784,0.472906,0.497863,0.505829,0.502770,0.522730,0.506754,0.551724,0.554307
1,4848131748,1,0.479153,0.483957,0.505706,0.519829,0.494923,0.502588,0.466134,0.520461,0.510774,0.506617,0.536680,0.498225
2,4848087528,1,0.521313,0.485958,0.493604,0.510774,0.512271,0.497370,0.508568,0.506992,0.507348,0.514714,0.596154,0.550744
3,4847265470,0,0.549865,0.503162,0.506785,0.494525,0.509598,0.490364,0.486515,0.503803,0.480917,0.506754,0.510638,0.468321
4,4848043757,1,0.493161,0.528064,0.514953,0.483096,0.517835,0.523136,0.495181,0.506785,0.482210,0.509598,0.472785,0.524064
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324818,4814345455,1,0.487486,0.486515,0.520461,0.472028,0.517835,0.490364,0.477379,0.464943,0.522730,0.514714,0.433333,0.526749
324820,4813002074,0,0.489415,0.470032,0.485156,0.522730,0.509598,0.510396,0.489342,0.471320,0.521180,0.478411,0.493007,0.538462
324822,4819650090,0,0.521313,0.517958,0.503480,0.472028,0.521975,0.502588,0.484444,0.496939,0.516784,0.494923,0.250000,0.521595
324823,4812981843,1,0.490364,0.505829,0.488907,0.527675,0.483742,0.518345,0.517958,0.505618,0.521180,0.483875,0.495050,0.494253


In [284]:
# Spot-check the stored win rate of one bot/support duo.
duo_win_rate_dict['MissFortune Blitzcrank']

0.5358851674641149

In [104]:
# Trying MLP
# Features are every column except the match identifier and the label.
y = winrate_df['Team A Won?']
X = winrate_df.drop(columns=['Match ID', 'Team A Won?'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

mlp = MLPClassifier(random_state=42)
mlp.fit(X_train, y_train)
mlp.score(X_test, y_test)

0.5753221397891448

In [105]:
# Trying Random Forest Ensemble
# Depth capped at 5, fixed seed; the last line reports test-split accuracy.
clf = RandomForestClassifier(random_state=42, max_depth=5)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.5656384224912143

In [106]:
# Trying KNN
# Default k-nearest-neighbours settings; the last line reports test-split accuracy.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn.score(X_test, y_test)

0.5279968762202265

# Adding Opposing Laner Win Rates (Top Aatrox vs Top Riven as an Example)

In [161]:
# This cell creates all the winrates for every lane matchup X vs. X combination. Provides a different dictionary key for X vs. Y and Y vs. X.
#
# Keys look like 'Riven Top,Aatrox Top' and hold the win rate of the champion
# on the left of the comma in that matchup.
#
# NOTE(review): the rates are computed from every row of match_df, including
# each row's own 'Team A Won?' label and rows that a later train/test split
# puts in the test set. A matchup seen only once gets a rate of exactly 0 or 1,
# i.e. the label itself, so features looked up from opp_win_rate_dict leak the
# target. Consider computing them from the training rows only.

opp_games_in_dict = {}    # matchup key -> match_df index labels, one per appearance
opp_games_won_dict = {}   # matchup key -> appearances in which the left champion won
opp_win_rate_dict = {}    # matchup key -> games_won / appearances

# Single pass: each lane of each match is recorded once from team A's point of
# view and once from team B's.
for index, row in match_df.iterrows():
    team_a_result = row['Team A Won?']
    for role in ['Top', 'Jungle', 'Mid', 'Bot', 'Support']:
        a_champion = row['A ' + role]
        b_champion = row['B ' + role]
        # Team A's laner wins when A won; team B's laner wins when A lost.
        for champion, opposing_champion, side_won in (
            (a_champion, b_champion, team_a_result == 1),
            (b_champion, a_champion, team_a_result == 0),
        ):
            lane_matchup = champion + ' ' + role + ',' + opposing_champion + ' ' + role
            opp_games_in_dict.setdefault(lane_matchup, []).append(index)
            opp_games_won_dict[lane_matchup] = opp_games_won_dict.get(lane_matchup, 0) + int(side_won)

# Getting the win rate based on the games that matchup was in
for lane_matchup, game_indexes in opp_games_in_dict.items():
    opp_win_rate_dict[lane_matchup] = opp_games_won_dict[lane_matchup] / len(game_indexes)

# Look up a champion's win rate against a specific lane opponent
def opp_win_rate(champion, opposing_champion, role, dict):
    """Return the win rate stored for ``champion`` versus ``opposing_champion`` in ``role``.

    ``dict`` is keyed by '<champion> <role>,<opposing_champion> <role>'; the
    parameter name shadows the builtin inside this function. Raises KeyError
    when the matchup is not present.
    """
    own_side = champion + ' ' + role
    other_side = opposing_champion + ' ' + role
    return dict[own_side + ',' + other_side]

In [181]:
# Adding Team A's laner vs their opposing Team B laner winrate as '<A role> MU'.
# DO NOT ADD the mirrored Team B vs Team A column: it is linearly correlated
# with the column added here, so it would not help as a feature.
for role in ['Top', 'Jungle', 'Mid', 'Bot', 'Support']:
    a_column = 'A ' + role
    b_column = 'B ' + role
    winrate_df[a_column + ' MU'] = match_df.apply(
        lambda x: opp_win_rate(x[a_column], x[b_column], role, opp_win_rate_dict),
        axis=1,
    )

In [285]:
# Spot-check one top-lane matchup: the stored rate is for the champion listed first (Riven).
opp_win_rate_dict['Riven Top,Aatrox Top']

0.5397301349325337

In [286]:
# Same matchup looked up from the other side (Aatrox listed first).
opp_win_rate_dict['Aatrox Top,Riven Top']

0.46026986506746626

In [182]:
# Show winrate_df after the '<A role> MU' matchup columns were added.
winrate_df

Unnamed: 0,Match ID,Team A Won?,A Top,A Jungle,A Mid,A Bot,A Support,B Top,B Jungle,B Mid,B Bot,B Support,A Duo,B Duo,A Top MU,A Jungle MU,A Mid MU,A Bot MU,A Support MU
0,4848223288,1,0.521313,0.503173,0.503341,0.516784,0.472906,0.497863,0.505829,0.502770,0.522730,0.506754,0.551724,0.554307,0.492611,0.408602,0.512658,0.502732,0.512821
1,4848131748,1,0.479153,0.483957,0.505706,0.519829,0.494923,0.502588,0.466134,0.520461,0.510774,0.506617,0.536680,0.498225,0.493151,0.476190,0.436224,0.531835,0.552239
2,4848087528,1,0.521313,0.485958,0.493604,0.510774,0.512271,0.497370,0.508568,0.506992,0.507348,0.514714,0.596154,0.550744,0.563536,0.544000,0.500000,0.462908,0.484507
3,4847265470,0,0.549865,0.503162,0.506785,0.494525,0.509598,0.490364,0.486515,0.503803,0.480917,0.506754,0.510638,0.468321,0.400000,0.500000,0.468750,0.511128,0.498871
4,4848043757,1,0.493161,0.528064,0.514953,0.483096,0.517835,0.523136,0.495181,0.506785,0.482210,0.509598,0.472785,0.524064,0.443038,0.535117,0.580952,0.513889,0.508224
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324818,4814345455,1,0.487486,0.486515,0.520461,0.472028,0.517835,0.490364,0.477379,0.464943,0.522730,0.514714,0.433333,0.526749,0.465549,0.459184,0.500000,0.500000,0.484453
324820,4813002074,0,0.489415,0.470032,0.485156,0.522730,0.509598,0.510396,0.489342,0.471320,0.521180,0.478411,0.493007,0.538462,0.520522,0.512195,0.519231,0.512456,0.555556
324822,4819650090,0,0.521313,0.517958,0.503480,0.472028,0.521975,0.502588,0.484444,0.496939,0.516784,0.494923,0.250000,0.521595,0.507937,0.484848,0.428571,0.666667,0.537838
324823,4812981843,1,0.490364,0.505829,0.488907,0.527675,0.483742,0.518345,0.517958,0.505618,0.521180,0.483875,0.495050,0.494253,0.435294,0.503226,0.416667,0.495356,0.522901


In [183]:
# Trying MLP
# Features are every column except the match identifier and the label.
y = winrate_df['Team A Won?']
X = winrate_df.drop(columns=['Match ID', 'Team A Won?'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

mlp = MLPClassifier(random_state=42)
mlp.fit(X_train, y_train)
mlp.score(X_test, y_test)

0.6463881296368607

# Incorporating Player Winrate on that Champion (Addendum Since this Includes Player Skill)

In [268]:
# Read the raw player statistics. The text is wrapped in brackets before
# parsing, so the file presumably holds comma-separated JSON objects without an
# enclosing array -- TODO confirm against the data producer.
with open('playerStats.json', encoding="utf8") as stats_file:
    tmp = '[' + stats_file.read() + ']'

# Parsing raises json.JSONDecodeError if the bracketed text is not valid JSON.
player_json = json.loads(tmp)

In [269]:
# This dataframe will also include the summoner name, not just the champ ID
# Populating all the data from the matches
#
# Every accepted match adds exactly one entry to each list below, which keeps
# the lists index-aligned for the DataFrame that is built from them. A match is
# skipped when the queried summoner cannot be placed on team A or B, or when
# its 'champsPlayed' entries do not fill each of the ten team/role slots
# exactly once.
teamA_top = []
teamA_jg = []
teamA_mid = []
teamA_adc = []
teamA_supp = []
teamB_top = []
teamB_jg = []
teamB_mid = []
teamB_adc = []
teamB_supp = []
teamA_top_name = []
teamA_jg_name = []
teamA_mid_name = []
teamA_adc_name = []
teamA_supp_name = []
teamB_top_name = []
teamB_jg_name = []
teamB_mid_name = []
teamB_adc_name = []
teamB_supp_name = []

match_id = []
teamA_won = []

# (team, role code) -> (champion ID list, summoner name list). Role codes found
# in 'champsPlayed' entries ("team,champID,role"): 1 = jungle, 2 = support,
# 3 = bot (ADC), 4 = top, 5 = mid.
slot_lists = {
    ('A', 1): (teamA_jg, teamA_jg_name),
    ('A', 2): (teamA_supp, teamA_supp_name),
    ('A', 3): (teamA_adc, teamA_adc_name),
    ('A', 4): (teamA_top, teamA_top_name),
    ('A', 5): (teamA_mid, teamA_mid_name),
    ('B', 1): (teamB_jg, teamB_jg_name),
    ('B', 2): (teamB_supp, teamB_supp_name),
    ('B', 3): (teamB_adc, teamB_adc_name),
    ('B', 4): (teamB_top, teamB_top_name),
    ('B', 5): (teamB_mid, teamB_mid_name),
}

for match in match_json:

    # Figuring out what team the queried player is on.
    # All whitespace is removed from names before comparing since people like to use it.
    queried_name = match['summoner'].replace(' ', '')
    queried_team = None
    for player in match['otherSummoners']:
        info_arr = player.replace(' ', '').split(',')
        team = info_arr[0]
        name = info_arr[1]
        if name == queried_name and team in ('A', 'B'):
            queried_team = team
            # Stop at the first hit so a repeated name cannot record the result twice.
            break

    # If we can't figure out who won, just don't append to any list.
    if queried_team is None:
        continue

    # Stage this match's picks first; they are committed only if complete.
    # Entry i of 'champsPlayed' and entry i of 'otherSummoners' are assumed to
    # describe the same player -- TODO confirm against the data producer.
    picks = {}
    has_duplicate_slot = False
    for champ_entry, summoner_entry in zip(match['champsPlayed'], match['otherSummoners']):
        info_arr = champ_entry.split(',')
        team = info_arr[0]
        champID = int(info_arr[1])
        role = int(info_arr[2])
        # The name is kept as written (whitespace included).
        summoner = summoner_entry.split(',')[1]
        slot = (team, role)
        if slot not in slot_lists:
            # Unknown team/role combinations are ignored.
            continue
        if slot in picks:
            has_duplicate_slot = True
        picks[slot] = (champID, summoner)

    # A missing or doubled slot would shift every later row of some column.
    if has_duplicate_slot or len(picks) != len(slot_lists):
        continue

    # 'didWin' describes the queried player's team; convert it to team A's result.
    if queried_team == 'A':
        team_a_result = 1 if match['didWin'] else 0
    else:
        team_a_result = 0 if match['didWin'] else 1
    current_match_id = match['matchID']

    # Commit: one entry per list.
    for slot, (champID, summoner) in picks.items():
        champ_list, name_list = slot_lists[slot]
        champ_list.append(champID)
        name_list.append(summoner)
    teamA_won.append(team_a_result)
    match_id.append(current_match_id)

                

In [270]:
# Dataframe of all basic match information plus, next to each champion ID
# column, the name of the summoner who played it. Rows repeating an
# already-seen 'Match ID' are dropped (the first occurrence is kept).
name_match_df = pd.DataFrame({
    'Match ID': match_id,
    'Team A Won?': teamA_won,
    'A Top': teamA_top,
    'A Top Name': teamA_top_name,
    'A Jungle': teamA_jg,
    'A Jungle Name': teamA_jg_name,
    'A Mid': teamA_mid,
    'A Mid Name': teamA_mid_name,
    'A Bot': teamA_adc,
    'A Bot Name': teamA_adc_name,
    'A Support': teamA_supp,
    'A Support Name': teamA_supp_name,
    'B Top': teamB_top,
    'B Top Name': teamB_top_name,
    'B Jungle': teamB_jg,
    'B Jungle Name': teamB_jg_name,
    'B Mid': teamB_mid,
    'B Mid Name': teamB_mid_name,
    'B Bot': teamB_adc,
    'B Bot Name': teamB_adc_name,
    'B Support': teamB_supp,
    'B Support Name': teamB_supp_name,
}).drop_duplicates(subset='Match ID')
name_match_df

Unnamed: 0,Match ID,Team A Won?,A Top,A Top Name,A Jungle,A Jungle Name,A Mid,A Mid Name,A Bot,A Bot Name,...,B Top,B Top Name,B Jungle,B Jungle Name,B Mid,B Mid Name,B Bot,B Bot Name,B Support,B Support Name
0,4848223288,1,24,lunacia,20,pbj n milk,134,Jaggieman,67,Impmon,...,84,Decøy,154,Q S,246,ćam,29,azona98,497,dtro1
1,4848131748,1,777,Decøy,78,lunacia,4,who dat who dat,119,TyChee,...,31,riversided,62,readysetgo1,517,Pobelter,429,da xi gua,888,100 Busio
2,4848087528,1,24,lunacia,234,OxyHatman,61,Z3shoo,429,da xi gua,...,98,TTV IMainTopLane,77,FULLMUTEANDCLEAR,518,roblox boy,222,Tomo09,412,100 Busio
3,4847265470,0,268,Kzykendy,141,Yuu13,8,Matcha Tamago,202,Reforge99,...,86,PaIco,56,Strova,238,ZED04,145,lunacia,497,care chimba
4,4848043757,1,68,I will trade,203,shene,142,Cupic Diff,51,TyChee,...,23,lunacia,121,Orenji Momoko,8,Matcha Tamago,15,Łisten,12,Neøø
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324818,4814345455,1,266,Mcmaster Dabao,56,Hwalp,517,Oobama Ben Lagen,74,One By One,...,86,Soundtrack,57,Animal Orchestra,45,AR47,29,JINX96,412,january rain
324820,4813002074,0,58,Benjmin Dover,19,Aberrant Spectre,115,Gummyussy,29,jjbota420,...,897,Moiubiadan,233,DREGGMAN777,101,One By One,18,ráta,147,thatbluemist
324822,4819650090,0,24,BuZai ShengQi,104,biscuitflavor,75,LL Facterr,74,One By One,...,31,ViVïd,48,Fãtëdº,39,ashewipe,67,adc1234561,902,DeductedCarlos
324823,4812981843,1,86,Djângo,154,Aberrant Spectre,268,Ms Teemo,22,JBK 1,...,240,Meest,104,Flexserve,245,Hinwu,18,One By One,267,Ana franck


In [271]:
player_win_rate_dict = {}

# Making a 2d dictionary for each summoner and all of the champs they play. The value in the nested dictionary will be that champID's win rate.
for player_champ in player_json:
    summoner = player_champ['summoner']
    champID = player_champ['championPlayed']
    total_played = player_champ['totalPlayed']
    if not total_played:
        # No games recorded: leave the entry out (instead of dividing by zero)
        # so win_rate_by_player falls back to its default for this pair.
        continue
    # Named player_win_rate rather than win_rate so that the win_rate() helper
    # defined earlier in the notebook is not overwritten by a float.
    player_win_rate = player_champ['totalWon'] / total_played
    # Create the summoner's nested dictionary on first sight, then store the
    # win rate under the champID.
    player_win_rate_dict.setdefault(summoner, {})[champID] = player_win_rate

def win_rate_by_player(summoner, champID, dict):
    """Return ``summoner``'s win rate on ``champID`` from the nested mapping ``dict``.

    ``dict`` maps summoner -> {champID -> win rate}; the parameter name shadows
    the builtin inside this function. When either the summoner or the champion
    is unknown, a neutral .5 is returned.
    """
    champs_for_summoner = dict.get(summoner, {})
    return champs_for_summoner.get(champID, .5)

In [272]:
# Adding to winrate_df a separate '<team> <role> Player Winrate' column with the
# win rate of the summoner in that slot on the champion they played. Names and
# champion IDs are read from name_match_df.
for column in [team + ' ' + role for team in ('A', 'B') for role in ('Top', 'Jungle', 'Mid', 'Bot', 'Support')]:
    name_column = column + ' Name'
    winrate_df[column + ' Player Winrate'] = name_match_df.apply(
        lambda x: win_rate_by_player(x[name_column], x[column], player_win_rate_dict),
        axis=1,
    )

In [273]:
# Show winrate_df after the '... Player Winrate' columns were added.
winrate_df

Unnamed: 0,Match ID,Team A Won?,A Top,A Jungle,A Mid,A Bot,A Support,B Top,B Jungle,B Mid,...,A Top Player Winrate,A Jungle Player Winrate,A Mid Player Winrate,A Bot Player Winrate,A Support Player Winrate,B Top Player Winrate,B Jungle Player Winrate,B Mid Player Winrate,B Bot Player Winrate,B Support Player Winrate
0,4848223288,1,0.521313,0.503173,0.503341,0.516784,0.472906,0.497863,0.505829,0.502770,...,0.602410,0.525000,0.500000,0.535714,0.615385,0.400000,0.548287,0.546763,0.496711,0.560000
1,4848131748,1,0.479153,0.483957,0.505706,0.519829,0.494923,0.502588,0.466134,0.520461,...,0.545455,1.000000,0.750000,0.516204,0.500000,0.307692,0.250000,0.555556,0.569620,0.375000
2,4848087528,1,0.521313,0.485958,0.493604,0.510774,0.512271,0.497370,0.508568,0.506992,...,0.602410,0.583333,0.700000,0.569620,0.555195,0.550000,0.662162,0.500000,0.583333,0.666667
3,4847265470,0,0.549865,0.503162,0.506785,0.494525,0.509598,0.490364,0.486515,0.503803,...,0.593878,0.636364,0.543796,0.666667,0.510204,0.530201,0.548387,0.600897,1.000000,0.477064
4,4848043757,1,0.493161,0.528064,0.514953,0.483096,0.517835,0.523136,0.495181,0.506785,...,0.750000,0.584016,0.714286,0.476923,0.529412,0.375000,0.541667,0.543796,0.571429,0.391304
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324818,4814345455,1,0.487486,0.486515,0.520461,0.472028,0.517835,0.490364,0.477379,0.464943,...,0.542857,0.409836,0.625000,0.619048,0.468750,0.333333,0.500000,0.568627,0.606061,0.657895
324820,4813002074,0,0.489415,0.470032,0.485156,0.522730,0.509598,0.510396,0.489342,0.471320,...,0.166667,0.483696,0.000000,0.500000,0.000000,0.523077,0.550000,1.000000,0.475000,0.750000
324822,4819650090,0,0.521313,0.517958,0.503480,0.472028,0.521975,0.502588,0.484444,0.496939,...,0.200000,0.602151,0.523810,0.619048,0.500000,1.000000,0.400000,0.666667,0.454545,0.520000
324823,4812981843,1,0.490364,0.505829,0.488907,0.527675,0.483742,0.518345,0.517958,0.505618,...,0.542969,0.500000,0.577287,0.500000,0.375000,0.530466,0.576923,0.000000,0.263158,0.250000


In [281]:
# Trying MLP
# Features are every column except the match identifier and the label; this
# cell holds out 20% of the rows for testing.
# NOTE(review): the win-rate features were derived from the full table before
# this split (and the player stats may include these same matches -- confirm),
# so the score below is likely optimistic.
y = winrate_df['Team A Won?']
X = winrate_df.drop(columns=['Match ID', 'Team A Won?'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

mlp = MLPClassifier(random_state=42)
mlp.fit(X_train, y_train)
mlp.score(X_test, y_test)

0.8939825842477254

In [None]:
# Exhaustive hyper-parameter search for the MLP, scored by cross-validated
# accuracy on the training split and run on all available cores.

# Settings explored for every solver.
common_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],
    'activation': ['logistic', 'tanh', 'relu'],
    'alpha': [1e-4, 1e-3, 1e-2, 1e-1],
    'max_iter': [300, 500, 700],
    'random_state': [42]
}

# The 'learning_rate' schedule is only used by the 'sgd' solver, so it is
# varied for that solver alone; crossing it with 'lbfgs'/'adam' would refit
# identical models three times over (1296 candidates instead of 720).
param_dist = [
    {**common_grid, 'solver': ['lbfgs', 'adam']},
    {**common_grid, 'solver': ['sgd'], 'learning_rate': ['constant', 'invscaling', 'adaptive']},
]

mlp = MLPClassifier()
mlp_cv = GridSearchCV(mlp, param_grid=param_dist, scoring='accuracy', n_jobs=-1)
mlp_cv.fit(X_train, y_train)

In [None]:
# Score the grid search's best estimator on the held-out test split.
mlp_cv.best_estimator_.score(X_test, y_test)