<h1> AI Model that accurately predicts the outcome of any champion matchup in lane </h1>

<h2> Webscrape </h2>

<h4>Webscraping for list of all availible champions, respective winrates, pick rates, and ban rates. </h4>

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

response = requests.get('https://raw.communitydragon.org/latest/plugins/rcp-be-lol-game-data/global/default/v1/champions/')
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
links = []
for a_tag in soup.find_all('a', href=True):
    href = a_tag['href']
    if href.endswith('.json') and not href =='-1.json':
        links.append(f"https://raw.communitydragon.org/latest/plugins/rcp-be-lol-game-data/global/default/v1/champions/{href}")

def json_extract(obj, key):
    arr = []

    def extract(obj, arr, key):
        if isinstance(obj, dict):
            for k, v in obj.items():
                if k == key:
                    arr.append(v)
                elif isinstance(v, (dict,list)):
                    extract(v, arr, key)
        elif isinstance(obj, list):
            for item in obj:
                extract(item, arr, key)

        return arr
    
    values = extract(obj, arr, key)
    return values

champion_names = []
champion_ids = []

for url in links:
    champion_json = requests.get(url).json()
    
    id = json_extract(champion_json, 'id')
    name = json_extract(champion_json, 'name')
    champion_names.append(name[0])
    champion_ids.append(id[0])

championId_dict = dict(map(lambda i, j: (int(i), j), champion_ids, champion_names))

In [2]:
championId_dict #list of all champion names scraped from community dragon

{1: 'Annie',
 10: 'Kayle',
 101: 'Xerath',
 102: 'Shyvana',
 103: 'Ahri',
 104: 'Graves',
 105: 'Fizz',
 106: 'Volibear',
 107: 'Rengar',
 11: 'Master Yi',
 110: 'Varus',
 111: 'Nautilus',
 112: 'Viktor',
 113: 'Sejuani',
 114: 'Fiora',
 115: 'Ziggs',
 117: 'Lulu',
 119: 'Draven',
 12: 'Alistar',
 120: 'Hecarim',
 121: "Kha'Zix",
 122: 'Darius',
 126: 'Jayce',
 127: 'Lissandra',
 13: 'Ryze',
 131: 'Diana',
 133: 'Quinn',
 134: 'Syndra',
 136: 'Aurelion Sol',
 14: 'Sion',
 141: 'Kayn',
 142: 'Zoe',
 143: 'Zyra',
 145: "Kai'Sa",
 147: 'Seraphine',
 15: 'Sivir',
 150: 'Gnar',
 154: 'Zac',
 157: 'Yasuo',
 16: 'Soraka',
 161: "Vel'Koz",
 163: 'Taliyah',
 164: 'Camille',
 166: 'Akshan',
 17: 'Teemo',
 18: 'Tristana',
 19: 'Warwick',
 2: 'Olaf',
 20: 'Nunu & Willump',
 200: "Bel'Veth",
 201: 'Braum',
 202: 'Jhin',
 203: 'Kindred',
 21: 'Miss Fortune',
 22: 'Ashe',
 221: 'Zeri',
 222: 'Jinx',
 223: 'Tahm Kench',
 23: 'Tryndamere',
 233: 'Briar',
 234: 'Viego',
 235: 'Senna',
 236: 'Lucian',
 2

In [4]:
import re
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

response = requests.get('https://u.gg/lol/champions', headers=headers)
response.raise_for_status()  # Check if the request was successful
soup = BeautifulSoup(response.text, 'html.parser')
baseURL = "https://u.gg/"

champContainer = soup.find('div', class_='champion-home-page')
champion_data = []
if champContainer:
    links = champContainer.find_all('a')

    for link in links:
        url = baseURL + link['href']
        resp = requests.get(url, headers=headers)
        if resp.status_code == 200:
            html = resp.text
            soup1 = BeautifulSoup(html, 'html.parser')
            rankingStats = soup1.find('div', class_='champion-ranking-stats-normal')
            if rankingStats:
                text = rankingStats.text.strip()
                tier_pattern = r"([A-S]\+?Tier)"
                redTier_pattern = r"([A-S]\+)"
                win_rate_pattern = r"(\d+(\.\d+)?%)Win Rate"
                rank_pattern = r"(\d+ / \d+)Rank"
                pick_rate_pattern = r"(\d+\.\d+%)Pick Rate"
                ban_rate_pattern = r"(\d+\.\d+%)Ban Rate"
                matches_pattern = r"(\d+,?\d*,?\d*)Matches"
                
                # Apply patterns
                tier = re.search(tier_pattern, text).group(1)
                win_rate = re.search(win_rate_pattern, text).group(1)
                rank = re.search(rank_pattern, text).group(1)
                pick_rate = re.search(pick_rate_pattern, text).group(1)
                ban_rate = re.search(ban_rate_pattern, text).group(1)
                matches = re.search(matches_pattern, text).group(1)

                numbers = rank.split('/')

                # Strip whitespace and convert to integers
                numerator = int(numbers[0].strip())
                denominator = int(numbers[1].strip())

                # Perform division
                rank = f"{((numerator / denominator)*100):.2f}%"

                name = soup1.find('span', class_='champion-name').text.strip()
                if name == 'Red Kayn':
                    name = 'Kayn'

                champion_data.append({
                'name' : name,
                'tier': tier,
                'win_rate': win_rate,
                'rank': rank,
                'pick_rate': pick_rate,
                'ban_rate': ban_rate,
                'matches': matches
                })

In [5]:
champion_data_df = pd.DataFrame(champion_data)
champion_data_df #Data of all champions scraped from u.gg

Unnamed: 0,name,tier,win_rate,rank,pick_rate,ban_rate,matches
0,Aatrox,ATier,49.96%,64.41%,11.4%,16.2%,286783
1,Ahri,CTier,49.66%,67.24%,8.9%,10.1%,224349
2,Akali,DTier,49%,82.76%,4.8%,12.0%,119859
3,Akshan,S+Tier,52.48%,5.17%,2.7%,8.6%,67908
4,Alistar,STier,50.75%,27.27%,6.3%,2.3%,158238
...,...,...,...,...,...,...,...
162,Zeri,CTier,49.72%,59.26%,12.0%,4.6%,300497
163,Ziggs,DTier,47.88%,93.10%,1.4%,0.3%,35191
164,Zilean,ATier,50.25%,40.91%,2.3%,0.7%,57974
165,Zoe,ATier,50.89%,32.76%,2.5%,1.2%,61492


<h2> Database Postgresql </h2>

<h4> Connection to DataBase for easy retrieval/insertion of data to prevent exceeding ratelimit on API and general efficient performance </h4>

In [5]:
from sqlalchemy import text, create_engine

db_username = "postgres"
db_password = "123123123"
db_host = "localhost"
db_port = "5432"
db_name = "lol_analytics"

def create_db_connection_string(db_username, db_password, db_host, db_port, db_name):
        connection_url = 'postgresql+psycopg2://'+db_username+':'+db_password+'@'+db_host+':'+db_port+'/'+db_name
        return connection_url

conn_url = create_db_connection_string(db_username, db_password, db_host, db_port, db_name)

db_engine = create_engine(conn_url, pool_recycle=3600)

connection = db_engine.connect()

In [8]:
with db_engine.connect() as connection:
    leaderboard_df = pd.read_sql(text('SELECT * FROM "Top50SoloQ".leaderboard;'), connection)

leaderboard_df #List of top 300 players in LOL

Unnamed: 0,summonerId,leaguePoints,rank,wins,losses,veteran,inactive,freshBlood,hotStreak
0,I_Ly9cVMnbqP3Lk_fuZbij0g57jAl1vM223hn0BY-Thlu14,2154,I,173,100,True,False,False,True
1,f-f6cnj0xVapOm-KtoQZ14gpHKZBVBu5TnajCeSPNcNna2M8,1661,I,199,137,True,False,False,False
2,ywLe0SCqv9aq3iPO_EWY9l_lnNa4_QtyMW0_w1csrlv5RhA,1551,I,188,136,True,False,False,True
3,xH7x54xRGwAXcxcwEa6mkANUFb2nYQuQcAEZuB7jOtadV1U,1481,I,140,86,True,False,False,False
4,cXAxF2EIp-hCfhvHDpQ6MFAb5TFHEE7aS5eFV8ZMNC_Spe...,1475,I,235,189,True,False,False,False
...,...,...,...,...,...,...,...,...,...
6302,p9SSjKDI7ZVLkjuBPRKJZkrPAn7bs97FW2YPpAgQshwjBM...,0,I,25,21,False,False,False,False
6303,6LAr20zjLNQzmKNPJzKnlx0Ry-o5argtvoLUITEBeDtdsoA,0,I,39,35,False,False,False,False
6304,1yoQdWF52dhGvdn8eDdYx6Rg_SFN56wkpiPF8TmGSgsBdl...,0,I,27,22,False,False,False,False
6305,_T4EJRjPENc66ypQpv5TUoT7cer5OQi9OdGyWHVwL_ot0oU,0,I,39,25,False,False,False,False


In [6]:
with db_engine.connect() as connection:
    playerId_df = pd.read_sql(text('SELECT * FROM "playerId".summonerdata;'), connection)
playerId_df #list of top 90 players in LOL and their ids

Unnamed: 0,index,puuid,summonerId
0,0,Zl5uf2ERyvcZJD_4ELxzgc9SCaltAuDOjAIiuj2sdknOA1...,I_Ly9cVMnbqP3Lk_fuZbij0g57jAl1vM223hn0BY-Thlu14
1,1,lWMqB3Y5AuREUMTIq2uNUzYosZFcrsGm3UwKjuW8ZdQxQS...,f-f6cnj0xVapOm-KtoQZ14gpHKZBVBu5TnajCeSPNcNna2M8
2,2,5XqBXMuSPFUrpk807hIZ5jJ-IGj2uyOgySWq6-GrXlDcmM...,ywLe0SCqv9aq3iPO_EWY9l_lnNa4_QtyMW0_w1csrlv5RhA
3,3,iOI9wt8myzTiOUZpdqYumAQo1490Axk4sFvKp1ZTtz0fui...,xH7x54xRGwAXcxcwEa6mkANUFb2nYQuQcAEZuB7jOtadV1U
4,4,_7r_DQhl9ENq3mc9XYlpuESPeRyv3CS2lLLHrSN-WkrDvd...,cXAxF2EIp-hCfhvHDpQ6MFAb5TFHEE7aS5eFV8ZMNC_Spe...
...,...,...,...
6302,6302,DQxONVAoyDvcqNCbMR3j3ROB9qN_famSLFynNLIXVLr7Ht...,p9SSjKDI7ZVLkjuBPRKJZkrPAn7bs97FW2YPpAgQshwjBM...
6303,6303,XElf8T5P2XMu-sKuG6E0h2m7rhsZW1mkLZpKi1oSkcSAar...,6LAr20zjLNQzmKNPJzKnlx0Ry-o5argtvoLUITEBeDtdsoA
6304,6304,nCfo4k3xqrPJfG-uYKPe0J6xjwtlKwWRDvjy6LyjFBZim9...,1yoQdWF52dhGvdn8eDdYx6Rg_SFN56wkpiPF8TmGSgsBdl...
6305,6305,pnuAsi-arvwFFD7bGf6CuF3WuE3nD9IvlnzmCXb7JliRtG...,_T4EJRjPENc66ypQpv5TUoT7cer5OQi9OdGyWHVwL_ot0oU


<h2> Data Collection </h2>

<h4> API functions </h4>

In [27]:
api_key = "RGAPI-c8d92844-a41a-49d1-ab74-5891fd2bcb16"

def get_puuid_fromSummonerId(summonerId):
    puuid = requests.get(f"https://na1.api.riotgames.com/lol/summoner/v4/summoners/{summonerId}?api_key={api_key}").json()['puuid']
    return puuid

def getLeaderBoard(league):
    get_top50_url = f"https://na1.api.riotgames.com/lol/league/v4/{league}leagues/by-queue/RANKED_SOLO_5x5?api_key={api_key}"
    return requests.get(get_top50_url).json()

def get_matches(puuid):
    url = f"https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/{puuid}/ids?queue=420&start=0&count=25&api_key={api_key}"
    return requests.get(url).json()

def get_matchData(matchId):
    url = f"https://americas.api.riotgames.com/lol/match/v5/matches/{matchId}?api_key={api_key}"
    return requests.get(url).json()

<h4> Helper Functions </h4>

In [46]:
import time

def TopPlayers (): #function to get all players in the top 3 LOL leagues
    chal = getLeaderBoard('challenger')
    gm = getLeaderBoard('grandmaster')
    master = getLeaderBoard('master')

    chal_df = pd.DataFrame(chal['entries'])
    gm_df = pd.DataFrame(gm['entries'])
    master_df = pd.DataFrame(master['entries'])

    lb_df = pd.concat([chal_df, gm_df, master_df], ignore_index=True).sort_values('leaguePoints', ascending=False).reset_index(drop=True)
    lb_df = lb_df.set_index('summonerId')

    lb_df.to_sql('leaderboard', con=connection, schema='Top50SoloQ', if_exists='replace')

def PuuidWSummonerId(): #function to convert summonerId to Puuid and store it for easy conversions
    puuid_list = []
    count = 0
    for player in leaderboard_df['summonerId']:
        if count == 98:
            count = 0
            time.sleep(90) #wait for ratelimit on api
        
        puuid_list.append({'puuid' : get_puuid_fromSummonerId(player),
                        'summonerId': player})
        count += 1

    puuid_df = pd.DataFrame(puuid_list)
    puuid_df.to_sql(name='summonerdata', con=connection, schema='playerId', if_exists='replace', index=True)

def processMatchInfoGen(match):
    redTeamChamps = []
    redTeamRoles = []
    redTeamWRs = []
    redTeamPickRates = []
    redTeamBanRates = []
    redTeamTiers = []
    redTeamRanks = []

    blueTeamChamps = []
    blueTeamRoles = []
    blueTeamWRs = []
    blueTeamPickRates = []
    blueTeamBanRates = []
    blueTeamTiers = []
    blueTeamRanks = []

    outcome = 0
    participants = match['metadata']['participants']
    players = match['info']['participants']

    if match['info']['participants'][0]['win'] == False:
        if match['info']['participants'][0]['teamId'] == 200:
            outcome = 0
        else:
            outcome = 1
    else:
        if match['info']['participants'][0]['teamId'] == 200:
            outcome = 1
        else:
            outcome = 0
    #0 indicates blue side win, 1 indicates red side win

    def get_index(puuid):
        index = players[participants.index(puuid)]
        return index
    
    def get_championData(name):
        temp_df = champion_data_df[champion_data_df['name'].str.contains(name, case=False)]
        wr = temp_df['win_rate'].str.rstrip('%').astype(float).iloc[0]
        pr = temp_df['pick_rate'].str.rstrip('%').astype(float).iloc[0]
        br = temp_df['ban_rate'].str.rstrip('%').astype(float).iloc[0]
        tier = temp_df['tier'].iloc[0]
        rank = temp_df['rank'].str.rstrip('%').astype(float).iloc[0]

        return wr, pr, br, tier, rank
    
    for player in participants:
        player = get_index(player)
        if player['teamId'] == 200:
            if player['championId'] in championId_dict:
                wr, pr, br, tier, rank = get_championData(championId_dict[player['championId']])
                redTeamChamps.append(championId_dict[player['championId']])
            redTeamRoles.append(player['teamPosition'])
            redTeamWRs.append(wr)
            redTeamPickRates.append(pr)
            redTeamBanRates.append(br)
            redTeamTiers.append(tier)
            redTeamRanks.append(rank)
        else:
            if player['championId'] in championId_dict:
                wr, pr, br, tier, rank = get_championData(championId_dict[player['championId']])
                blueTeamChamps.append(championId_dict[player['championId']])
            blueTeamRoles.append(player['teamPosition'])
            blueTeamWRs.append(wr)
            blueTeamPickRates.append(pr)
            blueTeamBanRates.append(br)
            blueTeamTiers.append(tier)
            blueTeamRanks.append(rank)
    
    match_info = pd.DataFrame([{
        'outcome' : outcome,

        #red team
        'redTeam' : redTeamChamps,
        'redTeamRoles' : redTeamRoles,
        'redTeamWinRates' : redTeamWRs,
        'redTeamPickRatesm' : redTeamPickRates,
        'redTeamBanRates' : redTeamBanRates,
        'redTeamTiers' : redTeamTiers,
        'redTeamRanks' : redTeamRanks,
        #blue team
        'blueTeam' : blueTeamChamps,
        'blueTeamRoles' : blueTeamRoles,
        'blueTeamWinRates' : blueTeamWRs,
        'blueTeamPickRatesm' : blueTeamPickRates,
        'blueTeamBanRates' : blueTeamBanRates,
        'blueTeamTiers' : blueTeamTiers,
        'blueTeamRanks' : blueTeamRanks,
    }])
    return match_info


#test_df = processMatchInfo(get_matchData('NA1_5035035400'))
#test_df = test_df.replace(championId_dict)
#test_df

<h4> Collection of list of matches played by top players </h4>

In [3]:
import pandas as pd
dfv2 = pd.read_csv('dfv2.csv')
dfv2 = dfv2.drop(columns='Unnamed: 0')
dfv2

Unnamed: 0,outcome,redTeam,redTeamRoles,redTeamWinRates,redTeamPickRatesm,redTeamBanRates,redTeamTiers,redTeamRanks,blueTeam,blueTeamRoles,blueTeamWinRates,blueTeamPickRatesm,blueTeamBanRates,blueTeamTiers,blueTeamRanks
0,1,"['Kennen', 'Rengar', 'Zed', ""Kai'Sa"", 'Rakan']","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[50.69, 48.61, 48.8, 50.8, 50.11]","[2.4, 2.7, 7.0, 33.7, 5.0]","[1.6, 5.4, 18.7, 12.4, 0.8]","['ATier', 'DTier', 'DTier', 'S+Tier', 'ATier']","[38.98, 88.24, 87.93, 29.63, 45.45]","['Udyr', 'Nidalee', 'Galio', 'Ezreal', 'Seraph...","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[51.88, 50.17, 52.42, 49.56, 50.63]","[3.4, 9.2, 3.8, 25.2, 4.7]","[2.3, 12.7, 1.5, 15.1, 1.1]","['STier', 'S+Tier', 'S+Tier', 'DTier', 'ATier']","[5.88, 50.98, 8.62, 74.07, 34.09]"
1,1,"['Camille', 'Fiddlesticks', 'Corki', ""Kai'Sa"",...","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[51.58, 52.0, 47.91, 50.8, 49.37]","[6.3, 3.2, 5.9, 33.7, 13.1]","[7.2, 2.7, 4.2, 12.4, 20.7]","['S+Tier', 'STier', 'DTier', 'S+Tier', 'DTier']","[8.47, 1.96, 91.38, 29.63, 72.73]","['Warwick', 'Karthus', 'Garen', 'Ezreal', 'Jan...","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[51.24, 49.06, 50.45, 49.56, 51.7]","[2.7, 3.4, 6.8, 25.2, 5.3]","[1.2, 13.1, 4.7, 15.1, 3.1]","['ATier', 'DTier', 'STier', 'DTier', 'S+Tier']","[13.73, 76.47, 45.76, 74.07, 9.09]"
2,1,"['Twisted Fate', 'Samira', 'Ezreal', 'Karthus'...","['TOP', 'BOTTOM', 'MIDDLE', 'JUNGLE', 'UTILITY']","[49.93, 48.69, 49.56, 49.06, 49.37]","[3.4, 4.9, 25.2, 3.4, 13.1]","[3.7, 10.7, 15.1, 13.1, 20.7]","['BTier', 'DTier', 'DTier', 'DTier', 'DTier']","[58.62, 77.78, 74.07, 76.47, 72.73]","['Zeri', ""Kha'Zix"", 'Fiddlesticks', ""Kai'Sa"", ...","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[49.72, 50.38, 52.0, 50.8, 50.75]","[12.0, 10.1, 3.2, 33.7, 6.3]","[4.6, 9.9, 2.7, 12.4, 2.3]","['CTier', 'S+Tier', 'STier', 'S+Tier', 'STier']","[59.26, 47.06, 1.96, 29.63, 27.27]"
3,0,"['Rumble', 'Viego', 'Hwei', 'Samira', 'Nautilus']","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[49.64, 49.82, 48.81, 48.69, 49.37]","[3.7, 11.5, 7.7, 4.9, 13.1]","[7.2, 6.7, 17.1, 10.7, 20.7]","['BTier', 'BTier', 'DTier', 'DTier', 'DTier']","[72.88, 66.67, 86.21, 77.78, 72.73]","['Udyr', 'Brand', 'Corki', 'Jinx', 'Braum']","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[51.88, 47.99, 47.91, 51.36, 51.47]","[3.4, 2.7, 5.9, 14.9, 6.5]","[2.3, 15.2, 4.2, 5.9, 3.8]","['STier', 'DTier', 'DTier', 'S+Tier', 'S+Tier']","[5.88, 88.64, 91.38, 22.22, 13.64]"
4,1,"['Camille', 'Nunu & Willump', 'Twisted Fate', ...","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[51.58, 50.62, 49.93, 49.56, 50.25]","[6.3, 2.4, 3.4, 25.2, 2.3]","[7.2, 0.4, 3.7, 15.1, 0.7]","['S+Tier', 'ATier', 'BTier', 'DTier', 'ATier']","[8.47, 37.25, 58.62, 74.07, 40.91]","['Lucian', 'Ekko', 'Yone', 'Miss Fortune', 'Mi...","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[49.56, 50.48, 49.42, 50.5, 50.03]","[9.8, 4.1, 6.4, 8.9, 7.6]","[3.2, 1.4, 6.7, 2.6, 5.0]","['CTier', 'ATier', 'DTier', 'STier', 'STier']","[70.37, 43.14, 74.14, 33.33, 50.0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7594,1,"['Jax', 'Zac', 'Akali', ""Kai'Sa"", 'Sona']","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[50.1, 49.88, 49.0, 50.8, 51.2]","[6.4, 3.9, 4.8, 33.7, 2.8]","[13.1, 3.9, 12.0, 12.4, 0.2]","['S+Tier', 'BTier', 'DTier', 'S+Tier', 'ATier']","[61.02, 62.75, 82.76, 29.63, 20.45]","[""K'Sante"", 'Diana', 'Tristana', 'Jinx', 'Blit...","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[46.62, 49.92, 50.73, 51.36, 49.8]","[4.3, 5.3, 11.2, 14.9, 6.6]","[2.0, 2.6, 19.0, 5.9, 25.2]","['DTier', 'BTier', 'S+Tier', 'S+Tier', 'ATier']","[94.92, 58.82, 37.93, 22.22, 61.36]"
7595,1,"['Skarner', 'Zac', 'Sylas', 'Draven', 'Thresh']","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[45.82, 49.88, 50.83, 50.09, 50.04]","[1.8, 3.9, 7.4, 5.7, 12.6]","[18.4, 3.9, 9.3, 25.6, 4.8]","['DTier', 'BTier', 'S+Tier', 'S+Tier', 'STier']","[98.31, 62.75, 34.48, 40.74, 47.73]","['Sett', 'Shyvana', 'Xerath', 'Jhin', 'Galio']","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[51.09, 51.05, 49.19, 50.05, 52.42]","[6.9, 3.3, 3.9, 15.2, 3.8]","[2.9, 0.9, 5.8, 3.4, 1.5]","['S+Tier', 'ATier', 'CTier', 'STier', 'S+Tier']","[16.95, 17.65, 77.27, 44.44, 8.62]"
7596,1,"['Ornn', 'Fiddlesticks', 'Tristana', 'Zeri', '...","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[50.16, 52.0, 50.73, 49.72, 50.04]","[3.8, 3.2, 11.2, 12.0, 12.6]","[0.5, 2.7, 19.0, 4.6, 4.8]","['ATier', 'STier', 'S+Tier', 'CTier', 'STier']","[59.32, 1.96, 37.93, 59.26, 47.73]","['Yone', 'Ivern', 'Xerath', ""Kai'Sa"", 'Maokai']","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[49.42, 51.91, 49.19, 50.8, 50.67]","[6.4, 1.8, 3.9, 33.7, 2.0]","[6.7, 1.0, 5.8, 12.4, 0.4]","['DTier', 'ATier', 'CTier', 'S+Tier', 'ATier']","[74.14, 3.92, 77.27, 29.63, 29.55]"
7597,1,"['Kennen', 'Brand', 'Yasuo', 'Jinx', 'Karma']","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[50.69, 47.99, 49.68, 51.36, 49.99]","[2.4, 2.7, 8.4, 14.9, 7.9]","[1.6, 15.2, 17.8, 5.9, 3.0]","['ATier', 'DTier', 'BTier', 'S+Tier', 'BTier']","[38.98, 88.64, 65.52, 22.22, 52.27]","['Azir', 'Viego', 'Akali', 'Aphelios', 'Sona']","['MIDDLE', 'JUNGLE', 'TOP', 'BOTTOM', 'UTILITY']","[46.39, 49.82, 49.0, 46.65, 51.2]","[2.5, 11.5, 4.8, 3.9, 2.8]","[0.4, 6.7, 12.0, 0.6, 0.2]","['DTier', 'BTier', 'DTier', 'DTier', 'ATier']","[100.0, 66.67, 82.76, 96.3, 20.45]"


<h2> AI Model Training/Testing </h2>

In [4]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
import numpy as np

tiers = ['S+Tier', 'STier', 'ATier', 'BTier', 'CTier', 'DTier']

tier_label_encoder = LabelEncoder()

all_champions = []
for champ in championId_dict.values():
    all_champions.append(champ)

champ_label_encoder = LabelEncoder()

champ_label_encoder.fit(all_champions)
tier_label_encoder.fit(tiers)

tester_df = dfv2.copy()

In [5]:
tester_df.isnull().sum()
nan_rows = tester_df.isnull().any()
nan_rows

outcome               False
redTeam               False
redTeamRoles          False
redTeamWinRates       False
redTeamPickRatesm     False
redTeamBanRates       False
redTeamTiers          False
redTeamRanks          False
blueTeam              False
blueTeamRoles         False
blueTeamWinRates      False
blueTeamPickRatesm    False
blueTeamBanRates      False
blueTeamTiers         False
blueTeamRanks         False
dtype: bool

In [6]:
import ast
tester_df['redTeam'] = tester_df['redTeam'].apply(ast.literal_eval)
tester_df['redTeam'] = tester_df['redTeam'].apply(lambda team: [champ.strip("'") for champ in team])
tester_df['redTeamTiers'] = tester_df['redTeamTiers'].apply(ast.literal_eval)
tester_df['redTeamTiers'] = tester_df['redTeamTiers'].apply(lambda team: [champ.strip("'") for champ in team])

tester_df['blueTeam'] = tester_df['blueTeam'].apply(ast.literal_eval)
tester_df['blueTeam'] = tester_df['blueTeam'].apply(lambda team: [champ.strip("'") for champ in team])
tester_df['blueTeamTiers'] = tester_df['blueTeamTiers'].apply(ast.literal_eval)
tester_df['blueTeamTiers'] = tester_df['blueTeamTiers'].apply(lambda team: [champ.strip("'") for champ in team])

In [7]:
tester_df['redTeamWinRates'] = tester_df['redTeamWinRates'].apply(ast.literal_eval)
tester_df['blueTeamWinRates'] = tester_df['blueTeamWinRates'].apply(ast.literal_eval)
tester_df['redTeamPickRatesm'] = tester_df['redTeamPickRatesm'].apply(ast.literal_eval)
tester_df['blueTeamPickRatesm'] = tester_df['blueTeamPickRatesm'].apply(ast.literal_eval)
tester_df['redTeamBanRates'] = tester_df['redTeamBanRates'].apply(ast.literal_eval)
tester_df['blueTeamBanRates'] = tester_df['blueTeamBanRates'].apply(ast.literal_eval)
tester_df['redTeamRanks'] = tester_df['redTeamRanks'].apply(ast.literal_eval)
tester_df['blueTeamRanks'] = tester_df['blueTeamRanks'].apply(ast.literal_eval)


tester_df['redTeamWinRates'] = tester_df['redTeamWinRates'].apply(lambda x: [float(i) for i in x])
tester_df['blueTeamWinRates'] = tester_df['blueTeamWinRates'].apply(lambda x: [float(i) for i in x])
tester_df['redTeamPickRatesm'] = tester_df['redTeamPickRatesm'].apply(lambda x: [float(i) for i in x])
tester_df['blueTeamPickRatesm'] = tester_df['blueTeamPickRatesm'].apply(lambda x: [float(i) for i in x])
tester_df['redTeamBanRates'] = tester_df['redTeamBanRates'].apply(lambda x: [float(i) for i in x])
tester_df['blueTeamBanRates'] = tester_df['blueTeamBanRates'].apply(lambda x: [float(i) for i in x])
tester_df['redTeamRanks'] = tester_df['redTeamRanks'].apply(lambda x: [float(i) for i in x])
tester_df['blueTeamRanks'] = tester_df['blueTeamRanks'].apply(lambda x: [float(i) for i in x])

In [8]:
tester_df['redTeam'] = tester_df['redTeam'].apply(lambda x: champ_label_encoder.transform(x))
tester_df['blueTeam'] = tester_df['blueTeam'].apply(lambda x: champ_label_encoder.transform(x))
tester_df['redTeamTiers'] = tester_df['redTeamTiers'].apply(lambda x: tier_label_encoder.transform(x))
tester_df['blueTeamTiers'] = tester_df['blueTeamTiers'].apply(lambda x: tier_label_encoder.transform(x))
tester_df

Unnamed: 0,outcome,redTeam,redTeamRoles,redTeamWinRates,redTeamPickRatesm,redTeamBanRates,redTeamTiers,redTeamRanks,blueTeam,blueTeamRoles,blueTeamWinRates,blueTeamPickRatesm,blueTeamBanRates,blueTeamTiers,blueTeamRanks
0,1,"[63, 107, 162, 55, 101]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[50.69, 48.61, 48.8, 50.8, 50.11]","[2.4, 2.7, 7.0, 33.7, 5.0]","[1.6, 5.4, 18.7, 12.4, 0.8]","[0, 3, 3, 4, 0]","[38.98, 88.24, 87.93, 29.63, 45.45]","[140, 89, 35, 31, 114]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[51.88, 50.17, 52.42, 49.56, 50.63]","[3.4, 9.2, 3.8, 25.2, 4.7]","[2.3, 12.7, 1.5, 15.1, 1.1]","[5, 4, 4, 3, 0]","[5.88, 50.98, 8.62, 74.07, 34.09]"
1,1,"[20, 32, 23, 55, 87]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[51.58, 52.0, 47.91, 50.8, 49.37]","[6.3, 3.2, 5.9, 33.7, 13.1]","[7.2, 2.7, 4.2, 12.4, 20.7]","[4, 5, 3, 4, 3]","[8.47, 1.96, 91.38, 29.63, 72.73]","[152, 58, 37, 31, 48]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[51.24, 49.06, 50.45, 49.56, 51.7]","[2.7, 3.4, 6.8, 25.2, 5.3]","[1.2, 13.1, 4.7, 15.1, 3.1]","[0, 3, 5, 3, 4]","[13.73, 76.47, 45.76, 74.07, 9.09]"
2,1,"[138, 111, 31, 58, 87]","['TOP', 'BOTTOM', 'MIDDLE', 'JUNGLE', 'UTILITY']","[49.93, 48.69, 49.56, 49.06, 49.37]","[3.4, 4.9, 25.2, 3.4, 13.1]","[3.7, 10.7, 15.1, 13.1, 20.7]","[1, 3, 3, 3, 3]","[58.62, 77.78, 74.07, 76.47, 72.73]","[163, 64, 32, 55, 4]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[49.72, 50.38, 52.0, 50.8, 50.75]","[12.0, 10.1, 3.2, 33.7, 6.3]","[4.6, 9.9, 2.7, 12.4, 2.3]","[2, 4, 5, 4, 5]","[59.26, 47.06, 1.96, 29.63, 27.27]"
3,0,"[109, 148, 44, 111, 87]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[49.64, 49.82, 48.81, 48.69, 49.37]","[3.7, 11.5, 7.7, 4.9, 13.1]","[7.2, 6.7, 17.1, 10.7, 20.7]","[1, 1, 3, 3, 3]","[72.88, 66.67, 86.21, 77.78, 72.73]","[140, 16, 23, 53, 17]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[51.88, 47.99, 47.91, 51.36, 51.47]","[3.4, 2.7, 5.9, 14.9, 6.5]","[2.3, 15.2, 4.2, 5.9, 3.8]","[5, 3, 3, 4, 4]","[5.88, 88.64, 91.38, 22.22, 13.64]"
4,1,"[20, 92, 138, 31, 165]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[51.58, 50.62, 49.93, 49.56, 50.25]","[6.3, 2.4, 3.4, 25.2, 2.3]","[7.2, 0.4, 3.7, 15.1, 0.7]","[4, 0, 1, 3, 0]","[8.47, 37.25, 58.62, 74.07, 40.91]","[73, 28, 158, 81, 80]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[49.56, 50.48, 49.42, 50.5, 50.03]","[9.8, 4.1, 6.4, 8.9, 7.6]","[3.2, 1.4, 6.7, 2.6, 5.0]","[2, 0, 3, 5, 5]","[70.37, 43.14, 74.14, 33.33, 50.0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7594,1,"[50, 161, 2, 55, 124]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[50.1, 49.88, 49.0, 50.8, 51.2]","[6.4, 3.9, 4.8, 33.7, 2.8]","[13.1, 3.9, 12.0, 12.4, 0.2]","[4, 1, 3, 4, 0]","[61.02, 62.75, 82.76, 29.63, 20.45]","[54, 25, 135, 53, 15]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[46.62, 49.92, 50.73, 51.36, 49.8]","[4.3, 5.3, 11.2, 14.9, 6.6]","[2.0, 2.6, 19.0, 5.9, 25.2]","[3, 1, 4, 4, 0]","[94.92, 58.82, 37.93, 22.22, 61.36]"
7595,1,"[122, 161, 127, 27, 134]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[45.82, 49.88, 50.83, 50.09, 50.04]","[1.8, 3.9, 7.4, 5.7, 12.6]","[18.4, 3.9, 9.3, 25.6, 4.8]","[3, 1, 4, 4, 5]","[98.31, 62.75, 34.48, 40.74, 47.73]","[115, 118, 155, 52, 35]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[51.09, 51.05, 49.19, 50.05, 52.42]","[6.9, 3.3, 3.9, 15.2, 3.8]","[2.9, 0.9, 5.8, 3.4, 1.5]","[4, 0, 2, 5, 4]","[16.95, 17.65, 77.27, 44.44, 8.62]"
7596,1,"[95, 32, 135, 163, 134]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[50.16, 52.0, 50.73, 49.72, 50.04]","[3.8, 3.2, 11.2, 12.0, 12.6]","[0.5, 2.7, 19.0, 4.6, 4.8]","[0, 5, 4, 2, 5]","[59.32, 1.96, 37.93, 59.26, 47.73]","[158, 47, 155, 55, 78]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[49.42, 51.91, 49.19, 50.8, 50.67]","[6.4, 1.8, 3.9, 33.7, 2.0]","[6.7, 1.0, 5.8, 12.4, 0.4]","[3, 0, 2, 4, 0]","[74.14, 3.92, 77.27, 29.63, 29.55]"
7597,1,"[63, 16, 157, 53, 57]","['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']","[50.69, 47.99, 49.68, 51.36, 49.99]","[2.4, 2.7, 8.4, 14.9, 7.9]","[1.6, 15.2, 17.8, 5.9, 3.0]","[0, 3, 1, 4, 1]","[38.98, 88.64, 65.52, 22.22, 52.27]","[12, 148, 2, 8, 124]","['MIDDLE', 'JUNGLE', 'TOP', 'BOTTOM', 'UTILITY']","[46.39, 49.82, 49.0, 46.65, 51.2]","[2.5, 11.5, 4.8, 3.9, 2.8]","[0.4, 6.7, 12.0, 0.6, 0.2]","[3, 1, 3, 3, 0]","[100.0, 66.67, 82.76, 96.3, 20.45]"


In [9]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

numerical_features = ['redTeamWinRates', 'blueTeamWinRates', 'redTeamPickRatesm', 'blueTeamPickRatesm', 'redTeamBanRates', 'blueTeamBanRates', 'redTeamRanks', 'blueTeamRanks']

for feature in numerical_features:
    tester_df[feature] = tester_df[feature].apply(lambda x: scaler.fit_transform(np.array(x).reshape(-1, 1)).flatten())


In [10]:
X = np.hstack([
    np.stack(tester_df['redTeam'].values),
    np.stack(tester_df['blueTeam'].values),
    np.stack(tester_df['redTeamWinRates'].values),
    np.stack(tester_df['blueTeamWinRates'].values),
    np.stack(tester_df['redTeamPickRatesm'].values),
    np.stack(tester_df['blueTeamPickRatesm'].values),
    np.stack(tester_df['redTeamBanRates'].values),
    np.stack(tester_df['blueTeamBanRates'].values),
    np.stack(tester_df['redTeamTiers'].values),
    np.stack(tester_df['blueTeamTiers'].values),
    np.stack(tester_df['redTeamRanks'].values),
    np.stack(tester_df['blueTeamRanks'].values)
])

In [11]:
y = tester_df['outcome'].values

In [12]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

<h4>RandomForest</h4>

In [13]:
# Initialize the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
#rf_model = RandomForestClassifier()

In [14]:
# Train the model
rf_model.fit(X_train, y_train)

In [15]:
# Make predictions
y_pred = rf_model.predict(X_test)

In [36]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.525


In [None]:
import pickle

with open('RandomForestTeamMatchupModel.pkl', 'wb') as file:
    pickle.dump(rf_model, file)

In [50]:
import xgboost as xgb
from sklearn.metrics import accuracy_score

xgb_model = xgb.XGBClassifier(eval_metric='logloss')
xgb_model.fit(X_train, y_train)

# Make predictions
y_pred = xgb_model.predict(X_test)
y_pred_prob = xgb_model.predict_proba(X_test)[:, 1] 
y_pred_percentage = y_pred_prob * 100
y_pred_binary = np.round(y_pred_prob)


# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
idk_df = pd.DataFrame(y_pred_percentage)
idk_df


Accuracy: 0.5111842105263158


Unnamed: 0,0
0,54.933784
1,74.486473
2,76.822105
3,54.033203
4,76.192078
...,...
1515,71.546394
1516,59.824753
1517,64.380135
1518,53.612255


In [286]:
y_pred_df = pd.DataFrame(y_pred)
y_pred_df

Unnamed: 0,0
0,0
1,0
2,1
3,0
4,1
...,...
1515,0
1516,0
1517,0
1518,1
