# League of Legends Project

## Part 1: Data Collection
    Ranked match data will be collected from each Summoner Name provided. 

    Part 1A: Data Collection from Data Dragon
        - create mapping dictionaries from the json files provided

    Part 1B: Data Collection from Riot API
        - extract data from RIOT API
    
## Part 2: Data Analysis
    Exploratory Data Analysis will be done on the match data to discover basic statistics and trends.
    
    Part 2A: Data Wrangling and Cleaning
        - wrangle and manipulate data table for consistency
        - prepare data for visualization and machine learning

    Part 2B: Data Visualization
        - visualize trends and correlations show in the data
        - create dashboard to visualize game statistics by player
        
    Part 2C: Machine Learning
        - using data extracted from top 100 players, predict the outcome of a match based off the different variables provided
        - using data extracted from top 100 players, identify and group champions based off classification algorithm

# Part 1A

In [26]:
import pandas as pd
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import regex as re
import json
import pprint
import time
from bs4 import BeautifulSoup

## Champion JSON

In [27]:
def create_champion_mapping_dict(file_name):
    champion_key_dict = {}
    with open(f'json_files/{file_name}', encoding = 'utf-8') as f:
        champion_json = json.load(f)
        for champion, info in champion_json['data'].items():
            champion_name = champion
            for c_key, c_value in info.items():
                if c_key == 'key':
                    champion_key_dict[champion_name] = int(c_value)
    return champion_key_dict

champion_key_dict = create_champion_mapping_dict('champion.json')

## Item JSON

In [28]:
def create_item_mapping_dict(file_name):
    item_key_dict = {}
    with open(f'json_files/{file_name}', encoding = 'utf-8') as f:
        item_data = json.load(f)
        for item, info in item_data['data'].items():
            item_id = item
            for i_key, i_value in info.items():
                if i_key == 'name':
                    item_key_dict[int(item_id)] = i_value
    return item_key_dict

item_key_dict = create_item_mapping_dict('item.json')

## Queue JSON

In [29]:
def create_queue_mapping_dict(file_name):
    queue_info_dict = {}
    with open(f'json_files/{file_name}', encoding = 'utf-8') as f:
        queue_data = json.load(f)
        for queue in queue_data:
            queue_key = queue['queueId']
            queue_name = queue['description']
            queue_notes = str(queue['notes'])
            if 'deprecated' not in queue_notes.lower():
                queue_info_dict[queue_key] = str(queue_name).upper()

    queue_info_dict_reverse = {str(v).upper(): k for (k,v) in queue_info_dict.items()}
    return queue_info_dict, queue_info_dict_reverse

queue_key_dict, queue_key_dict_reverse = create_queue_mapping_dict('queue.json')

## Summoner Spell JSON

In [30]:
def create_summoner_spell_mapping_dict(file_name):
    ss_info_dict = {}
    with open(f'json_files/{file_name}', encoding = 'utf-8') as f:
        ss_data = json.load(f)
        for s_spell, info in ss_data['data'].items():
            for key in info:
                ss_key = info['key']
                ss_name = info['name']
                ss_description = info['description']
                ss_info_dict[ss_name] = (ss_key, ss_description)
    return ss_info_dict

ss_key_dict = create_summoner_spell_mapping_dict('summoner.json')

## Season JSON

In [31]:
def create_season_mapping_dict(file_name):
    season_info_dict = {}
    with open(f"json_files/{file_name}", encoding = "utf-8") as f:
        season_data = json.load(f)
        for season in season_data:
            season_info_dict[season['id']] = season['season']
    return season_info_dict
    
season_key_dict = create_season_mapping_dict('season.json')

# Part 1B

In [32]:
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept-Charset': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Origin': 'https://developer.riotgames.com',
    'X-Riot-Token': ''
    }
RIOT_API = ''

## Getting Account Information Using Summoner Name

In [33]:
def get_puu_id(summoner_name, riot_api_key = RIOT_API, request_headers = REQUEST_HEADERS, access = True):
    summoner_name_updated = summoner_name.replace(' ', '%20')
    url = 'https://na1.api.riotgames.com/lol/summoner/v4/summoners/by-name/'
    account_url = url + summoner_name_updated
    riot_headers = request_headers
    riot_headers["X-Riot-Token"] = riot_api_key
    while access:
        req = requests.get(account_url, headers = riot_headers)
        if req.status_code == 200:
            account_info = json.loads(req.content.decode("utf-8"))
            puu_id = account_info['puuid']
            access = False

        elif req.status_code == 404:
            puu_id = None
            access = False

        else:
            time.sleep(10)
    return  puu_id

## Getting A List of Match IDs Using PUU ID

In [34]:
def get_match_ids(puu_id, start, end, riot_api_key = RIOT_API, request_headers = REQUEST_HEADERS, access = True):
    match_ids = []
    match_history_url = f'https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/{puu_id}/ids?start={start}&count={end}'
    riot_headers = request_headers
    riot_headers["X-Riot-Token"] = riot_api_key

    while access:
        req = requests.get(match_history_url, headers = riot_headers)
        if req.status_code == 200:
            match_id_info = json.loads(req.content.decode("utf-8"))
            for match_id in match_id_info:
                match_ids.append(match_id)
            access = False
        else:
            print('Waiting for 10 seconds to refresh restriction request...')
            time.sleep(10)

    return match_ids

## Get Match Information Using Match ID

In [35]:
def get_match_details(match_id, riot_api_key = RIOT_API, request_headers = REQUEST_HEADERS, access = True):
    match_url = 'https://americas.api.riotgames.com/lol/match/v5/matches/' + str(match_id)
    riot_headers = request_headers
    riot_headers["X-Riot-Token"] = riot_api_key

    while access:
        req = requests.get(match_url, headers = riot_headers)
        if req.status_code == 200:
            match_details = json.loads(req.content.decode("utf-8"))
            access = False

        elif req.status_code == 404:
            print(f'{match_id} - DATA NOT FOUND - MATCH FILE NOT FOUND')
            match_details = None
            access = False
            
        else:
            print(req.status_code)
            print('Waiting for 10 seconds to refresh restriction request...')
            time.sleep(10)

    return match_details

## Cleaning and Reducing Match Data

In [36]:
def clean_and_reduce_match_data(match_details, queue_type, queue_mapping_info = queue_key_dict_reverse):
    queue_filter = queue_mapping_info[queue_type.upper()]
    match_cleaned_dict = {}
    for key, value in match_details['info'].items():

        if key == 'gameId':
            match_id = value

        if key == 'gameMode':
            match_type = value
            
        if key == 'participants':
            participant_details = value
        
        if key == 'queueId':
            if value == queue_filter:
                queue_id = value
                match_cleaned_dict[(match_type, queue_id, match_id)] = participant_details
            else:
                return None
    return match_cleaned_dict

In [37]:
def filter_match_data_by_puuid(match_cleaned_dict, puu_id):
    for match_id, match_details in match_cleaned_dict.items():
        for participant_dict in match_details:
            if participant_dict['puuid'] == puu_id:
                return {match_id[0] + '_' + str(match_id[1]) + '_' + str(match_id[2]): participant_dict}

In [38]:
def remove_perks_attribute(match):
    for key, info in match.items():
        for key in list(info.keys()):
            if key == 'perks':
                del info[key]

    return match

## Turn Match Data into DataFrame

In [39]:
def make_match_df(match):
    match_df = pd.DataFrame.from_dict(match, orient = 'index')
    return match_df

## Create Super Function for Data Collection

In [40]:
def get_match_history_by_summoner_name(summoner_name, start, end, queue_type, riot_api = RIOT_API, request_headers = REQUEST_HEADERS):
    puu_id = get_puu_id(summoner_name, riot_api, request_headers)

    if puu_id != None:
        match_ids = get_match_ids(puu_id, start, end, riot_api, request_headers)

        matches_df_listed = []

        for match_id in match_ids:
            match_detail = get_match_details(match_id, riot_api, request_headers)
            if match_detail != None:
                match_cleaned = clean_and_reduce_match_data(match_detail, queue_type)
                if match_cleaned != None:
                    match_cleaned_filtered = filter_match_data_by_puuid(match_cleaned, puu_id)

                    match_final = remove_perks_attribute(match_cleaned_filtered)

                    match_df = make_match_df(match_final)
    
                    matches_df_listed.append(match_df)
                    
                else:
                    print(f'Match ID: {match_id} IS NOT OF THE QUEUE TYPE: {queue_type.upper()}')
                    continue

        if matches_df_listed != []:
            return pd.concat(matches_df_listed)
                
        
    else:
        return f'NO PUU_ID FOUND FOR SUMMONER: {summoner_name}'

## Create Super Function for Initial Data Cleanup


In [41]:
def clean_columns(data):
    data = data.reset_index()

    cleaned_cols = list(data.columns.values)[1:]

    data[['matchType', 'queueId', 'matchId']] = data['index'].str.split('_', expand = True)

    cleaned_cols = ['queueId', 'matchId'] + cleaned_cols
    data = data[cleaned_cols]
    
    data['queueId'] = data['queueId'].astype(int)

    return data

def mapping_data_values(data, mapping_key):
    if mapping_key == 'Q':
        data['queueId'] = data['queueId'].map(queue_key_dict)

    if mapping_key == 'I':
        item_columns = ["item0", "item1", "item2", "item3", "item4", "item5", "item6"]
        for col in item_columns:
            data[col] = data[col].map(item_key_dict)

    if mapping_key == 'S':
        summoner_columns = ['summoner1Id', 'summoner2Id']
        for col in summoner_columns:
            data[col] = data[col].map(ss_key_dict)
    
    if mapping_key == 'T':
        team_color = data['teamId'].map({100: 'Blue', 200: 'Red'})
        data.insert(1, 'teamColor', team_color)
    
    return data

In [42]:
def initial_cleanup(data):
    data = clean_columns(data)

    data = mapping_data_values(data, 'Q')

    data = mapping_data_values(data, 'I')

    data = mapping_data_values(data, 'S')

    data = mapping_data_values(data, 'T')

    return data

## Combine All Super Functions

In [43]:
def get_clean_match_data_by_summoner(summoner_name, start, end, queue_type):

    df = get_match_history_by_summoner_name(summoner_name, start, end, queue_type)
    if isinstance(df, pd.DataFrame):
        df = initial_cleanup(df)
    else:
        df = f'NO MATCHES OF THE TYPE {queue_type.upper()} IN THE LAST {end} GAMES'   

    return df

In [44]:
def get_summoner_match_clean_df(summoner_name, num_games, q_type):

    solo_game_count = 0
    starting = 0
    df_listed = []
    ending = 1

    print(f'STARTED PROCESSING FOR SUMMONER: {summoner_name}')
    print(f'ROW {starting} HAS BEEN PROCESSED')

    while solo_game_count < num_games:
        df = get_clean_match_data_by_summoner(summoner_name = summoner_name, start = starting, end = ending, queue_type = q_type)
        starting += ending
        print(f'ROW {starting} HAS BEEN PROCESSED')

        if isinstance(df, pd.DataFrame):
            solo_game_count += df.shape[0]
            print(f'SOLO GAME ROW COUNT: {solo_game_count}')
            df_listed.append(df)

    final = pd.concat(df_listed)
    return final

## Getting Most Recent 50 Solo Ranked Matches From Top 100 Challenger Level Players

In [47]:
def get_top_100_challenger_players(chall_url):
    response = requests.get(chall_url)
    html_soup  = BeautifulSoup(response.text, 'html.parser')
    challlenger_top_100_snames = []

    chall_top_4 = html_soup.find_all('li', class_ = 'ranking-highest__item')
    for challenger in chall_top_4:
        chall_sname = challenger.find('a', class_ = 'ranking-highest__name').text
        challlenger_top_100_snames.append(chall_sname)
    
    chall_rest = html_soup.find_all('tr', class_ = 'ranking-table__row')
    for challenger in chall_rest:
        challlenger_top_100_snames.append(challenger.span.text)

    return challlenger_top_100_snames

In [48]:
challenger_top100_snames = get_top_100_challenger_players('https://na.op.gg/ranking/ladder/page=1')

In [49]:
match_df_listed = []
for challenger in challenger_top100_snames:
    match_df = get_summoner_match_clean_df(summoner_name = challenger, num_games = 50, q_type = '5v5 Ranked Solo Games')
    match_df_listed.append(match_df)

STARTED PROCESSING FOR SUMMONER: From Iron
ROW 0 HAS BEEN PROCESSED
ROW 1 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 1
ROW 2 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 2
ROW 3 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 3
ROW 4 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 4
ROW 5 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 5
ROW 6 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 6
ROW 7 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 7
ROW 8 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 8
ROW 9 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 9
ROW 10 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 10
ROW 11 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 11
ROW 12 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 12
ROW 13 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 13
ROW 14 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 14
ROW 15 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 15
ROW 16 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 16
ROW 17 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 17
ROW 18 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 18
ROW 19 HAS BEEN PROCESSED
SOLO GAME ROW COUNT: 19


In [50]:
final_df = pd.concat(match_df_listed)
final_df.head()

Unnamed: 0,queueId,teamColor,matchId,assists,baronKills,bountyLevel,champExperience,champLevel,championId,championName,championTransform,consumablesPurchased,damageDealtToBuildings,damageDealtToObjectives,damageDealtToTurrets,damageSelfMitigated,deaths,detectorWardsPlaced,doubleKills,dragonKills,firstBloodAssist,firstBloodKill,firstTowerAssist,firstTowerKill,gameEndedInEarlySurrender,gameEndedInSurrender,goldEarned,goldSpent,individualPosition,inhibitorKills,inhibitorsLost,item0,item1,item2,item3,item4,item5,item6,itemsPurchased,killingSprees,kills,lane,largestCriticalStrike,largestKillingSpree,largestMultiKill,longestTimeSpentLiving,magicDamageDealt,magicDamageDealtToChampions,magicDamageTaken,neutralMinionsKilled,nexusKills,nexusLost,objectivesStolen,objectivesStolenAssists,participantId,pentaKills,physicalDamageDealt,physicalDamageDealtToChampions,physicalDamageTaken,profileIcon,puuid,quadraKills,riotIdName,riotIdTagline,role,sightWardsBoughtInGame,spell1Casts,spell2Casts,spell3Casts,spell4Casts,summoner1Casts,summoner1Id,summoner2Casts,summoner2Id,summonerId,summonerLevel,summonerName,teamEarlySurrendered,teamId,teamPosition,timeCCingOthers,timePlayed,totalDamageDealt,totalDamageDealtToChampions,totalDamageShieldedOnTeammates,totalDamageTaken,totalHeal,totalHealsOnTeammates,totalMinionsKilled,totalTimeCCDealt,totalTimeSpentDead,totalUnitsHealed,tripleKills,trueDamageDealt,trueDamageDealtToChampions,trueDamageTaken,turretKills,turretsLost,unrealKills,visionScore,visionWardsBoughtInGame,wardsKilled,wardsPlaced,win
0,5V5 RANKED SOLO GAMES,Blue,3944288481,13,0,0,8243,11,223,TahmKench,0,7,488,1419,488,17878,6,3,0,0,False,False,False,False,False,True,7039,6425,BOTTOM,0,0,Doran's Shield,Sunfire Aegis,Plated Steelcaps,Kindlegem,Ruby Crystal,,Stealth Ward,18,0,1,BOTTOM,0,0,1,312,20774,4743,6487,0,0,0,0,0,5,0,10610,731,9696,4568,6zpHc103eNNhAt8xC6Ujhak476wF7TuTm4K4vrCU2Zuu9Y...,0,,,CARRY,0,71,4,11,2,3,,4,,-wR2sAu8IN0jiLgjgGyDQsjpnyX5tSwTwZHcIE7L9aS1TbyQ,274,From Iron,False,100,BOTTOM,14,1284,31384,5475,1105,16532,2087,0,98,105,140,1,0,0,0,349,0,0,0,16,3,4,9,True
0,5V5 RANKED SOLO GAMES,Blue,3944251280,11,0,0,6578,10,111,Nautilus,0,10,0,174,0,16060,5,7,0,0,False,False,False,False,False,False,6099,6000,UTILITY,0,1,Plated Steelcaps,Locket of the Iron Solari,Bulwark of the Mountain,Cloth Armor,Warden's Mail,Control Ward,Oracle Lens,28,1,2,BOTTOM,0,2,1,407,10589,4457,4313,0,0,1,0,0,5,0,5748,1951,6472,4568,6zpHc103eNNhAt8xC6Ujhak476wF7TuTm4K4vrCU2Zuu9Y...,0,,,SUPPORT,0,50,17,21,9,13,,4,,-wR2sAu8IN0jiLgjgGyDQsjpnyX5tSwTwZHcIE7L9aS1TbyQ,274,From Iron,False,100,UTILITY,46,1347,21927,7026,671,11566,62,0,43,246,98,1,0,5589,616,779,0,9,0,53,8,6,31,False
0,5V5 RANKED SOLO GAMES,Red,3944226110,0,0,0,4583,8,202,Jhin,0,6,0,0,0,3309,5,2,0,0,False,False,False,False,False,True,4305,3100,BOTTOM,0,0,Serrated Dirk,Refillable Potion,Boots,Vampiric Scepter,,Long Sword,Stealth Ward,16,0,1,NONE,358,0,1,281,148,0,1678,0,0,0,0,0,9,0,34032,3541,6081,4568,6zpHc103eNNhAt8xC6Ujhak476wF7TuTm4K4vrCU2Zuu9Y...,0,,,SUPPORT,0,36,17,3,10,2,,3,,-wR2sAu8IN0jiLgjgGyDQsjpnyX5tSwTwZHcIE7L9aS1TbyQ,274,From Iron,False,200,BOTTOM,3,912,34180,3541,0,8214,1718,216,99,7,75,3,0,0,0,454,0,3,0,18,2,3,10,False
0,5V5 RANKED SOLO GAMES,Red,3941904096,20,0,0,10417,13,111,Nautilus,0,11,320,999,320,30781,7,8,0,0,False,False,False,False,False,False,7821,6575,UTILITY,0,0,Plated Steelcaps,Locket of the Iron Solari,Bulwark of the Mountain,Control Ward,Kindlegem,Warden's Mail,Oracle Lens,27,0,0,BOTTOM,0,0,0,690,8449,5522,8327,0,0,0,0,0,10,0,7874,1919,10809,4568,6zpHc103eNNhAt8xC6Ujhak476wF7TuTm4K4vrCU2Zuu9Y...,0,,,SUPPORT,0,71,46,24,9,5,,5,,-wR2sAu8IN0jiLgjgGyDQsjpnyX5tSwTwZHcIE7L9aS1TbyQ,274,From Iron,False,200,UTILITY,60,1841,21192,8025,1644,19624,1203,0,40,190,212,1,0,4868,584,487,0,1,0,64,9,8,35,True
0,5V5 RANKED SOLO GAMES,Red,3941845033,18,0,1,8840,12,111,Nautilus,0,10,796,796,796,15092,4,6,0,0,False,False,False,False,False,False,7579,6050,UTILITY,0,0,Ruby Crystal,Locket of the Iron Solari,Bulwark of the Mountain,Control Ward,Bramble Vest,Plated Steelcaps,Oracle Lens,25,0,2,BOTTOM,0,0,1,538,8924,6108,3112,0,0,0,0,0,10,0,7045,2408,7007,4568,6zpHc103eNNhAt8xC6Ujhak476wF7TuTm4K4vrCU2Zuu9Y...,0,,,SOLO,0,60,32,27,6,4,,5,,-wR2sAu8IN0jiLgjgGyDQsjpnyX5tSwTwZHcIE7L9aS1TbyQ,274,From Iron,False,200,UTILITY,59,1383,22463,9136,541,10449,19,0,32,191,94,1,0,6494,619,329,0,1,0,52,8,6,21,True


In [51]:
final_df.shape

(5000, 104)

In [52]:
final_df.to_csv('export_data/data.csv')