In [2]:
from riotwatcher import LolWatcher, ApiError
import pandas as pd
from private_functions import getAPI

Here are my early helper functions I set up for ease of access to lol_watcher and its uses. 

In [3]:
def get_puuid(name):
    """
    Look up a summoner by name and return the 'puuid' field of the result.

    Uses the notebook-level `lol_watcher` client and `region` string, so both
    must be defined before this is called.
    """
    summoner = lol_watcher.summoner.by_name(region, name)
    return summoner['puuid']
def get_matchlist(name):
    """
    Return the ranked matchlist for a summoner name.

    Resolves the name to a puuid first, then requests up to 100 entries
    (count=100, type='ranked') through the notebook-level `lol_watcher` client.
    """
    puuid = get_puuid(name)
    return lol_watcher.match.matchlist_by_puuid(region, puuid, count=100, type='ranked')
def get_match(match_id):
    '''
    Fetch a single match by its ID.

    Centralises the `lol_watcher` call so the other get_* functions can work
    on an already-fetched match instead of hitting the API themselves.
    '''
    match_api = lol_watcher.match
    return match_api.by_id(region, match_id)

Never commit API keys to your repositories. I keep mine in a local file and load it here.

I also have my region defaulted to 'na1' and my match_id set to one of my own personal games for testing. 

In [4]:
# Load the API key through the local private_functions helper so the key itself never appears in the notebook.
myApi = getAPI()
# Shared client used by get_puuid, get_matchlist and get_match.
lol_watcher = LolWatcher(myApi)

In [5]:
# Region string passed to every lol_watcher call made by the helper functions.
region = 'na1'
# Fixed sample game used for exploring the payload in the cells below.
match_id = 'NA1_4286956599'
match = get_match(match_id)

Here is my first look at what I will be working with. This is how a match comes straight from the API, without any cleaning or tinkering. At first glance it is a bit overwhelming, but with patience I can help break it down.

In [6]:
# Show the raw match payload exactly as get_match returned it.
match

{'metadata': {'dataVersion': '2',
  'matchId': 'NA1_4286956599',
  'participants': ['oOUDXEZnLJdcXhLQlPD9hPLgd0k0BKde7xFhcogb7-xNvQ--M_ZOIM94-B9Ji_J0qM8i48D1vmvVwQ',
   'JgFpRq3Leoe9Mn_GzPSvCholBc7VVqNYh6x8hKfUe--JH4NgKuEIufzc3xpV58DWkzw9uA8WHMaeHw',
   '4Yfk03L9agtZMzJU-RENCM_XDZfkkMmZhyH0Hcc294Bxib5KEAooyLQ_hmu0VAHwgHG8fPxi1vRGew',
   'LqVxfFFNqKspmB2GLnppd6SnVZTgdlrxep_2XqwqJMMXnHbC2ZzbnIVTX0wF-l8THeH_kQ0uSceo0w',
   'Y5S57zDapZMqHE4mI3-NstSAPi5kDfIb3UDsqIhDF-ZCRwhAu5Dl63vAKyp_nfx87X5SMI9gPhRAhQ',
   'feMIK6EL0c3sg6Ytx83CnZxLWsZq5ywbkiv4IrVzYFVr-bxtX-ncxHtCd5NOtg5Gk-75XsWVj_9IwA',
   '-SaEIp4mr8jQKR4z2yWmOr2vWwMbeu5CTKOZygxmyj2TPId7sHfhqHVoIPdtH3UPdm5aN0aYrgOmnw',
   'AFqllfLV1CKJDzemzngWK5n7Kq93tNGqxOLKiBjAGbVNtHbAg4DT84DLy9pMVa9LciZrTnE8-dZtDg',
   'leqQwaP8IcmQXBtOKDGMeXfpxYzHqgXIsT4VzsaCve9xH3o2X2BB88LEcU9yA4Z8a1O7HmSOIlM_uw',
   'lGNgnHZQRfEk2xZ8Q4gEF9dmZ7bNn8t-pjku4krrIXD87taW0A_0Wn5hZKw8J3pDY-ART1O4IWKhJg']},
 'info': {'gameCreation': 1650778290000,
  'gameDuration': 1905,
  

Here are the functions that make up the main part of my pipeline. The goal is to clean and consolidate each match into a single row for a Pandas DataFrame. I explain how I came up with these further down, if you are interested.

In [6]:
def get_duration(match):
    '''
    Pull the game's duration out of a match returned by get_match.

    Returns a one-entry dictionary keyed by 'gameDuration' so it can be merged
    straight into a row.
    '''
    info = match['info']
    duration = info['gameDuration']
    return {'gameDuration': duration}

def get_win(match):
    '''
    Report whether the first team listed in the match won.

    Reads the 'win' flag of match['info']['teams'][0] and returns it in a
    one-entry dictionary keyed by 'win'.
    '''
    first_team = match['info']['teams'][0]
    return {'win': first_team['win']}

def get_team_objectives(match):
    '''
    Flatten the first team's objectives into a single-level dictionary.

    Each nested entry {objective: {stat: value}} becomes one
    '<objective>_<stat>' key (for example 'baron_first'), ready to be used as
    DataFrame columns.
    '''
    objectives = match['info']['teams'][0]['objectives']
    return {
        objective + '_' + stat: value
        for objective, stats in objectives.items()
        for stat, value in stats.items()
    }

def get_avgs(match):
    '''
    Average the selected per-player statistics over the teamId-100 players.

    Takes a match from get_match and returns a dictionary with one
    'avg<StatName>' entry per statistic, rounded to 3 decimals. The total is
    divided by the number of teamId-100 players actually present rather than a
    hard-coded five. Returns an empty dictionary when the match has no
    teamId-100 players.

    Raises KeyError if a teamId-100 player is missing one of the statistics.
    '''
    avg_keys = ['kills', 'deaths', 'assists', 'champLevel', 'champExperience', 'doubleKills', 'tripleKills',
                'quadraKills', 'pentaKills', 'consumablesPurchased', 'damageDealtToBuildings',
                'damageDealtToObjectives', 'damageSelfMitigated', 'detectorWardsPlaced', 'timePlayed',
                'goldEarned', 'goldSpent', 'inhibitorKills', 'itemsPurchased', 'largestKillingSpree',
                'killingSprees', 'longestTimeSpentLiving', 'magicDamageDealt', 'magicDamageDealtToChampions',
                'magicDamageTaken', 'physicalDamageDealt', 'physicalDamageDealtToChampions',
                'physicalDamageTaken', 'totalDamageDealt', 'totalDamageDealtToChampions',
                'totalDamageShieldedOnTeammates', 'totalDamageTaken', 'trueDamageDealt',
                'trueDamageDealtToChampions', 'trueDamageTaken', 'totalHeal', 'totalHealsOnTeammates',
                'totalMinionsKilled', 'totalTimeCCDealt', 'totalTimeSpentDead', 'turretKills', 'turretsLost',
                'neutralMinionsKilled', 'objectivesStolen', 'objectivesStolenAssists', 'visionScore',
                'visionWardsBoughtInGame', 'wardsKilled', 'wardsPlaced']
    team = [player for player in match['info']['participants'] if player['teamId'] == 100]
    if not team:
        # Nothing to average; an empty result avoids dividing by zero.
        return {}
    averages = {}
    for stat in avg_keys:
        # Explicit sum instead of try/except accumulation, so a bad value
        # raises instead of silently restarting the running total.
        total = sum(player[stat] for player in team)
        averages['avg' + stat[0].upper() + stat[1:]] = round(total / len(team), 3)
    return averages

def get_challenges(match):
    '''
    Average the selected 'challenges' statistics over the teamId-100 players.

    Takes a match from get_match and returns a dictionary with one
    'avg<ChallengeName>' entry per challenge, rounded to 4 decimals. Each
    average is taken over the players that actually report that challenge, so
    a player without the entry (or without a 'challenges' section at all)
    neither resets nor dilutes the result. A challenge that no teamId-100
    player reports is stored as None. Returns an empty dictionary when the
    match has no teamId-100 players.
    '''
    chal_keys = ['abilityUses', 'acesBefore15Minutes', 'alliedJungleMonsterKills', 'baronTakedowns',
                 'blastConeOppositeOpponentCount', 'bountyGold', 'buffsStolen', 'damagePerMinute',
                 'deathsByEnemyChamps', 'dragonTakedowns',
                 'effectiveHealAndShielding',
                 'enemyChampionImmobilizations', 'enemyJungleMonsterKills', 'epicMonsterSteals',
                 'goldPerMinute', 'jungleCsBefore10Minutes', 'kda', 'killParticipation', 'killsNearEnemyTurret',
                 'killsUnderOwnTurret', 'killsWithHelpFromEpicMonster',
                 'maxKillDeficit', 'multikills', 'outnumberedKills',
                 'riftHeraldTakedowns', 'scuttleCrabKills', 'skillshotsDodged',
                 'skillshotsHit', 'soloKills', 'takedowns',
                 'takedownsAfterGainingLevelAdvantage', 'takedownsBeforeJungleMinionSpawn',
                 'turretPlatesTaken', 'turretTakedowns',
                 'turretsTakenWithRiftHerald', 'visionScorePerMinute',
                 'wardTakedownsBefore20M', 'wardsGuarded', 'soloTurretsLategame', 'dodgeSkillShotsSmallWindow',
                 'tookLargeDamageSurvived', 'pickKillWithAlly',
                 'knockEnemyIntoTeamAndKill', 'visionScoreAdvantageLaneOpponent', 'maxLevelLeadLaneOpponent',
                 'laningPhaseGoldExpAdvantage', 'earlyLaningPhaseGoldExpAdvantage', 'earliestBaron',
                 'killsOnLanersEarlyJungleAsJungler', 'junglerKillsEarlyJungle', 'takedownsFirst25Minutes']
    team = [player for player in match['info']['participants'] if player['teamId'] == 100]
    if not team:
        return {}
    averages = {}
    for stat in chal_keys:
        reported = []
        for player in team:
            # A missing or empty 'challenges' section counts as "not reported".
            value = (player.get('challenges') or {}).get(stat)
            if value is not None:
                reported.append(value)
        label = 'avg' + stat[0].upper() + stat[1:]
        # Divide by the number of players that reported the stat, not a fixed 5.
        averages[label] = round(sum(reported) / len(reported), 4) if reported else None
    return averages

def get_row(match_id):
    '''
    Fetch one match and merge every extractor's output into a single row.

    The extractors run in a fixed order (averages, challenges, team
    objectives, duration, win); if two of them ever produce the same key, the
    later one wins.
    '''
    match = get_match(match_id)
    row = {}
    for extract in (get_avgs, get_challenges, get_team_objectives, get_duration, get_win):
        row.update(extract(match))
    return row
def format_data(match_list):
    '''
    Build one row per entry of match_list by passing each entry to get_row.

    Returns the rows as a list, in the same order as match_list.
    '''
    return [get_row(match_id) for match_id in match_list]

This is a list of my friends and their summoner names. I plan on using it to generate games to analyze, but for now it's unused.

In [7]:
# Summoner names intended as seeds for collecting matches later; not referenced by the pipeline functions above.
summoners = ['Zealfire', 'SmurfHD', 'DumbAmerican', 'About 10 Inches', 'Exprience', 'Leaders1', 
             'Kehlee', 'KybitOW', 'DodoMuncher', 'Mango Bleach']

This is a first glance, and things get a little hairier, so stick with me. If we take a look at the keys of a match, we only get `'metadata'` and `'info'`. So let's take a look.

In [8]:
# Top-level keys of the match payload.
match.keys()

dict_keys(['metadata', 'info'])

As you can see, there isn't a whole lot of useful information in the metadata. It tells us the matchId, which is the region and gameId combined, and it tells us the puuids of the summoners who are playing.

Info, on the other hand, looks like it has all the information we are going to need. It is quite large and therefore harder to understand.

In [9]:
# Show both top-level sections side by side as a tuple.
match['metadata'], match['info']

({'dataVersion': '2',
  'matchId': 'NA1_4286956599',
  'participants': ['oOUDXEZnLJdcXhLQlPD9hPLgd0k0BKde7xFhcogb7-xNvQ--M_ZOIM94-B9Ji_J0qM8i48D1vmvVwQ',
   'JgFpRq3Leoe9Mn_GzPSvCholBc7VVqNYh6x8hKfUe--JH4NgKuEIufzc3xpV58DWkzw9uA8WHMaeHw',
   '4Yfk03L9agtZMzJU-RENCM_XDZfkkMmZhyH0Hcc294Bxib5KEAooyLQ_hmu0VAHwgHG8fPxi1vRGew',
   'LqVxfFFNqKspmB2GLnppd6SnVZTgdlrxep_2XqwqJMMXnHbC2ZzbnIVTX0wF-l8THeH_kQ0uSceo0w',
   'Y5S57zDapZMqHE4mI3-NstSAPi5kDfIb3UDsqIhDF-ZCRwhAu5Dl63vAKyp_nfx87X5SMI9gPhRAhQ',
   'feMIK6EL0c3sg6Ytx83CnZxLWsZq5ywbkiv4IrVzYFVr-bxtX-ncxHtCd5NOtg5Gk-75XsWVj_9IwA',
   '-SaEIp4mr8jQKR4z2yWmOr2vWwMbeu5CTKOZygxmyj2TPId7sHfhqHVoIPdtH3UPdm5aN0aYrgOmnw',
   'AFqllfLV1CKJDzemzngWK5n7Kq93tNGqxOLKiBjAGbVNtHbAg4DT84DLy9pMVa9LciZrTnE8-dZtDg',
   'leqQwaP8IcmQXBtOKDGMeXfpxYzHqgXIsT4VzsaCve9xH3o2X2BB88LEcU9yA4Z8a1O7HmSOIlM_uw',
   'lGNgnHZQRfEk2xZ8Q4gEF9dmZ7bNn8t-pjku4krrIXD87taW0A_0Wn5hZKw8J3pDY-ART1O4IWKhJg']},
 {'gameCreation': 1650778290000,
  'gameDuration': 1905,
  'gameEndTimestamp': 

I decided to try putting `match['info']` into a Pandas DataFrame to get a better idea of what I am working with. At first glance we can see that there is some useful info all ready for us, for instance gameDuration. So I first went ahead and set up a get function to grab how long the game went for.

Next I noticed the participants and teams columns have lists of dictionaries and I suspect most of the data is hiding in there. 

In [10]:
# Wrap the info dict in a list so it becomes a single-row DataFrame with one column per key.
pd.DataFrame([match['info']])

Unnamed: 0,gameCreation,gameDuration,gameEndTimestamp,gameId,gameMode,gameName,gameStartTimestamp,gameType,gameVersion,mapId,participants,platformId,queueId,teams,tournamentCode
0,1650778290000,1905,1650780212165,4286956599,CLASSIC,teambuilder-match-4286956599,1650778306516,MATCHED_GAME,12.7.433.4138,11,"[{'assists': 11, 'baronKills': 0, 'bountyLevel...",NA1,420,"[{'bans': [{'championId': 23, 'pickTurn': 1}, ...",


I checked out teams first. I don't really care about which champion is being banned or when it was banned, but the rest of the info is very important. First I set up another basic get function to grab whether or not they won. Next I check out the objectives.

In [11]:
# Inspect the first entry of the teams list.
match['info']['teams'][0]

{'bans': [{'championId': 23, 'pickTurn': 1},
  {'championId': 131, 'pickTurn': 2},
  {'championId': 111, 'pickTurn': 3},
  {'championId': 498, 'pickTurn': 4},
  {'championId': 157, 'pickTurn': 5}],
 'objectives': {'baron': {'first': True, 'kills': 1},
  'champion': {'first': True, 'kills': 41},
  'dragon': {'first': False, 'kills': 0},
  'inhibitor': {'first': False, 'kills': 0},
  'riftHerald': {'first': True, 'kills': 1},
  'tower': {'first': False, 'kills': 5}},
 'teamId': 100,
 'win': False}

In [12]:
# Names of the objectives tracked for the first team.
match['info']['teams'][0]['objectives'].keys()

dict_keys(['baron', 'champion', 'dragon', 'inhibitor', 'riftHerald', 'tower'])

As you can see the objectives are very important but are not in a good format that would be easy to analyze. So we have a little work to do. 

In [13]:
# Keep a handle on the first team's objectives; the flattening cell below reads `obj`.
obj = match['info']['teams'][0]['objectives']
obj

{'baron': {'first': True, 'kills': 1},
 'champion': {'first': True, 'kills': 41},
 'dragon': {'first': False, 'kills': 0},
 'inhibitor': {'first': False, 'kills': 0},
 'riftHerald': {'first': True, 'kills': 1},
 'tower': {'first': False, 'kills': 5}}

This next cell takes the original objectives and flattens it out to be a single dictionary filled with all the data we needed from this section. This way we can concat this onto a larger dictionary and build a very useful DataFrame with it. You might even recognize this cell as very similar code is now in my pipeline. 

In [14]:
# Flatten {objective: {stat: value}} into one level, joining the two names in
# camelCase (e.g. 'baron' + 'first' -> 'baronFirst').
new = {}
for k, stats in obj.items():
    for key, value in stats.items():
        new[k + key.capitalize()] = value
new

{'baronFirst': True,
 'baronKills': 1,
 'championFirst': True,
 'championKills': 41,
 'dragonFirst': False,
 'dragonKills': 0,
 'inhibitorFirst': False,
 'inhibitorKills': 0,
 'riftHeraldFirst': True,
 'riftHeraldKills': 1,
 'towerFirst': False,
 'towerKills': 5}

My next hurdle is a big one. I need to take 5 individual team members and get the average for every continuous variable in participants. First things first, I need to take a good look at what variables are available and slowly pick which ones would be helpful for analysis. I also put the variables in a more intuitive order, with similar variables next to each other.

In [15]:
# Every field available on the first participant.
match['info']['participants'][0].keys()

dict_keys(['assists', 'baronKills', 'bountyLevel', 'challenges', 'champExperience', 'champLevel', 'championId', 'championName', 'championTransform', 'consumablesPurchased', 'damageDealtToBuildings', 'damageDealtToObjectives', 'damageDealtToTurrets', 'damageSelfMitigated', 'deaths', 'detectorWardsPlaced', 'doubleKills', 'dragonKills', 'eligibleForProgression', 'firstBloodAssist', 'firstBloodKill', 'firstTowerAssist', 'firstTowerKill', 'gameEndedInEarlySurrender', 'gameEndedInSurrender', 'goldEarned', 'goldSpent', 'individualPosition', 'inhibitorKills', 'inhibitorTakedowns', 'inhibitorsLost', 'item0', 'item1', 'item2', 'item3', 'item4', 'item5', 'item6', 'itemsPurchased', 'killingSprees', 'kills', 'lane', 'largestCriticalStrike', 'largestKillingSpree', 'largestMultiKill', 'longestTimeSpentLiving', 'magicDamageDealt', 'magicDamageDealtToChampions', 'magicDamageTaken', 'neutralMinionsKilled', 'nexusKills', 'nexusLost', 'nexusTakedowns', 'objectivesStolen', 'objectivesStolenAssists', 'parti

In [16]:
# Participant-level statistics chosen for team averaging in the exploratory cell below.
# Note: the list inside get_avgs additionally contains 'timePlayed'.
avg_keys = ['kills', 'deaths', 'assists', 'champLevel', 'champExperience', 'doubleKills', 'tripleKills', 'quadraKills', 
            'pentaKills','consumablesPurchased', 'damageDealtToBuildings',
            'damageDealtToObjectives', 'damageSelfMitigated', 'detectorWardsPlaced', 
            'goldEarned', 'goldSpent', 'inhibitorKills', 'itemsPurchased', 'largestKillingSpree', 'killingSprees',
            'longestTimeSpentLiving', 'magicDamageDealt','magicDamageDealtToChampions', 'magicDamageTaken',
            'physicalDamageDealt', 'physicalDamageDealtToChampions', 'physicalDamageTaken', 'totalDamageDealt',
            'totalDamageDealtToChampions', 'totalDamageShieldedOnTeammates', 'totalDamageTaken', 
            'trueDamageDealt', 'trueDamageDealtToChampions', 'trueDamageTaken',
            'totalHeal', 'totalHealsOnTeammates', 'totalMinionsKilled', 'totalTimeCCDealt', 'totalTimeSpentDead',
            'turretKills', 'turretsLost', 'neutralMinionsKilled', 'objectivesStolen', 'objectivesStolenAssists', 
            'visionScore','visionWardsBoughtInGame', 'wardsKilled', 'wardsPlaced']

After going through and making sure I will be grabbing everything that I want, I set up the cell below to add up the values and average them out across the team. It worked exactly how I wanted it to, so again I added it to my pipeline.

In [17]:
# Sum each chosen stat over the teamId-100 players, then scale to a per-player average.
new = {}
for i in match['info']['participants']:
    if i['teamId'] == 100:
        for j in avg_keys:
            label = 'avg' + j[0].upper() + j[1:]
            # dict.get supplies the starting 0, so no bare except is needed and
            # a genuinely bad value raises instead of silently restarting the sum.
            new[label] = new.get(label, 0) + i[j]
# 0.2 == 1/5: assumes five players on the team. Build a fresh dict rather than
# updating `new` while iterating over its own items.
new = {k: round(v * 0.2, 3) for k, v in new.items()}
new

{'avgKills': 8.2,
 'avgDeaths': 12.2,
 'avgAssists': 6.6,
 'avgChampLevel': 16.0,
 'avgChampExperience': 15302.8,
 'avgDoubleKills': 1.2,
 'avgTripleKills': 0.0,
 'avgQuadraKills': 0.0,
 'avgPentaKills': 0.0,
 'avgConsumablesPurchased': 3.8,
 'avgDamageDealtToBuildings': 2657.8,
 'avgDamageDealtToObjectives': 9446.4,
 'avgDamageSelfMitigated': 24171.2,
 'avgDetectorWardsPlaced': 0.8,
 'avgGoldEarned': 13615.2,
 'avgGoldSpent': 12931.0,
 'avgInhibitorKills': 0.0,
 'avgItemsPurchased': 23.0,
 'avgLargestKillingSpree': 2.4,
 'avgKillingSprees': 1.6,
 'avgLongestTimeSpentLiving': 430.4,
 'avgMagicDamageDealt': 38503.8,
 'avgMagicDamageDealtToChampions': 7567.6,
 'avgMagicDamageTaken': 4398.8,
 'avgPhysicalDamageDealt': 76610.2,
 'avgPhysicalDamageDealtToChampions': 10587.4,
 'avgPhysicalDamageTaken': 21295.2,
 'avgTotalDamageDealt': 130064.8,
 'avgTotalDamageDealtToChampions': 23560.4,
 'avgTotalDamageShieldedOnTeammates': 0.0,
 'avgTotalDamageTaken': 30428.6,
 'avgTrueDamageDealt': 14950.

While looking through the participants, I found that the challenges section is extremely long and has a ton of information. That means more nitpicking to make sure I get all the information I want.

In [18]:
# Challenge names reported for the second participant.
match['info']['participants'][1]['challenges'].keys()

dict_keys(['12AssistStreakCount', 'abilityUses', 'acesBefore15Minutes', 'alliedJungleMonsterKills', 'baronTakedowns', 'blastConeOppositeOpponentCount', 'bountyGold', 'buffsStolen', 'completeSupportQuestInTime', 'controlWardsPlaced', 'damagePerMinute', 'damageTakenOnTeamPercentage', 'dancedWithRiftHerald', 'deathsByEnemyChamps', 'dodgeSkillShotsSmallWindow', 'doubleAces', 'dragonTakedowns', 'earliestBaron', 'earlyLaningPhaseGoldExpAdvantage', 'effectiveHealAndShielding', 'elderDragonKillsWithOpposingSoul', 'elderDragonMultikills', 'enemyChampionImmobilizations', 'enemyJungleMonsterKills', 'epicMonsterKillsNearEnemyJungler', 'epicMonsterKillsWithin30SecondsOfSpawn', 'epicMonsterSteals', 'epicMonsterStolenWithoutSmite', 'flawlessAces', 'fullTeamTakedown', 'gameLength', 'goldPerMinute', 'hadAfkTeammate', 'hadOpenNexus', 'immobilizeAndKillWithAlly', 'initialBuffCount', 'initialCrabCount', 'jungleCsBefore10Minutes', 'junglerKillsEarlyJungle', 'junglerTakedownsNearDamagedEpicMonster', 'kTurre

I noticed while looking through the challenges, that they are all different lengths. Which means I have my work cut out for me.

In [19]:
participants = match['info']['participants']
# Challenge key views of the first three players.
first, second, third = (participants[n]['challenges'].keys() for n in range(3))
# For each of the second player's challenges, record whether the third player has it too.
new = {}
for k in second:
    new[k] = (k in third)
# Print how many challenge entries each of the ten players has.
for i in range(10):
    print(len(participants[i]['challenges']))

111
109
113
111
110
115
116
113
115
114


To make it a little easier to understand I put it into a DataFrame and called `.info()` again. I need to assign `verbose=True` or it wouldn't display everything I need to see. 

We are working with 16 floats and 93 ints. I already notice that some of these variables should be booleans and are most likely a 1 or 0 for `True` or `False` respectively, but we will have to verify with our data.

In [20]:
# One-row frame built from the seventh participant's challenges, used only to list column dtypes.
test_frame = pd.DataFrame([match['info']['participants'][6]['challenges']])
# verbose=True forces every column to be listed instead of a summary.
test_frame.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 116 columns):
 #   Column                                     Dtype  
---  ------                                     -----  
 0   12AssistStreakCount                        int64  
 1   abilityUses                                int64  
 2   acesBefore15Minutes                        int64  
 3   alliedJungleMonsterKills                   float64
 4   baronBuffGoldAdvantageOverThreshold        int64  
 5   baronTakedowns                             int64  
 6   blastConeOppositeOpponentCount             int64  
 7   bountyGold                                 int64  
 8   buffsStolen                                int64  
 9   completeSupportQuestInTime                 int64  
 10  controlWardTimeCoverageInRiverOrEnemyHalf  float64
 11  controlWardsPlaced                         int64  
 12  damagePerMinute                            float64
 13  damageTakenOnTeamPercentage                float64
 1

I used this to pick which variables I will be using. The list is below. I decided not to use any of the booleans for the team analysis. The `not_used` portion exists because not every participant has every variable, and I needed to test which ones aren't universal. There are a lot more unused variables, but these are the ones I wanted to use but can't.

As you can see the `not_used` is empty. That is because after testing I decided to modify my code to input `None` instead of not using the variable. This allows a Data Analyst to use their discretion as to what variables should be used while cleaning. 

In [21]:
# Challenge statistics chosen for team averaging in the exploratory cell below.
# Note: this list contains 'controlWardTimeCoverageInRiverOrEnemyHalf' and 'unseenRecalls',
# which the list inside get_challenges does not.
chal_keys = ['abilityUses', 'acesBefore15Minutes', 'alliedJungleMonsterKills', 'baronTakedowns', 'blastConeOppositeOpponentCount',
             'bountyGold', 'buffsStolen', 'damagePerMinute', 'deathsByEnemyChamps', 
             'dragonTakedowns', 'effectiveHealAndShielding', 'controlWardTimeCoverageInRiverOrEnemyHalf',
             'enemyChampionImmobilizations', 'enemyJungleMonsterKills', 'epicMonsterSteals', 'goldPerMinute',
             'jungleCsBefore10Minutes', 'kda', 'killParticipation', 'killsNearEnemyTurret',
             'killsUnderOwnTurret', 'killsWithHelpFromEpicMonster',  
             'maxKillDeficit', 'multikills', 'outnumberedKills', 'riftHeraldTakedowns',
             'scuttleCrabKills', 'skillshotsDodged', 'skillshotsHit', 'soloKills', 'takedowns',
             'takedownsAfterGainingLevelAdvantage', 'takedownsBeforeJungleMinionSpawn', 
             'turretPlatesTaken', 'turretTakedowns', 'turretsTakenWithRiftHerald', 
             'visionScorePerMinute', 'wardTakedownsBefore20M', 'wardsGuarded', 'soloTurretsLategame', 'dodgeSkillShotsSmallWindow', 'tookLargeDamageSurvived', 'pickKillWithAlly', 
            'knockEnemyIntoTeamAndKill', 'visionScoreAdvantageLaneOpponent', 'maxLevelLeadLaneOpponent', 
            'laningPhaseGoldExpAdvantage', 'earlyLaningPhaseGoldExpAdvantage', 'earliestBaron', 
            'killsOnLanersEarlyJungleAsJungler', 'junglerKillsEarlyJungle', 'takedownsFirst25Minutes', 
            'unseenRecalls' ]

# Left empty on purpose: missing challenges are recorded as None instead of being dropped.
not_used = [
            ]

This block of code looks eerily familiar, and works almost the exact same way as the one before. And after testing we can see that everything is in order. So I create a function that does the same and I am almost ready for some analysis. 

After trying to use this to source games, I ran into a problem. Some games don't have a `'challenges'` section at all, and I needed to add an exception that inserts NA. I also realized that dividing everything by 5 isn't correct, so now I have to figure out how to divide by the actual number of players who have that specific challenge.

In [22]:
# Average each challenge over the teamId-100 players that actually report it.
# A challenge that none of them report stays None so it can be handled during cleaning.
team = [p for p in match['info']['participants'] if p['teamId'] == 100]
new = {}
for j in chal_keys:
    reported = []
    for p in team:
        # A missing or empty 'challenges' section counts as "not reported".
        value = (p.get('challenges') or {}).get(j)
        if value is not None:
            reported.append(value)
    # Divide by the number of players that reported the stat, not a fixed 5, and
    # never let one player's missing entry reset the running total.
    new['avg'+j[0].upper()+j[1:]] = round(sum(reported) / len(reported), 4) if reported else None
new

{'avgAbilityUses': 153.8,
 'avgAcesBefore15Minutes': 0.0,
 'avgAlliedJungleMonsterKills': 15.76,
 'avgBaronTakedowns': 0.6,
 'avgBlastConeOppositeOpponentCount': 0.2,
 'avgBountyGold': 735.0,
 'avgBuffsStolen': 0.4,
 'avgDamagePerMinute': 741.8184,
 'avgDeathsByEnemyChamps': 12.2,
 'avgDragonTakedowns': 0.0,
 'avgEffectiveHealAndShielding': 43.2,
 'avgControlWardTimeCoverageInRiverOrEnemyHalf': None,
 'avgEnemyChampionImmobilizations': 13.0,
 'avgEnemyJungleMonsterKills': 1.92,
 'avgEpicMonsterSteals': 0.0,
 'avgGoldPerMinute': 428.6932,
 'avgJungleCsBefore10Minutes': 8.8,
 'avgKda': 1.5386,
 'avgKillParticipation': 0.2426,
 'avgKillsNearEnemyTurret': 1.4,
 'avgKillsUnderOwnTurret': 1.2,
 'avgKillsWithHelpFromEpicMonster': 0.2,
 'avgMaxKillDeficit': 0.0,
 'avgMultikills': 1.2,
 'avgOutnumberedKills': 2.0,
 'avgRiftHeraldTakedowns': 0.2,
 'avgScuttleCrabKills': 0.4,
 'avgSkillshotsDodged': 13.0,
 'avgSkillshotsHit': 27.0,
 'avgSoloKills': 3.4,
 'avgTakedowns': 14.8,
 'avgTakedownsAfterG

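Here I run the full pipeline on one summoner's ranked matchlist and load the resulting rows into a DataFrame.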

In [23]:
match_list = get_matchlist('Zealfire')
# Reuse the pipeline helper instead of repeating its loop here; this also keeps
# the sample `match_id` defined earlier from being overwritten by a loop variable.
data = format_data(match_list)
df = pd.DataFrame(data)
df.head()

Unnamed: 0,avgKills,avgDeaths,avgAssists,avgChampLevel,avgChampExperience,avgDoubleKills,avgTripleKills,avgQuadraKills,avgPentaKills,avgConsumablesPurchased,...,dragon_first,dragon_kills,inhibitor_first,inhibitor_kills,riftHerald_first,riftHerald_kills,tower_first,tower_kills,gameDuration,win
0,7.4,3.8,8.8,14.0,11807.8,0.8,0.2,0.0,0.0,4.0,...,True,3,False,0,True,1,True,4,1449,True
1,2.8,5.0,3.4,13.8,11766.6,0.0,0.0,0.0,0.0,4.8,...,False,0,False,0,True,1,True,2,1810,False
2,7.0,2.2,6.6,13.4,11250.2,0.8,0.0,0.0,0.0,2.4,...,True,3,True,1,True,2,True,9,1397,True
3,6.6,6.4,8.6,16.2,16070.0,0.0,0.0,0.0,0.0,4.0,...,True,2,True,2,True,1,True,10,1979,True
4,5.6,8.6,6.6,13.2,10984.0,0.6,0.0,0.0,0.0,3.0,...,False,0,False,0,True,1,True,4,1597,False


In [24]:
# show_counts replaces the deprecated null_counts argument of DataFrame.info.
df.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 114 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   avgKills                                100 non-null    float64
 1   avgDeaths                               100 non-null    float64
 2   avgAssists                              100 non-null    float64
 3   avgChampLevel                           100 non-null    float64
 4   avgChampExperience                      100 non-null    float64
 5   avgDoubleKills                          100 non-null    float64
 6   avgTripleKills                          100 non-null    float64
 7   avgQuadraKills                          100 non-null    float64
 8   avgPentaKills                           100 non-null    float64
 9   avgConsumablesPurchased                 100 non-null    float64
 10  avgDamageDealtToBuildings               100 non-null    float6

In [25]:
# Resolve the puuid once; it does not change between pages, so there is no
# need to spend an extra API call on it for every request.
puuid = get_puuid('Exprience')
page_size = 100
all_ranked_matches = []  # one entry per page returned by the API
start = 0
while True:
    temp = lol_watcher.match.matchlist_by_puuid(region, puuid, count=page_size, type='ranked', start=start)
    if not temp:
        # An empty page means there is nothing left to fetch.
        break
    all_ranked_matches.append(temp)
    start += page_size
len(all_ranked_matches)

8

In [26]:
data = []
# enumerate gives the page number directly; list.index would rescan the list on
# every pass and report the wrong position if two pages were ever equal.
for page_number, group in enumerate(all_ranked_matches):
    print(page_number)
    data += format_data(group)

0
1
2
3
4
5
6
7


In [27]:
nathan = pd.DataFrame(data)
# show_counts replaces the deprecated null_counts argument of DataFrame.info.
nathan.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 716 entries, 0 to 715
Data columns (total 114 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   avgKills                                716 non-null    float64
 1   avgDeaths                               716 non-null    float64
 2   avgAssists                              716 non-null    float64
 3   avgChampLevel                           716 non-null    float64
 4   avgChampExperience                      716 non-null    float64
 5   avgDoubleKills                          716 non-null    float64
 6   avgTripleKills                          716 non-null    float64
 7   avgQuadraKills                          716 non-null    float64
 8   avgPentaKills                           716 non-null    float64
 9   avgConsumablesPurchased                 716 non-null    float64
 10  avgDamageDealtToBuildings               716 non-null    float

In [33]:
# Persist the per-match rows for later analysis; index=False keeps the row index out of both files.
nathan.to_csv('../Data/nathanLeagueData.csv', index=False)
# NOTE(review): the legacy .xls format depends on an Excel writer engine that recent pandas
# releases may no longer support. Confirm against the installed version, or consider .xlsx.
nathan.to_excel('../Data/nathanLeagueData.xls', index=False)