In [1]:
import pandas as pd
import numpy as np 
from statistics import mean
from sklearn.linear_model import LinearRegression

In [2]:
# The dump is JSON Lines (one JSON object per line), so lines=True is required.
timelines_data = pd.read_json(
    "data/timelines_wv5j6VBEmzdK2utXs45NnlvPvwRXMOsBEHIP6zAME2UE5yWGQnO4QGdPES59nYZ3smWsod2ne8IVZQ.jsonl",
    lines=True,
)
# Expand the per-row 'metadata' entries into a frame of their own.
timelines = pd.DataFrame(timelines_data['metadata'].tolist())
timelines.head(3)

Unnamed: 0,dataVersion,matchId,participants
0,2,NA1_5272522451,[ZPVRzI2AT1wgZhxjtaDI3h80s3X4eiKQZnkTZLOd5yr88...
1,2,NA1_5228977035,[PHFC8ye_OgxGH7zpIqSEVvalDOIqsh38air5l_1G3y3bQ...
2,2,NA1_5272507594,[lgr9k4BJU3kIwyZ9OZOpDmSMdq9qaWfHyNKcy1Va9ABB9...


First, we read our json. Note that the data that I am providing is ".jsonl"; this means that we need to include the `lines=True` argument.

Then, we look at the metadata component of the timelines data. This gives us the PUUIDs for users (also accessible in the info section; see below) and matchId. matchId is very important for removing duplicate entries from the dataframe (but is also available in the info section).

All-in-all, we can discard this portion of our data.

In [3]:
# Rebind `timelines` to the expanded 'info' entries (this replaces the metadata view).
timelines = pd.DataFrame(timelines_data['info'].tolist())
timelines.head(3)

Unnamed: 0,endOfGameResult,frameInterval,frames,gameId,participants
0,GameComplete,60000,"[{'events': [{'realTimestamp': 1745346410073, ...",5272522451,"[{'participantId': 1, 'puuid': 'ZPVRzI2AT1wgZh..."
1,GameComplete,60000,"[{'events': [{'realTimestamp': 1739503531840, ...",5228977035,"[{'participantId': 1, 'puuid': 'PHFC8ye_OgxGH7..."
2,GameComplete,60000,"[{'events': [{'realTimestamp': 1745344123861, ...",5272507594,"[{'participantId': 1, 'puuid': 'lgr9k4BJU3kIwy..."


In [4]:
# Distinct end-of-game outcomes present in the data.
print(set(timelines['endOfGameResult'].tolist()))

{'GameComplete'}


In [5]:
# Distinct frame intervals present in the data.
print(set(timelines['frameInterval'].tolist()))

{60000}


In [6]:
# Number of distinct game ids.
print(len(set(timelines['gameId'].tolist())))

422


The `'info'` portion of the data has only a few columns. 

`endOfGameResult` indicates whether the game ended normally (through nexus destruction or forfeit) or was preemptively ended for other reasons. We should probably discard games that did not end in "GameComplete". Fortunately, all of the ones here did.

`frameInterval` is how long in milliseconds between each frame snapshot.

`frames` is a massive list of nested lists and dictionaries that lay out data about the game taken at an interval.

`gameId` is the unique identifier for the game; we should discard duplicates

`participants` is a list of dictionaries, one per player, containing the puuid and participant id of each player in the match. Potentially useful for giving the model data about a player's performance in previous games or their rank.


Curiously, we can't actually directly get the result of the game through any of our data here. It means we will have to include data from the other API endpoint, "Match Data".

*Let's dig in to `frames` more...*

In [7]:
# Row label of the game to inspect, and the frame index within that game.
entry_idx, time = 0, 2
# Show only the first three events logged in that frame.
timelines.loc[entry_idx, 'frames'][time]['events'][:3]

[{'itemId': 3340,
  'participantId': 7,
  'timestamp': 61482,
  'type': 'ITEM_DESTROYED'},
 {'itemId': 3364,
  'participantId': 7,
  'timestamp': 61482,
  'type': 'ITEM_PURCHASED'},
 {'creatorId': 10,
  'timestamp': 70233,
  'type': 'WARD_PLACED',
  'wardType': 'UNDEFINED'}]

Here are the first 3 events of the frame at index `time` = 2, i.e. the snapshot at 2 * `frameInterval`, which covers the events between the 1- and 2-minute marks of the game.

Effectively, this is a log of what participants did; you can see that participant 7 swapped item 3340 for item 3364 (the former was destroyed and the latter purchased) at about 61 seconds in, and that participant 10 placed a ward at about 70 seconds.

In [8]:
# Keep only games with more than 15 frames, so frame index 15 always exists.
# The index is reset because later cells look rows up with frames_15['frames'][0]
# and frames_15['frames'][entry_idx]; those are label lookups, and without a fresh
# 0..n-1 index they raise KeyError whenever the row with that label was filtered out.
frames_15 = timelines[timelines['frames'].map(len) > 15].reset_index(drop=True)

In [9]:
# Mean jungleMinionsKilled read from frame index 15, for participant keys '1'..'10'.
for participant_id in map(str, range(1, 11)):
    avg_jungle_minions = mean(
        game_frames[15]['participantFrames'][participant_id]['jungleMinionsKilled']
        for game_frames in frames_15['frames']
    )
    print(f"{participant_id}    {avg_jungle_minions}")

1    0.6650602409638554
2    85.77831325301204
3    0.6120481927710844
4    0.27710843373493976
5    0.26506024096385544
6    0.43614457831325304
7    84.57590361445783
8    0.327710843373494
9    0.2891566265060241
10    0.26987951807228916


With the frames data, we can get information such as jungle minions killed per position. It looks like participant 1, the blue side toplaner, tends to kill about 50% more jungle minions than the opposing top laner (participant 6)... That's interesting.

We can also get other good info like this:

In [None]:
from enum import Enum


class Position(Enum):
    """Participant slots of a match; each value is a key of `participantFrames`.

    Iterating the enum yields ten members: values '1'-'5' are the blue-side
    (B_) slots and '6'-'10' the red-side (R_) slots, each ordered top, jungle,
    mid, ADC, support. The cells that follow rely on this: they print one line
    per member and split the results into a first and a last group of five.
    """
    B_TOP = '1'
    B_JGL = '2'
    B_MID = '3'
    B_ADC = '4'
    B_SUP = '5'
    R_TOP = '6'
    R_JGL = '7'
    R_MID = '8'
    R_ADC = '9'
    R_SUP = '10'

    # Side-less names kept as aliases of the blue-side members so existing
    # references such as Position.TOP still resolve. Aliases share a value
    # with an earlier member and are therefore skipped during iteration.
    TOP = '1'
    JGL = '2'
    MID = '3'
    ADC = '4'
    SUP = '5'

In [11]:
# Mean totalDamageDoneToChampions read from frame index 15, per Position member.
positions = dict()
for p in Position:
    damage = mean(
        f[15]['participantFrames'][p.value]['damageStats']['totalDamageDoneToChampions']
        for f in frames_15['frames']
    )
    print(str(p) + "  " + str(damage))
    # Keyed by str(p); insertion order follows the enum's definition order.
    positions.update({str(p) : damage})

# Report which position holds each extreme together with its value, so the
# "Position" label matches what is actually printed.
highest = max(positions, key=positions.get)
lowest = min(positions, key=positions.get)
print("Highest Damage Position @15m: " + highest + " (" + str(positions[highest]) + ")")
print("Lowest Damage Position @15m: " + lowest + " (" + str(positions[lowest]) + ")")


Position.B_TOP  8473.404819277108
Position.B_JGL  6063.04578313253
Position.B_MID  8073.238554216868
Position.B_ADC  8111.284337349398
Position.B_SUP  5469.46265060241
Position.R_TOP  8558.32048192771
Position.R_JGL  5871.086746987952
Position.R_MID  8126.253012048192
Position.R_ADC  7810.443373493976
Position.R_SUP  5405.134939759037
Highest Damage Position @15m: 8558.32048192771
Lowest Damage Position @15m: 5405.134939759037


We can see that the red team toplaner typically does the most damage to champions at 15 minutes.

In [12]:
# NOTE(review): assumes `positions` holds ten entries with the blue-side
# positions first and the red-side positions last — confirm against the enum.
damage_values = list(positions.values())
blue_mean_damage = mean(damage_values[:5])
red_mean_damage = mean(damage_values[-5:])

print(f"blue: {blue_mean_damage}  red:  {red_mean_damage}")

blue: 7238.087228915662  red:  7154.247710843373


In [13]:
entry_idx = 200
minute = 15

# Events can be narrowed down to a single type.
# Here: every ward placed in one game during the frame at index `minute`
# (between 14 and 15 minutes).
events = frames_15.loc[entry_idx, 'frames'][minute]['events']
wards = list(filter(lambda event: event['type'] == 'WARD_PLACED', events))
print(wards)


[{'creatorId': 9, 'timestamp': 859805, 'type': 'WARD_PLACED', 'wardType': 'YELLOW_TRINKET'}, {'creatorId': 10, 'timestamp': 860272, 'type': 'WARD_PLACED', 'wardType': 'CONTROL_WARD'}]


In [14]:
# Champion kills among the entries currently held in `events`.
kills = [
    event
    for event in events
    if event['type'] == 'CHAMPION_KILL'
]
len(kills)

2

I am particularly interested in the `position` data we have for each player. With this, we can create a heatmap of player movements and actions such as where they were, where they tend to get a lot of kills, where they tend to die a lot. 

This could be particularly useful for a model (or just a cool visualization!)

In [15]:
# Coordinates stored on the first entry of `kills`.
first_kill = kills[0]
first_kill['position']

{'x': 12954, 'y': 3378}

In [16]:
# Position recorded for participant '1' in the selected game and frame.
selected_frame = frames_15.loc[entry_idx, 'frames'][minute]
selected_frame['participantFrames']['1']['position']

{'x': 3066, 'y': 12569}

In [23]:
# Number of games kept in frames_15.
n = len(frames_15)

# participantId of the first event in frame 15 of the game at label 0.
frames_15.loc[0, 'frames'][15]['events'][0]['participantId']

6

Looking at what we can use in a linear model...

In [36]:
# Flatten the per-frame event lists of the game at label 0 into a single list.
events = [
    event
    for frame in frames_15['frames'][0]
    for event in frame['events']
]

# One row per event; list the distinct event types that occur.
events_df = pd.DataFrame(events)
events_df['type'].unique()

array(['PAUSE_END', 'ITEM_PURCHASED', 'SKILL_LEVEL_UP', 'ITEM_UNDO',
       'WARD_PLACED', 'ITEM_DESTROYED', 'LEVEL_UP', 'CHAMPION_KILL',
       'CHAMPION_SPECIAL_KILL', 'FEAT_UPDATE', 'TURRET_PLATE_DESTROYED',
       'ELITE_MONSTER_KILL', 'WARD_KILL', 'DRAGON_SOUL_GIVEN',
       'ITEM_SOLD', 'BUILDING_KILL', 'GAME_END'], dtype=object)

In [110]:
# NOTE(review): the variable is called `building_kills`, but the filter below
# selects 'ITEM_SOLD' events — rename it or change the filter once the intent
# is confirmed.
building_kills = [event for event in events if event['type'] == 'ITEM_SOLD']

building_kills

[{'itemId': 1056,
  'participantId': 8,
  'timestamp': 767255,
  'type': 'ITEM_SOLD'},
 {'itemId': 2031,
  'participantId': 3,
  'timestamp': 975771,
  'type': 'ITEM_SOLD'},
 {'itemId': 1083,
  'participantId': 3,
  'timestamp': 1128144,
  'type': 'ITEM_SOLD'},
 {'itemId': 2031,
  'participantId': 6,
  'timestamp': 1151393,
  'type': 'ITEM_SOLD'},
 {'itemId': 1054,
  'participantId': 6,
  'timestamp': 1231576,
  'type': 'ITEM_SOLD'},
 {'itemId': 2055,
  'participantId': 10,
  'timestamp': 1541513,
  'type': 'ITEM_SOLD'},
 {'itemId': 1055,
  'participantId': 3,
  'timestamp': 1860941,
  'type': 'ITEM_SOLD'},
 {'itemId': 1055,
  'participantId': 4,
  'timestamp': 1874822,
  'type': 'ITEM_SOLD'},
 {'itemId': 1055,
  'participantId': 9,
  'timestamp': 1912463,
  'type': 'ITEM_SOLD'},
 {'itemId': 1055,
  'participantId': 3,
  'timestamp': 2019678,
  'type': 'ITEM_SOLD'},
 {'itemId': 1055,
  'participantId': 3,
  'timestamp': 2037732,
  'type': 'ITEM_SOLD'}]

In [109]:
# Build an id -> name lookup from the item file: take the first row's 'data'
# mapping, transpose it so each item id becomes a row, label the column axis
# "id", and keep only the first column.
items = (
    pd.DataFrame(pd.read_json("data/item.json", lines=True)['data'][0])
    .T
    .rename_axis("id", axis=1)
    .iloc[:, :1]
)
items

id,name
1001,Boots
1004,Faerie Charm
1006,Rejuvenation Bead
1011,Giant's Belt
1018,Cloak of Agility
...,...
6694,Serylda's Grudge
6695,Serpent's Fang
6696,Axiom Arc
8001,Anathema's Chains


In [64]:
player = 7

# Collect every event of the game at label 0 in which `player` appears under
# one of these id fields. An event is added once per matching field, checked
# in this order.
actor_keys = ('participantId', 'killerId', 'creatorId')
player_events = [
    event
    for frame in frames_15['frames'][0]
    for event in frame['events']
    for key in actor_keys
    if event.get(key) == player
]

player_events

[{'itemId': 1101,
  'participantId': 7,
  'timestamp': 16410,
  'type': 'ITEM_PURCHASED'},
 {'itemId': 3340,
  'participantId': 7,
  'timestamp': 16945,
  'type': 'ITEM_PURCHASED'},
 {'itemId': 2003,
  'participantId': 7,
  'timestamp': 17279,
  'type': 'ITEM_PURCHASED'},
 {'creatorId': 7,
  'timestamp': 50880,
  'type': 'WARD_PLACED',
  'wardType': 'YELLOW_TRINKET'},
 {'itemId': 3340,
  'participantId': 7,
  'timestamp': 61482,
  'type': 'ITEM_DESTROYED'},
 {'itemId': 3364,
  'participantId': 7,
  'timestamp': 61482,
  'type': 'ITEM_PURCHASED'},
 {'levelUpType': 'NORMAL',
  'participantId': 7,
  'skillSlot': 2,
  'timestamp': 89572,
  'type': 'SKILL_LEVEL_UP'},
 {'level': 2, 'participantId': 7, 'timestamp': 109950, 'type': 'LEVEL_UP'},
 {'levelUpType': 'NORMAL',
  'participantId': 7,
  'skillSlot': 3,
  'timestamp': 111921,
  'type': 'SKILL_LEVEL_UP'},
 {'level': 3, 'participantId': 7, 'timestamp': 156111, 'type': 'LEVEL_UP'},
 {'levelUpType': 'NORMAL',
  'participantId': 7,
  'skillS

-> Role (0-5)
-> Champion (0-188)
-> Number of wards placed (0-200)
-> Game duration (0-50000)

<- Win?

In [65]:
test_df = pd.DataFrame(player_events)
# NOTE(review): this counts 'WARD_KILL' events, while the feature list above
# mentions wards *placed* — confirm which one `wards` is meant to hold.
wards = int((test_df['type'] == 'WARD_KILL').sum())
wards

6