The idea here is to extract timelines from the data. This will generally involve figuring out how the hell to parse the timeline events.

In [3]:
%matplotlib inline
import numpy as np
import json
import matplotlib.pyplot as plt

In [4]:
# Load a single match record. JSON text is UTF-8, so the encoding is pinned
# instead of relying on the platform's default locale encoding.
with open("datasets/matches/1179293066.json", "r", encoding="utf-8") as f:
    data = json.load(f)

## Discrete Event Timeline ##

We want to extract a timeline of a certain kind of discrete event (e.g. item purchases). The ordering of events could prove invaluable.

In [35]:
# Peek at the per-participant entries stored in the frame at index 3.
data["timeline"]["frames"][3]['participantFrames']

{'1': {'currentGold': 344,
  'dominionScore': 0,
  'jungleMinionsKilled': 4,
  'level': 2,
  'minionsKilled': 0,
  'participantId': 1,
  'position': {'x': 7614, 'y': 4025},
  'teamScore': 0,
  'totalGold': 814,
  'xp': 500},
 '10': {'currentGold': 428,
  'dominionScore': 0,
  'jungleMinionsKilled': 6,
  'level': 3,
  'minionsKilled': 0,
  'participantId': 10,
  'position': {'x': 11116, 'y': 8502},
  'teamScore': 0,
  'totalGold': 898,
  'xp': 672},
 '2': {'currentGold': 266,
  'dominionScore': 0,
  'jungleMinionsKilled': 0,
  'level': 2,
  'minionsKilled': 5,
  'participantId': 2,
  'position': {'x': 6961, 'y': 7243},
  'teamScore': 0,
  'totalGold': 741,
  'xp': 500},
 '3': {'currentGold': 231,
  'dominionScore': 0,
  'jungleMinionsKilled': 0,
  'level': 1,
  'minionsKilled': 3,
  'participantId': 3,
  'position': {'x': 852, 'y': 10985},
  'teamScore': 0,
  'totalGold': 701,
  'xp': 264},
 '4': {'currentGold': 176,
  'dominionScore': 0,
  'jungleMinionsKilled': 0,
  'level': 2,
  'min

In [26]:
def event_selector(data, participantId, eventType):
    """Collect one participant's events of a given type across all frames.

    Parameters
    ----------
    data : dict
        Match record; ``data['timeline']['frames']`` is iterated in order.
    participantId
        Value compared against each event's ``'participantId'`` entry.
    eventType
        Value compared against each event's ``'eventType'`` entry.

    Returns
    -------
    list
        The matching event dicts from every frame, flattened into a single
        list in frame order. Frames without an ``'events'`` key contribute
        nothing.
    """
    def select_events(frame):
        # Frames that carry no 'events' key simply yield no events.
        # 'participantId' is read with .get() so that an event of the
        # requested type that lacks the key is skipped rather than raising
        # KeyError.
        return [event for event in frame.get('events', [])
                if event['eventType'] == eventType
                and event.get('participantId') == participantId]

    return [event
            for frame in data['timeline']['frames']
            for event in select_events(frame)]

In [29]:
# The extractor defined in the cell above is named `event_selector`; no
# `event_timeline` is defined in the preceding cells, so calling that name
# fails on a fresh kernel.
event_selector(data, 1, 'SKILL_LEVEL_UP')

[{'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 2,
  'timestamp': 45667},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 3,
  'timestamp': 129787},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 1,
  'timestamp': 195859},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 3,
  'timestamp': 286875},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 3,
  'timestamp': 358399},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 4,
  'timestamp': 510350},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 3,
  'timestamp': 625389},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 1,
  'timestamp': 721002},
 {'eventT

## Continuous Timeline ##
Some data (such as totalGold) is a continuous value sampled at a given rate. It turns out that we will need much the same structure as with discrete events. FP to the rescue!

In [36]:
def timeline(data, selector):
    """Run ``selector`` over every frame and concatenate what it returns.

    Parameters
    ----------
    data : dict
        Match record; ``data['timeline']['frames']`` is iterated in order.
    selector : callable
        Called with one frame at a time; must return an iterable of items.

    Returns
    -------
    list
        All items produced by ``selector``, flattened in frame order.
    """
    collected = []
    for frame in data['timeline']['frames']:
        collected.extend(selector(frame))
    return collected

In [41]:
def event_selector(participantId, eventType):
    """Build a frame selector for one participant's events of a given type.

    Parameters
    ----------
    participantId
        Value compared against each event's ``'participantId'`` entry.
    eventType
        Value compared against each event's ``'eventType'`` entry.

    Returns
    -------
    callable
        ``selector(frame)`` returning the list of matching events in
        ``frame['events']``, or an empty list when the frame has no
        ``'events'`` key.
    """
    def selector(frame):
        if 'events' not in frame:
            return []
        # 'participantId' is read with .get() so that an event of the
        # requested type that lacks the key is treated as a non-match
        # instead of raising KeyError.
        return [event for event in frame['events']
                if event['eventType'] == eventType
                and event.get('participantId') == participantId]
    return selector

In [42]:
# Same query as before, now expressed through the generic timeline() helper:
# participant 1's SKILL_LEVEL_UP events.
timeline(data, event_selector(1, 'SKILL_LEVEL_UP'))

[{'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 2,
  'timestamp': 45667},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 3,
  'timestamp': 129787},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 1,
  'timestamp': 195859},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 3,
  'timestamp': 286875},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 3,
  'timestamp': 358399},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 4,
  'timestamp': 510350},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 3,
  'timestamp': 625389},
 {'eventType': 'SKILL_LEVEL_UP',
  'levelUpType': 'NORMAL',
  'participantId': 1,
  'skillSlot': 1,
  'timestamp': 721002},
 {'eventT

In [53]:
def part_selector(participantId, key):
    """Build a frame selector that samples one stat of one participant.

    Parameters
    ----------
    participantId
        Participant to read; converted with ``str()`` to index
        ``frame['participantFrames']``.
    key
        Name of the entry to read from that participant's frame data.

    Returns
    -------
    callable
        ``selector(frame)`` returning a one-element list holding a dict with
        the frame's ``'timestamp'`` and the sampled ``'value'``.
    """
    def selector(frame):
        stamp = frame['timestamp']
        stats = frame['participantFrames'][str(participantId)]
        return [{'timestamp': stamp, 'value': stats[key]}]
    return selector

In [54]:
# One {'timestamp', 'value'} sample per frame: participant 1's totalGold.
timeline(data, part_selector(1, 'totalGold'))

[{'timestamp': 0, 'value': 475},
 {'timestamp': 60011, 'value': 475},
 {'timestamp': 120017, 'value': 532},
 {'timestamp': 180020, 'value': 814},
 {'timestamp': 240040, 'value': 1102},
 {'timestamp': 300053, 'value': 1424},
 {'timestamp': 360110, 'value': 2049},
 {'timestamp': 420154, 'value': 2264},
 {'timestamp': 480188, 'value': 2488},
 {'timestamp': 540204, 'value': 2644},
 {'timestamp': 600261, 'value': 2958},
 {'timestamp': 660276, 'value': 3388},
 {'timestamp': 720283, 'value': 3854},
 {'timestamp': 780304, 'value': 4218},
 {'timestamp': 840309, 'value': 4620},
 {'timestamp': 900326, 'value': 5008},
 {'timestamp': 960332, 'value': 5368},
 {'timestamp': 1020333, 'value': 5800},
 {'timestamp': 1080357, 'value': 7059},
 {'timestamp': 1140369, 'value': 7284},
 {'timestamp': 1200375, 'value': 7398},
 {'timestamp': 1260408, 'value': 7512},
 {'timestamp': 1320411, 'value': 7626},
 {'timestamp': 1380443, 'value': 7740},
 {'timestamp': 1440468, 'value': 7854},
 {'timestamp': 1500507, 'va

## Binning ##
We are going to want to feed this to some ML or stats algorithms, but I don't know how to feed in continuous data like this directly. So, we're going to bin it into (up to) 60 1-minute bins. Binning and feeding to ML really only makes sense for continuous values right off the bat. For discrete values, further processing will be needed. We will need to try a couple of examples of that to figure out what we need.

In [56]:
def bucket_timeline(timeline, interval=60000, intervals=60):
    """Bin a sampled timeline into a fixed-length array of values.

    Parameters
    ----------
    timeline : iterable of dict
        Samples with a ``'timestamp'`` and a ``'value'`` entry.
    interval : int
        Width of one bucket, in the same unit as the timestamps.
    intervals : int
        Number of buckets in the result.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(intervals,)``. Each bucket holds the value of the
        last sample that fell into it; buckets with no sample stay 0.
        Samples whose timestamp falls outside ``[0, interval * intervals)``
        are ignored.
    """
    buckets = np.zeros((intervals,))
    for event in timeline:
        index = event['timestamp'] // interval
        # Drop samples outside the window: an index >= intervals would raise
        # IndexError, and a negative one would wrap around to the array's end.
        if 0 <= index < intervals:
            buckets[index] = event['value']
    return buckets

In [60]:
# Per-bucket totalGold of participants 1 and 2, then their difference.
A, B = (
    bucket_timeline(timeline(data, part_selector(pid, 'totalGold')))
    for pid in (1, 2)
)
A - B

array([    0.,     0.,     0.,    73.,   192.,   202.,   646.,   689.,
         643.,   207.,   187.,   362.,   536.,   364.,   357.,   552.,
         734.,   880.,  1358.,  1289.,  1289.,  1289.,  1289.,  1289.,
        1289.,  1289.,  1289.,  1289.,     0.,     0.,     0.,     0.,
           0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
           0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
           0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
           0.,     0.,     0.,     0.])

In [61]:
def bucket_discrete_timeline(timeline, interval=60000, intervals=60):
    """Group timeline events into fixed-width time buckets.

    Parameters
    ----------
    timeline : iterable of dict
        Events with a ``'timestamp'`` entry.
    interval : int
        Width of one bucket, in the same unit as the timestamps.
    intervals : int
        Number of buckets in the result.

    Returns
    -------
    list of list
        ``intervals`` lists; bucket ``i`` holds, in input order, the events
        whose timestamp floor-divided by ``interval`` equals ``i``. Events
        outside ``[0, interval * intervals)`` are ignored.
    """
    buckets = [[] for _ in range(intervals)]
    for event in timeline:
        index = event['timestamp'] // interval
        # Drop events outside the window: an index >= intervals would raise
        # IndexError, and a negative one would land in a bucket counted from
        # the end of the list.
        if 0 <= index < intervals:
            buckets[index].append(event)
    return buckets

In [63]:
# Participant 1's ITEM_PURCHASED events grouped into the default
# 60 buckets of 60000 timestamp units each.
bucket_discrete_timeline(timeline(data, event_selector(1, 'ITEM_PURCHASED')))

[[{'eventType': 'ITEM_PURCHASED',
   'itemId': 1039,
   'participantId': 1,
   'timestamp': 3909},
  {'eventType': 'ITEM_PURCHASED',
   'itemId': 2003,
   'participantId': 1,
   'timestamp': 4451},
  {'eventType': 'ITEM_PURCHASED',
   'itemId': 2003,
   'participantId': 1,
   'timestamp': 4582},
  {'eventType': 'ITEM_PURCHASED',
   'itemId': 3340,
   'participantId': 1,
   'timestamp': 5002}],
 [],
 [],
 [{'eventType': 'ITEM_PURCHASED',
   'itemId': 3715,
   'participantId': 1,
   'timestamp': 225186},
  {'eventType': 'ITEM_PURCHASED',
   'itemId': 2003,
   'participantId': 1,
   'timestamp': 225707},
  {'eventType': 'ITEM_PURCHASED',
   'itemId': 2003,
   'participantId': 1,
   'timestamp': 225869},
  {'eventType': 'ITEM_PURCHASED',
   'itemId': 2003,
   'participantId': 1,
   'timestamp': 226003},
  {'eventType': 'ITEM_PURCHASED',
   'itemId': 2003,
   'participantId': 1,
   'timestamp': 226167}],
 [],
 [],
 [{'eventType': 'ITEM_PURCHASED',
   'itemId': 1036,
   'participantId': 1,
 

## Conclusion ##
I will conclude this initial foray into timeline data by computing the adversarial timeline between mid-laners:

In [81]:
# Ids of the participants whose timeline lane is 'MIDDLE'. The unpacking
# requires exactly two of them.
A, B = (
    participant['participantId']
    for participant in data['participants']
    if participant['timeline']['lane'] == 'MIDDLE'
)

# Bucketed totalGold for each of the two, and their per-bucket difference.
gold_A = bucket_timeline(timeline(data, part_selector(A, 'totalGold')))
gold_B = bucket_timeline(timeline(data, part_selector(B, 'totalGold')))
print(gold_A - gold_B)

# 'winner' flag of the same two participants, in data['participants'] order.
print([
    participant['stats']['winner']
    for participant in data['participants']
    if participant['participantId'] in (A, B)
])

[    0.     0.     0.   -45.  -155.  -100.  -186.  -149.   -75.   173.
   393.   460.   621.   834.  1129.  1039.   526.   471.  1094.  1101.
  1079.  1079.  1079.   659.   214.    66.  -274.  -324.     0.     0.
     0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
     0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
     0.     0.     0.     0.     0.     0.     0.     0.     0.     0.]
[False, True]


*Fin*