In [2]:
import pandas as pd
import numpy as np 
from statistics import mean
from sklearn.linear_model import LinearRegression

### Goal: Linear Model Predicting W/L based on Events

To accomplish this, we will compress the events in each game to singular features.

For each player, `p`, in each match, `m`, we'll end up with:



In [3]:
timelines_data = pd.read_json("data/d2_250_1_timelines.jsonl", lines = True)
timelines = pd.DataFrame(timelines_data['info'].to_list())
timelines_data = None

In [4]:
matches_data = pd.read_json("data/d2_250_1_matches.jsonl", lines = True)
matches = pd.DataFrame(matches_data['info'].to_list())
matches_data = None

In [5]:
matches = matches.drop_duplicates(subset=['gameId'])

In [6]:
df = matches.merge(timelines, on="gameId")

In [7]:
df = df[df['gameMode'] == 'CLASSIC']

In [8]:
df = df.drop(['gameCreation', 'gameMode', 'gameVersion', 'platformId', 'queueId', 
              'tournamentCode', 'gameModeMutators', 'frameInterval', 'mapId', 
              'gameType', 'gameStartTimestamp', 'gameEndTimestamp', 'gameName',
              'endOfGameResult_x', 'endOfGameResult_y'], axis=1)

In [9]:
df = df[df['gameDuration'] > 900]

In [10]:
df.head()

Unnamed: 0,gameDuration,gameId,participants_x,teams,frames,participants_y
0,1824,5421954555,"[{'PlayerScore0': 0, 'PlayerScore1': 0, 'Playe...","[{'bans': [{'championId': 887, 'pickTurn': 1},...","[{'events': [{'realTimestamp': 1764140458293, ...","[{'participantId': 1, 'puuid': 'Q1y-V54r0MfrVF..."
1,1252,5417770862,"[{'PlayerScore0': 0, 'PlayerScore1': 0, 'Playe...","[{'bans': [{'championId': 126, 'pickTurn': 1},...","[{'events': [{'realTimestamp': 1763683786918, ...","[{'participantId': 1, 'puuid': 'PwY8oV4LBajF1p..."
6,1874,5360407908,"[{'PlayerScore0': 0, 'PlayerScore1': 0, 'Playe...","[{'bans': [{'championId': 36, 'pickTurn': 1}, ...","[{'events': [{'realTimestamp': 1756759665632, ...","[{'participantId': 1, 'puuid': 'mxF9joPkqn2wGp..."
7,1713,5353144819,"[{'PlayerScore0': 0, 'PlayerScore1': 0, 'Playe...","[{'bans': [{'championId': 236, 'pickTurn': 1},...","[{'events': [{'realTimestamp': 1755922257096, ...","[{'participantId': 1, 'puuid': 'kWdt358dHpvC6T..."
8,2007,5417659188,"[{'PlayerScore0': 0, 'PlayerScore1': 0, 'Playe...","[{'bans': [{'championId': 117, 'pickTurn': 1},...","[{'events': [{'realTimestamp': 1763672882122, ...","[{'participantId': 1, 'puuid': 'VJKFH4zTTd6pB_..."


In [11]:
from enum import Enum
class Position(Enum):
    TOP = '1'
    JGL = '2'
    MID = '3'
    ADC = '4'
    SUP = '5'

position_map = {
    0: Position.TOP,
    1: Position.JGL,
    2: Position.MID,
    3: Position.ADC,
    4: Position.SUP
}

rows = [] # Role, Wards Placed, Game Duration, Win

for gameidx in range(len(df)):
    game = df.iloc[gameidx]
    for playeridx in range(10):
        wards_placed = 0
        for time in game['frames']:
            for event in time['events']:
                if 'creatorId' in event and event['creatorId'] == playeridx and event['type'] == 'WARD_PLACED':
                    wards_placed += 1

        position = position_map[playeridx % 5]
        win = game['participants_x'][playeridx]['win']
        duration = game['gameDuration']
        rows.append({'position':position, 'wards': wards_placed, 'duration': duration, 'win':win})
df_cleaned = pd.DataFrame(rows)
df_cleaned["position"] = df_cleaned["position"].astype("category")
df_cleaned.head(3)

Unnamed: 0,position,wards,duration,win
0,Position.TOP,0,1824,False
1,Position.JGL,2,1824,False
2,Position.MID,1,1824,False


In [12]:
X = pd.get_dummies(df_cleaned[["position", "wards", "duration"]], drop_first=True)
y = df_cleaned['win']

In [13]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=67)

In [14]:
from sklearn.metrics import mean_squared_error

lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred = lr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(mse)

0.2539724444842453


In [15]:
print(pd.DataFrame({"feature": X.columns, "beta": lr.coef_}))

                 feature      beta
0                  wards -0.000224
1               duration  0.000031
2  position_Position.JGL  0.023088
3  position_Position.MID  0.036382
4  position_Position.ADC  0.044136
5  position_Position.SUP  0.035441


In [16]:
df_cleaned.to_csv("d2_250_pwdw.csv", index=False)

In [40]:
unique_event_types = set()

for game in df['frames']:
    for minute in game:
        for event in minute['events']:
            unique_event_types.add(event['type'])

unique_event_types

{'BUILDING_KILL',
 'CHAMPION_KILL',
 'CHAMPION_SPECIAL_KILL',
 'CHAMPION_TRANSFORM',
 'DRAGON_SOUL_GIVEN',
 'ELITE_MONSTER_KILL',
 'FEAT_UPDATE',
 'GAME_END',
 'ITEM_DESTROYED',
 'ITEM_PURCHASED',
 'ITEM_SOLD',
 'ITEM_UNDO',
 'LEVEL_UP',
 'OBJECTIVE_BOUNTY_FINISH',
 'OBJECTIVE_BOUNTY_PRESTART',
 'PAUSE_END',
 'SKILL_LEVEL_UP',
 'TURRET_PLATE_DESTROYED',
 'WARD_KILL',
 'WARD_PLACED'}

In [97]:
def print_event(event_type: str):
    for game in df['frames']:
        for minute in game:
            for event in minute['events']:
                if event['type'] ==event_type:
                    print(event)
                    return
                
print_event("DRAGON_SOUL_GIVEN")

{'name': 'Mountain', 'teamId': 0, 'timestamp': 785111, 'type': 'DRAGON_SOUL_GIVEN'}


In [None]:
df.iloc[0]['participants_x'][9]['championName']
teamid = game['participants_x'][playeridx]['teamId']

'SUPPORT'

In [None]:
big_rows = [] 

for gameidx in range(len(df)):
    game = df.iloc[gameidx]
    for playeridx in range(10):
        pdata = game['participants_x'][playeridx]
        holdPings = pdata['holdPings']
        pushPings = pdata['pushPings']
        allInPings = pdata['allInPings']
        basicPings = pdata['basicPings']
        dangerPings = pdata['dangerPings']
        commandPings = pdata['commandPings']
        getBackPings = pdata['getBackPings']
        omwPings = pdata['onMyWayPings']
        retreatPings = pdata['retreatPings']
        assistPings = pdata['assistMePings']
        needVisionPings = pdata['needVisionPings']
        enemyVisionPings = pdata['enemyVisionPings']
        enemyMissingPings = pdata['enemyMissingPings']
        clearedPings = pdata['visionClearedPings']

        totalPings = holdPings + pushPings + allInPings + basicPings + dangerPings + commandPings + getBackPings + omwPings + retreatPings + assistPings + needVisionPings + enemyVisionPings + enemyMissingPings + clearedPings
        wardsPlaced = pdata['wardsPlaced']
        wardsKilled = pdata['wardsKilled']
        detectorPlaced = pdata['detectorWardsPlaced']
        cctime = pdata['timeCCingOthers']
        turretTakedowns = pdata['turretTakedowns']
        turretsLost = pdata['turretsLost']
        position = game['participants_x'][playeridx]['role']
        duration = game['gameDuration']
        champion = pdata['championName']

        win = game['participants_x'][playeridx]['win']

        big_rows.append({'missingPings':enemyMissingPings, 'totalPings': totalPings, "wardsPlaced": wardsPlaced, 
                         'wardsKilled': wardsKilled, 'detectorsPlaced': detectorPlaced, 'position':position, 
                         'cctime': cctime, 'turretTakedowns': turretTakedowns, 'turretsLost': turretsLost,
                         'position': position, 'duration': duration, 'champ': champion, 'win':win})

df_big = pd.DataFrame(big_rows)
df_big["position"] = df_big["position"].astype("category")

df_big.head(3)

Unnamed: 0,missingPings,totalPings,wardsPlaced,wardsKilled,detectorsPlaced,position,cctime,turretTakedowns,turretsLost,duration,champ,win
0,4,8,2,2,1,SOLO,28,1,11,1824,Sion,False
1,3,37,1,9,0,NONE,15,0,11,1824,Sylas,False
2,2,5,10,6,1,CARRY,26,3,11,1824,Caitlyn,False


In [100]:
df_big.to_csv("d2_250_big.csv", index=False)

wonder if we could divide the game up in to a set of "Plays", each with their own statistics (like events, wards placed, etc) and create a model that gives you feedback on each "Play", calculating which contributes to the win/loss the most.

In [10]:
df['participants_x'][30][5]['win']

# X: participant role (1-5), champion, game duration, wards placed
# Y: participant win (0/1)

True