In [1]:
import os
from dotenv import load_dotenv
import pandas as pd

from src.datasetgenerator import StratzQuery, DatasetGenerator

In [2]:
load_dotenv()
#querier = StratzQuery(os.getenv('STRATZ_TOKEN'))
#querier.get_match(7590822094)

csv_path = os.getenv('CSV_PATH')

if not os.getenv('STRATZ_TOKEN'):
    raise FileNotFoundError("Not found STRATZ_TOKEN on .env")

generator = DatasetGenerator(os.getenv('STRATZ_TOKEN'), csv_path)

In [3]:

league = generator.get_professional_league(16842)
matches = league['matches']
len(matches)

43

In [4]:
matches[0].keys()

dict_keys(['id', 'didRadiantWin', 'durationSeconds', 'startDateTime', 'firstBloodTime', 'averageRank', 'players', 'playbackData', 'averageImp', 'actualRank', 'radiantKills', 'direKills', 'radiantTeam', 'direTeam'])

In [5]:
matches[0]['playbackData']['wardEvents']

[{'indexId': 450,
  'time': -63,
  'positionX': 126,
  'positionY': 126,
  'fromPlayer': 4,
  'wardType': 'OBSERVER',
  'action': 'SPAWN',
  'playerDestroyed': None},
 {'indexId': 454,
  'time': -62,
  'positionX': 154,
  'positionY': 100,
  'fromPlayer': 2,
  'wardType': 'OBSERVER',
  'action': 'SPAWN',
  'playerDestroyed': None},
 {'indexId': 459,
  'time': -53,
  'positionX': 120,
  'positionY': 122,
  'fromPlayer': 128,
  'wardType': 'OBSERVER',
  'action': 'SPAWN',
  'playerDestroyed': None},
 {'indexId': 517,
  'time': -17,
  'positionX': 138,
  'positionY': 82,
  'fromPlayer': 129,
  'wardType': 'OBSERVER',
  'action': 'SPAWN',
  'playerDestroyed': None},
 {'indexId': 548,
  'time': -9,
  'positionX': 128,
  'positionY': 74,
  'fromPlayer': 130,
  'wardType': 'SENTRY',
  'action': 'SPAWN',
  'playerDestroyed': None},
 {'indexId': 854,
  'time': 14,
  'positionX': 168,
  'positionY': 92,
  'fromPlayer': 2,
  'wardType': 'SENTRY',
  'action': 'SPAWN',
  'playerDestroyed': None},
 

In [6]:
def get_deaths_through_ward(match:dict):

    # if match[']
    deaths_through_ward = {
        'time': [],
        'isPlayerRadiant': [],
        'positionX': [],
        'positionY': [],
    }
    for player in match["players"]:
        isPlayerRadiant = player['isRadiant']
        deaths_events = player['playbackData']['deathEvents']
        for d_e in deaths_events:
            if not d_e['isWardWalkThrough']:
                continue
            deaths_through_ward['time'].append(d_e['time'])
            deaths_through_ward['isPlayerRadiant'].append(isPlayerRadiant)
            deaths_through_ward['positionX'].append(d_e['positionX'])
            deaths_through_ward['positionY'].append(d_e['positionY'])
    
    df_deaths = pd.DataFrame.from_dict(deaths_through_ward)
    df_deaths['match'] = match['id']
    return df_deaths

def get_deaths_through_ward_league(league_id):
    
    league = generator.get_professional_league(league_id=league_id)
    matches = league['matches']

    df_deaths = pd.concat([get_deaths_through_ward(match) for match in matches])
    return df_deaths

df_deaths = get_deaths_through_ward_league(16842)
df_deaths

Unnamed: 0,time,isPlayerRadiant,positionX,positionY,match
0,1494,True,182,60,7811671022
1,1418,True,158,70,7811671022
2,1512,True,174,64,7811671022
3,829,True,152,64,7811671022
4,1487,True,194,70,7811671022
...,...,...,...,...,...
27,479,False,174,96,7803951479
28,786,False,170,92,7803951479
29,1363,False,150,148,7803951479
30,1781,False,66,192,7803951479


In [7]:
from collections import defaultdict

def get_match_wards(match:dict) -> dict:
    player_team_by_slot = {player['playerSlot']: player['isRadiant'] for player in match['players']}
    match_id = match['id']
    didRadiantWin = match['didRadiantWin']
    ward_events = match['playbackData']['wardEvents']
    wards = defaultdict(lambda: {})
    for w_e in ward_events:
        if w_e["action"] == "SPAWN":
            wards[w_e['indexId']] = {
                "id": f"{match_id}_{w_e['indexId']}",
                "match": match_id,
                "spawned_time": w_e["time"],
                "despawned_time": None,
                "positionX": w_e["positionX"],
                "positionY": w_e["positionY"],
                "wardType": w_e["wardType"],
                "isRadiant": player_team_by_slot[w_e["fromPlayer"]],
                "playerDestroyed": w_e["playerDestroyed"],
                "didRadiantWin": didRadiantWin,
            }
            continue
        wards[w_e['indexId']]["despawned_time"] = w_e['time']
    

    # get kills around
    
    return dict(wards)

wards = get_match_wards(matches[0])
wards.keys(), len(wards.keys())

(dict_keys([450, 454, 459, 517, 548, 854, 1877, 1884, 2217, 2224, 2491, 2257, 881, 2227, 2383, 2400, 1656, 2215, 1950, 1683, 1126, 2487, 2146, 2457, 2405, 2047, 2425, 2451, 2088, 1040, 2551, 814, 2226, 2177, 2128, 1918, 2480, 2209, 856, 1848, 920, 1598, 747, 2508, 1720, 2529, 857, 1862, 1554, 2090, 2422, 1672, 541, 921, 1054, 2361, 2397, 1743, 2554, 866, 228, 261, 230, 2132, 2447, 2170, 746, 2152, 848, 2049, 1773, 2038, 2338, 1925, 1858, 1599, 1566, 1810, 2396, 2331, 2079, 2534, 1959, 231, 1926, 2134, 2308, 1768, 2273, 2406, 879, 1631, 452, 461, 775, 1624, 1668, 1713, 1736, 1876, 1775]),
 101)

In [8]:
from collections import defaultdict

def map_death_count(df_wards, df_deaths):
    death_time = df_deaths['time'].item()
    return (df_wards['spawned_time'] < death_time) & (death_time > df_wards['despawned_time'])

def get_df_match_wards(match:dict) -> dict:
    if not match['playbackData']:
        print(f"warning: no playbackData found for match {match['id']}")
        return
    wards = get_match_wards(match)
    df_wards = defaultdict(lambda: [])
    for id, w in wards.items():
        for key, item in w.items():
            df_wards[key].append(item)
    df_wards = pd.DataFrame.from_dict(df_wards).set_index("id")

    none_despawned_mask = df_wards['despawned_time'].isna()
    df_wards.loc[none_despawned_mask, 'despawned_time'] = match['durationSeconds']

    # df_deaths = get_deaths_through_ward(match)
    
    # df_wards['possible_enemies_death'] = 0
    # return map_death_count(df_wards, df_deaths)

    
    # df_wards['possible_enemies_death'] = 0


    df_wards['spawned_time_minute'] = (df_wards['spawned_time'] // 60).astype(int)
    df_wards['despawned_time_minute'] = (df_wards['despawned_time'] // 60).astype(int)
    
    df_wards['radiantTeam'] = match['radiantTeam']['name']
    df_wards['direTeam'] = match['direTeam']['name']


    return df_wards

def get_league_df_wards(league_id):
    league = generator.get_professional_league(league_id)
    matches = league['matches']
    ward_matches_df = [get_df_match_wards(match) for match in matches]
    df_wards = pd.concat(ward_matches_df)
    df_wards['league'] = league_id
    df_wards['region'] = league['region']
    return df_wards

df_wards = get_league_df_wards(16842)
df_wards.isna().any()

match                    False
spawned_time             False
despawned_time           False
positionX                False
positionY                False
wardType                 False
isRadiant                False
playerDestroyed           True
didRadiantWin            False
spawned_time_minute      False
despawned_time_minute    False
radiantTeam              False
direTeam                 False
league                   False
region                   False
dtype: bool

In [9]:
df_wards = df_wards[df_wards['wardType'] == 'OBSERVER'].drop(["wardType", "playerDestroyed"], axis=1)

In [10]:
df_wards.isna().any()

match                    False
spawned_time             False
despawned_time           False
positionX                False
positionY                False
isRadiant                False
didRadiantWin            False
spawned_time_minute      False
despawned_time_minute    False
radiantTeam              False
direTeam                 False
league                   False
region                   False
dtype: bool

In [11]:
df_wards[['positionX', 'positionY']].describe()

Unnamed: 0,positionX,positionY
count,1593.0,1593.0
mean,126.284997,123.460138
std,29.045046,32.108638
min,58.0,58.0
25%,104.0,98.0
50%,124.0,122.0
75%,146.0,146.0
max,192.0,196.0


In [12]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

# bg_img = np.array(Image.open('assets\Game_map_7.33.webp').resize((500,500)))
# # bg_img = mpimg.imread('assets\Game_map_7.33.webp')
# print(type(bg_img), bg_img.shape)
# for name, group in df_wards.groupby('match'):

#     print(f"\n{group['match'].unique()} - {group['radiantTeam'].unique()} x {group['direTeam'].unique()}")
#     fig, ax = plt.subplots(figsize=(5, 5), dpi=100)
#     ax.imshow(bg_img, extent=[55, 200, 55, 200])  # Adjust extent to match your data range
#     ax.scatter(group['positionX'], group['positionY'], label=f'Match {name}', color=group['wardType'].map(lambda x: "yellow" if x == "OBSERVER" else "blue"))

#     # Show plot
#     plt.show()


In [13]:
df_wards.count().max(), df_deaths.count().max()

(1593, 1189)

In [14]:
df_wards[['spawned_time', 'despawned_time']]

Unnamed: 0_level_0,spawned_time,despawned_time
id,Unnamed: 1_level_1,Unnamed: 2_level_1
7811671022_450,-63,296.0
7811671022_454,-62,297.0
7811671022_459,-53,306.0
7811671022_517,-17,342.0
7811671022_881,283,347.0
...,...,...
7803951479_1742,2757,2766.0
7803951479_472,2795,3080.0
7803951479_2962,2895,2954.0
7803951479_2072,2943,3080.0


In [15]:
df_wards = df_wards.reset_index()
df_wards

Unnamed: 0,id,match,spawned_time,despawned_time,positionX,positionY,isRadiant,didRadiantWin,spawned_time_minute,despawned_time_minute,radiantTeam,direTeam,league,region
0,7811671022_450,7811671022,-63,296.0,126,126,True,True,-2,4,Tundra Esports,PSG.Quest,16842,EUROPE
1,7811671022_454,7811671022,-62,297.0,154,100,True,True,-2,4,Tundra Esports,PSG.Quest,16842,EUROPE
2,7811671022_459,7811671022,-53,306.0,120,122,False,True,-1,5,Tundra Esports,PSG.Quest,16842,EUROPE
3,7811671022_517,7811671022,-17,342.0,138,82,False,True,-1,5,Tundra Esports,PSG.Quest,16842,EUROPE
4,7811671022_881,7811671022,283,347.0,122,128,False,True,4,5,Tundra Esports,PSG.Quest,16842,EUROPE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1588,7803951479_1742,7803951479,2757,2766.0,142,122,False,True,45,46,Tundra Esports,NAVI Junior,16842,EUROPE
1589,7803951479_472,7803951479,2795,3080.0,146,72,True,True,46,51,Tundra Esports,NAVI Junior,16842,EUROPE
1590,7803951479_2962,7803951479,2895,2954.0,138,136,False,True,48,49,Tundra Esports,NAVI Junior,16842,EUROPE
1591,7803951479_2072,7803951479,2943,3080.0,182,140,False,True,49,51,Tundra Esports,NAVI Junior,16842,EUROPE


In [16]:
def create_time_rows(row):
    return [{'time': t, **row} for t in range(row['spawned_time_minute'], row['despawned_time_minute']) if row['id']]
# Apply the function and expand the DataFrame
df_expanded = df_wards.apply(create_time_rows, axis=1).explode().reset_index(drop=True)

# Convert the expanded rows from dictionaries back to a DataFrame
df_expanded = pd.DataFrame([row for row in df_expanded.tolist() if isinstance(row, dict)])

# Drop the original columns (optional)
df_expanded.drop(['spawned_time', 'despawned_time','spawned_time_minute', 'despawned_time_minute'], axis=1, inplace=True)

# Print the expanded DataFrame
df_expanded

Unnamed: 0,time,id,match,positionX,positionY,isRadiant,didRadiantWin,radiantTeam,direTeam,league,region
0,-2,7811671022_450,7811671022,126,126,True,True,Tundra Esports,PSG.Quest,16842,EUROPE
1,-1,7811671022_450,7811671022,126,126,True,True,Tundra Esports,PSG.Quest,16842,EUROPE
2,0,7811671022_450,7811671022,126,126,True,True,Tundra Esports,PSG.Quest,16842,EUROPE
3,1,7811671022_450,7811671022,126,126,True,True,Tundra Esports,PSG.Quest,16842,EUROPE
4,2,7811671022_450,7811671022,126,126,True,True,Tundra Esports,PSG.Quest,16842,EUROPE
...,...,...,...,...,...,...,...,...,...,...,...
7022,50,7803951479_472,7803951479,146,72,True,True,Tundra Esports,NAVI Junior,16842,EUROPE
7023,48,7803951479_2962,7803951479,138,136,False,True,Tundra Esports,NAVI Junior,16842,EUROPE
7024,49,7803951479_2072,7803951479,182,140,False,True,Tundra Esports,NAVI Junior,16842,EUROPE
7025,50,7803951479_2072,7803951479,182,140,False,True,Tundra Esports,NAVI Junior,16842,EUROPE


In [17]:
df_expanded.dtypes

time              int64
id               object
match             int64
positionX         int64
positionY         int64
isRadiant          bool
didRadiantWin      bool
radiantTeam      object
direTeam         object
league            int64
region           object
dtype: object

In [18]:
train_data = df_expanded.copy()
train_data["win"] = train_data['isRadiant'] == train_data['didRadiantWin']
train_data.drop(['id', 'match', 'didRadiantWin', 'radiantTeam', 'direTeam', 'league', 'region'], axis=1, inplace=True)
train_data['isRadiant'] = train_data['isRadiant'].astype(int)
train_data['win'] = train_data['win'].astype(int)
train_data

Unnamed: 0,time,positionX,positionY,isRadiant,win
0,-2,126,126,1,1
1,-1,126,126,1,1
2,0,126,126,1,1
3,1,126,126,1,1
4,2,126,126,1,1
...,...,...,...,...,...
7022,50,146,72,1,1
7023,48,138,136,0,0
7024,49,182,140,0,0
7025,50,182,140,0,0


In [19]:
def create_time_rows(row):
    return [{'time': t, **row} for t in range(row['spawned_time_minute'], row['despawned_time_minute']) if row['id']]

In [20]:
def get_dataset():
    df_wards = get_league_df_wards(16842)
    
    df_wards = df_wards[df_wards['wardType'] == 'OBSERVER'].drop(["wardType", "playerDestroyed"], axis=1)
    
    # Apply the function and expand the DataFrame
    df_expanded = df_wards.apply(create_time_rows, axis=1).explode().reset_index(drop=True)

    # Convert the expanded rows from dictionaries back to a DataFrame
    df_expanded = pd.DataFrame([row for row in df_expanded.tolist() if isinstance(row, dict)])

    # Drop the original columns (optional)
    df_expanded.drop(['spawned_time', 'despawned_time','spawned_time_minute', 'despawned_time_minute'], axis=1, inplace=True)

    # Print the expanded DataFrame
    df_expanded

    
    train_data = df_expanded.copy()
    train_data["win"] = train_data['isRadiant'] == train_data['didRadiantWin']
    train_data.drop(['id', 'match', 'didRadiantWin', 'radiantTeam', 'direTeam', 'league', 'region'], axis=1, inplace=True)
    train_data['isRadiant'] = train_data['isRadiant'].astype(int)
    train_data['win'] = train_data['win'].astype(int)

In [21]:
import numpy as np
from sklearn.model_selection import train_test_split

X = np.array(train_data.drop(["win"], axis=1))
y = np.array(train_data['win'])

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)
X_train.shape, X_test.shape

((4918, 4), (2109, 4))

In [22]:
from sklearn.tree import DecisionTreeClassifier

classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)


In [23]:
classifier.predict_proba(X)

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       ...,
       [1., 0.],
       [1., 0.],
       [0., 1.]])

In [24]:
sum(classifier.predict(X_test) == y_test) / y_test.shape

array([0.80464675])