In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.inspection import permutation_importance

In [2]:
look_events=["CHAMPION_SPECIAL_KILL","CHAMPION_KILL","ELITE_MONSTER_KILL","BUILDING_KILL"] 

minute = 10

folder_path = "data/IRON-Timeline" #Change to your folder path


In [3]:
#PARTICIPANT FRAMES

def part_frames(json_file):
    
    data = pd.DataFrame(json_file["info"]["frames"][minute]["participantFrames"])
    
    data = data.transpose().drop(columns=["currentGold", "goldPerSecond", "participantId", "position", "timeEnemySpentControlled", "level"])
    data.reset_index(drop=True, inplace=True)

    damage_stats = pd.json_normalize(data["damageStats"])
    champion_stats = pd.json_normalize(data["championStats"])

    pf_all = pd.concat([data, damage_stats, champion_stats], axis=1).drop(columns=["championStats","damageStats"])
    pf_all = pf_all.astype(int)

    conditions = [
            (pf_all.index >= 0) & (pf_all.index <= 4),
            (pf_all.index >= 5) & (pf_all.index <= 9)
        ]
    values = [100, 200]

    pf_all['team'] = np.select(conditions, values, default=0)

    pf_all = pf_all.groupby("team").sum().reset_index()

    return pf_all


#EVENTS & TARGET

def find_indices(data):
    monster_indices = []
    for index, item in enumerate(data):
        if item.get('type') in look_events:
            monster_indices.append(index)
    return monster_indices

def get_events(json_file):
    elements_filtered = []
    for i in range(minute + 1):
        try:
            events = json_file["info"]["frames"][i]["events"]
        except IndexError:
            break
        monster_indices = find_indices(events)
        if monster_indices:
            new_elements = [events[e] for e in monster_indices]
            elements_filtered.extend(new_elements)

    events_filtered = pd.DataFrame(elements_filtered)
    return events_filtered


def transform_events(json_file):
    required_columns = ["killerId", "type", "killType", "monsterSubType", "monsterType", "towerType"]

    events = get_events(json_file)

    missing_columns = [col for col in required_columns if col not in events.columns]

    for column in missing_columns:
        events[column] = np.nan

    events = events[required_columns]
    events = events[events["killerId"] != 0]

    events.loc[events["monsterType"] == "DRAGON", "monsterType"] = events.loc[events["monsterType"] == "DRAGON", "monsterSubType"]

    events = events.drop(columns=["monsterSubType"])

    events["kills"] = (events["type"] == "CHAMPION_KILL").astype(int)

    one_hot_encoded = pd.get_dummies(events[['killType', "monsterType", "towerType"]])

    events_encoded = pd.concat([events[['killerId', "kills"]], one_hot_encoded], axis=1)
    
    if 'killType' in events_encoded.columns:
        events_encoded.drop(columns=['killType'], inplace=True)
    if 'monsterType' in events_encoded.columns:
        events_encoded.drop(columns=['monsterType'], inplace=True)
    if 'towerType' in events_encoded.columns:
        events_encoded.drop(columns=['towerType'], inplace=True)
    
    events_encoded = events_encoded.groupby("killerId").sum().reset_index()

    events_encoded["killerId"] = events_encoded["killerId"] - 1

    events_encoded = events_encoded.groupby("killerId").sum().reset_index()

    conditions = [
        (events_encoded['killerId'] >= 0) & (events_encoded['killerId'] <= 4),
        (events_encoded['killerId'] >= 5) & (events_encoded['killerId'] <= 9)
    ]
    values = [100, 200]

    events_encoded['team'] = np.select(conditions, values, default=0)

    events_encoded = events_encoded.groupby("team").sum().reset_index().drop(columns=["killerId"])
    
    return events_encoded


def merge_dfs(json_file):
    events = transform_events(json_file)
    frames = part_frames(json_file)
    
    dfs = frames.merge(events,how="left",on="team")
    
    dfs["matchId"] = json_file["metadata"]["matchId"]
    
    last_event = json_file["info"]["frames"][-1]["events"][-1]
    dfs["target"] = dfs["team"].apply(lambda x: 1 if x == last_event.get("winningTeam") else 0)
    
    dfs.drop(columns=["team"],inplace=True)
    
    return dfs



In [4]:
#LOOP PARA TODOS LOS JSON


all_events = None


for filename in os.listdir(folder_path):
    if filename.endswith('.json'):
        file_path = os.path.join(folder_path, filename)
        
        json_file = pd.read_json(file_path)
           
        try:             
            if all_events is None:
                all_events = merge_dfs(json_file)
            else:
                all_events = pd.concat([all_events, merge_dfs(json_file)], ignore_index=True)
        
        except IndexError:
            continue
     
columns_to_convert = all_events.columns[~all_events.columns.isin(['matchId',"target"])]   
    
all_events[columns_to_convert] = all_events[columns_to_convert].fillna(0).astype(int) 

In [5]:
#DIFERENCIAS ENTRE PARTIDAS


# Seleccionar todas las columnas excepto "matchId y"
columnas_no_matchId = all_events.columns[~ all_events.columns.isin(["matchId", "target"])]

# Seleccionar solo las columnas numéricas
columnas_numericas = all_events.select_dtypes(include=[np.number]).drop(columns = "target")

# Crear un nuevo DataFrame vacío con la misma columna "matchId, target" y las columnas numéricas
all_events_diff = pd.DataFrame(columns=["matchId", "target"] + list(columnas_numericas.columns))

# Obtener el número total de filas en df_trabajo
total_filas = columnas_numericas.shape[0]

# Iterar a través de las filas de columnas_numericas
for i in range(0, total_filas - 1, 2):
    # Obtener las dos filas a restar
    fila1 = columnas_numericas.iloc[i]
    fila2 = columnas_numericas.iloc[i+1]
    
    # Calcular la resta de las dos filas
    resta_filas = fila1 - fila2
    
    # Agregar la resta al nuevo DataFrame df_partida_diff
    all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)

# Asignar el primer valor de las columnas booleanas en cada fila del nuevo DataFrame
for columna in all_events.select_dtypes(include=[bool]):
    all_events_diff[columna] = all_events[columna].iloc[::2].reset_index(drop=True)

# Asignar el primer valor de la columna "matchId" en cada fila del nuevo DataFrame
all_events_diff["matchId"] = all_events["matchId"].iloc[::2].reset_index(drop=True)

# Asignar el primer valor de la columna "target" en cada fila del nuevo DataFrame
all_events_diff["target"] = all_events["target"].iloc[::2].reset_index(drop=True)

  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)
  all_events_diff = all_e

In [73]:
pd.set_option('display.max_columns', None)

def timba(all_events_diff):

    # Get columns that start with 'monsterType'
    monster_type_cols = all_events_diff.filter(like='monsterType', axis=1).columns.tolist()

    # Get columns that start with 'towerType'
    tower_type_cols = all_events_diff.filter(like='towerType', axis=1).columns.tolist()

    # Get columns that start with 'killType'
    kill_type_cols = all_events_diff.filter(like='killType', axis=1).columns.tolist()

    # Combine the columns into a single list
    all_cols = monster_type_cols + tower_type_cols + kill_type_cols + ["target","minionsKilled","totalGold"]

    all_df = all_events_diff[all_cols]

    return all_df

all_df = timba(all_events_diff)

In [74]:
def duplicates(df):
    print(f"Duplicates droped: {df.duplicated().sum()}")

    df = df.drop_duplicates()

    return df

def scale(df):
    # Standard or Robust if there are many outliers
    scaler = RobustScaler()
    
    scaled_array = scaler.fit_transform(df)
    scaled_df = pd.DataFrame(scaled_array, columns=df.columns)
    
    return scaled_df

In [75]:
def preprop(df):
    
    df = duplicates(df)
    
    df = scale(df)
    
    return pd.DataFrame(df)

In [76]:
X = all_df.drop(columns="target")
y = all_df["target"]


X_preprop = preprop(X)

Duplicates droped: 0


In [77]:
X_preprop

Unnamed: 0,monsterType_FIRE_DRAGON,monsterType_RIFTHERALD,monsterType_HEXTECH_DRAGON,monsterType_WATER_DRAGON,monsterType_AIR_DRAGON,monsterType_EARTH_DRAGON,monsterType_CHEMTECH_DRAGON,towerType_OUTER_TURRET,towerType_INNER_TURRET,killType_KILL_FIRST_BLOOD,killType_KILL_MULTI,killType_KILL_ACE,minionsKilled,totalGold
0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.155556,-1.537749
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.533333,-0.754350
2,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.088889,-0.596010
3,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.600000,0.193136
4,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.600000,-1.023144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4730,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,0.0,1.311111,0.313168
4731,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.133333,-0.780846
4732,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.688889,-0.062251
4733,0.0,-1.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,0.0,-0.622222,-1.299282


In [78]:
# Instantiate model
log_reg = LogisticRegression(max_iter=10000)

# Scoring on multiple folds aka Cross Validation
scores = cross_val_score(log_reg, X_preprop, y, cv=10)
print(f"Score:{scores.mean()}")

# Fit model
log_model = LogisticRegression().fit(X_preprop, y)

# Performs Permutation
permutation_score = permutation_importance(log_model, X_preprop, y, n_repeats=10)

# Unstack results showing the decrease in performance after shuffling features
importance_df = pd.DataFrame(np.vstack((X_preprop.columns,
                                        permutation_score.importances_mean)).T)
importance_df.columns=['feature','score decrease']

# Show the important features
importance_df.sort_values(by="score decrease", ascending = False)

Score:0.7294506739458166


Unnamed: 0,feature,score decrease
13,totalGold,0.173115
5,monsterType_EARTH_DRAGON,0.008279
12,minionsKilled,0.007138
3,monsterType_WATER_DRAGON,0.006061
0,monsterType_FIRE_DRAGON,0.005977
2,monsterType_HEXTECH_DRAGON,0.003126
4,monsterType_AIR_DRAGON,0.002619
6,monsterType_CHEMTECH_DRAGON,0.00188
1,monsterType_RIFTHERALD,0.000781
7,towerType_OUTER_TURRET,0.000401


In [79]:
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.linear_model import LogisticRegression

#Split the dataset in train / test
X_train, X_test, y_train, y_test = train_test_split(X_preprop, y, test_size=0.3, random_state=88)

log_reg = LogisticRegression(max_iter=10000)

log_model = LogisticRegression().fit(X_train, y_train)


In [80]:
scores = cross_val_score(log_reg, X_train, y_train, cv=10)
print(f"Score:{scores.mean()}")

Score:0.7302315000181997


In [81]:
train_score = log_model.score(X_train, y_train)
print("Train Accuracy:", train_score)

Train Accuracy: 0.7302353651176826


In [82]:
#Evaluar el modelo en los datos de prueba: 
test_score = log_model.score(X_test, y_test)
print("Test Accuracy:", test_score)

Test Accuracy: 0.7227304714989444


In [83]:
from sklearn.metrics import accuracy_score

y_pred = log_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7227304714989444


In [86]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Define the hyperparameter grid
param_grid = {
    'C': [0.1, 1, 10],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear', 'saga']
}

# Create a logistic regression model
model = LogisticRegression(max_iter=5000)

# Create a grid search object
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',  # Evaluation metric
    cv=5  # Number of cross-validation folds
)

# Perform the grid search
grid_search.fit(X_train, y_train)

# Print the best hyperparameters and best score
print("Best Hyperparameters: ", grid_search.best_params_)
print("Best Score: ", grid_search.best_score_)

Best Hyperparameters:  {'C': 0.1, 'penalty': 'l2', 'solver': 'saga'}
Best Score:  0.731137874624635
