<a href="https://colab.research.google.com/github/gmansil/LoLPredicter/blob/main/LeaguePredicter_pynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
# Import libraries
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.python import train
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

In [17]:
# Read the cleaned match data CSV into a DataFrame.
# The absolute /content path must exist in the runtime executing this notebook.
mRegion = pd.read_csv(filepath_or_buffer="/content/4mRegionsClean.csv")

In [18]:
# Keep the four descriptive columns that are joined back onto the
# actual-vs-predicted table further down the notebook.
matches = mRegion.loc[:, ["V16", "V8", "V25", "Opponents"]]

In [19]:
# Remove the columns that are not used for modelling (per the original note,
# these are the string-valued ones). `mRegion` itself is left untouched.
mRegion1 = mRegion.drop(
    columns=[
        "Unnamed: 0",
        "V1", "V2", "V3", "V5", "V6", "V7", "V9",
        "V36", "V37", "V38", "V52", "V72", "V85", "V88", "V91", "V92",
        "V10", "V11", "V13", "V14", "V15", "V17", "V18",
        "Opponents",
    ]
)

In [20]:
# Create rolling averages based on team passed in
def rollingAverages(team, cols, newCols, window=32):
    """Add trailing rolling-mean columns to one group's rows.

    The rows are ordered by the "V8" column, then for every column in
    ``cols`` the mean over the previous ``window`` rows is computed.
    ``closed="left"`` leaves the current row out of its own window, so each
    rolling value is built only from rows that sort before it.

    Args:
        team: DataFrame with the rows of a single group. It must contain
            "V8" and every column named in ``cols``.
        cols: Names of the columns to average.
        newCols: Names for the resulting rolling columns, one per entry of
            ``cols`` and in the same order.
        window: Number of preceding rows in each window. Defaults to 32,
            the value that used to be hard-coded.

    Returns:
        A new DataFrame sorted by "V8" with the ``newCols`` columns added.
        Rows that do not yet have a full window of history (NaN in any of
        ``newCols``) are dropped. The frame passed in is not modified,
        because ``sort_values`` returns a new object.
    """
    team = team.sort_values("V8")
    # Without an explicit min_periods, a fixed-size window needs `window`
    # observations, so the first `window` rows of the group come out as NaN.
    rollingStats = team[cols].rolling(window, closed="left").mean()
    team[newCols] = rollingStats
    team = team.dropna(subset=newCols)
    return team

In [21]:
# Stat columns that receive a rolling average, built from their V-numbers.
# Contiguous runs are written as ranges (the end value is exclusive).
ct2 = [
    f"V{number}"
    for number in (
        24,
        *range(26, 36),  # V26 .. V35
        *range(39, 44),  # V39 .. V43
        *range(46, 50),  # V46 .. V49
        *range(53, 66),  # V53 .. V65
        *range(68, 72),  # V68 .. V71
        *range(73, 85),  # V73 .. V84
        86, 87,
        89, 90,
    )
]

# Name of the rolling-average column derived from each stat column
ct3 = [name + "_rolling" for name in ct2]

# Encode the result column as integers: 1 where V25 equals 1, otherwise 0
mRegion1["V25"] = mRegion1["V25"].eq(1).astype("int")

# Compute the rolling features separately for each V16 group, then discard
# the V16 index level that groupby/apply puts in front of the row index.
matchesRolling = (
    mRegion1
    .groupby("V16")
    .apply(rollingAverages, ct2, ct3)
    .droplevel("V16")
)

# Preprocessing for the model inputs:
#   * rolling-average columns are rescaled by MinMaxScaler
#   * the categorical columns V12, V4 and V16 are one-hot encoded; with
#     handle_unknown="error", a category that was not present at fit time
#     raises during transform
ct = make_column_transformer(
    (MinMaxScaler(), ct3),
    (
        OneHotEncoder(handle_unknown="error"),
        ["V12", "V4", "V16"],
    ),
)

# Target is the V25 column; every other column stays in the feature frame
y = matchesRolling["V25"]
x = matchesRolling.drop(columns=["V25"])

# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
# NOTE(review): the split is random, while the rolling features are ordered
# by "V8". If V8 is a match date, later matches can end up in the training
# set - confirm this is intended.
xTrain, xTest, yTrain, yTest = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the column transformer on the training rows only, then apply that same
# fitted transformer (MinMaxScaler + OneHotEncoder) to both splits.
xTrainNormal = ct.fit(xTrain).transform(xTrain)
xTestNormal = ct.transform(xTest)

In [22]:
# Build a neural network model for our data
# Set the random seed so repeated runs start from the same TensorFlow seed
tf.random.set_seed(42)

# 1. Create a model: two hidden Dense layers of 4 ReLU units each, followed by
#    a single sigmoid unit as the output.
leaguePredicter = tf.keras.Sequential([
    tf.keras.layers.Dense(4, activation="relu"),
    tf.keras.layers.Dense(4, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

# 2. Compile the model with binary cross-entropy loss, the Adam optimizer
#    (default settings) and accuracy as the reported metric.
leaguePredicter.compile(loss=tf.keras.losses.BinaryCrossentropy(),
               optimizer=tf.keras.optimizers.Adam(),
               metrics=["accuracy"])

# 3. Fit the model on the transformed training data for 150 epochs.
#    verbose=0 suppresses the per-epoch progress output; the value returned
#    by fit is kept in `history`.
history = leaguePredicter.fit(xTrainNormal, yTrain, epochs=150, verbose=0)

In [23]:
# Score the trained model on the transformed test split and keep the result
leaguePredicterEvaluation = leaguePredicter.evaluate(x=xTestNormal, y=yTest)



In [24]:
# Model outputs for every row of the transformed test split
yPreds = leaguePredicter.predict(x=xTestNormal)

# Round each output of the sigmoid layer to the nearest whole number
yPredsRounded = tf.math.round(yPreds)

In [25]:
# Ground-truth labels of the test split
actual = yTest

# Flatten the rounded predictions into an integer Series and give it the
# same index as yTest so the two line up row by row.
predicted = (
    pd.Series(tf.squeeze(yPredsRounded))
    .astype("int")
    .set_axis(yTest.index)
)

In [26]:
# Side-by-side table of the true label and the model's prediction per test row
combined = pd.DataFrame({"actual": actual, "predicted": predicted})

In [27]:
# Attach the descriptive match columns to each prediction by joining on the
# shared row index.
combinedAttempt = pd.merge(
    combined,
    matches[["V16", "V8", "V25", "Opponents"]],
    left_index=True,
    right_index=True,
)

In [28]:
# Self-join: pair each row with the rows that share its V8 value and whose
# "Opponents" equals this row's V16. Columns present on both sides get the
# _x (left) and _y (right) suffixes.
mergedData = pd.merge(
    combinedAttempt,
    combinedAttempt,
    left_on=["V8", "V16"],
    right_on=["V8", "Opponents"],
)

In [29]:
# Among the pairs where the left side is predicted 1 and the right side is
# predicted 0, count the actual results of the left side.
mergedData.loc[
    (mergedData["predicted_x"] == 1) & (mergedData["predicted_y"] == 0),
    "actual_x",
].value_counts()

1    7
0    2
Name: actual_x, dtype: int64