### League of Legends Match Predictor


Goal: Predict the outcome of a League of Legends (LoL) match (win or loss for a team) from historical game statistics and player/team features using machine learning techniques.

Objectives:

1. Collect and preprocess match data from Riot Games API or Kaggle datasets

2. Perform exploratory data analysis (EDA) and feature engineering

3. Build predictive models using machine learning algorithms (this notebook trains a logistic regression in PyTorch; Random Forest or XGBoost are possible alternatives)

4. Evaluate models using accuracy, precision, recall, and F1-score

5. Deploy a simple interface to predict match outcomes from new game data


### Data Loading and Preprocessing


In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt


# Read the match table; "win" is the label and every other column is a feature.
dataset = pd.read_csv("league_of_legends_data_large.csv")
y = dataset["win"]
X = dataset.drop(columns="win")

# Hold out 20% of the rows for testing. The fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise with statistics learned from the training rows only, then apply
# the same transform to the test rows, and hand the result to PyTorch as
# float32 tensors.
scaler = StandardScaler().fit(X_train)
X_train = torch.tensor(scaler.transform(X_train), dtype=torch.float32)
X_test = torch.tensor(scaler.transform(X_test), dtype=torch.float32)

# Labels become float column vectors of shape (n_samples, 1).
y_train = torch.tensor(y_train.values, dtype=torch.float32).reshape(-1, 1)
y_test = torch.tensor(y_test.values, dtype=torch.float32).reshape(-1, 1)



### Logistic Regression Model 


In [None]:
class LogisticRegressionModel(nn.Module):
    """Binary logistic regression: one linear unit followed by a sigmoid.

    Args:
        n_inputs: Number of input features per sample.
    """

    def __init__(self, n_inputs):
        super().__init__()
        # A single output unit produces one logit per sample.
        self.linear = nn.Linear(in_features=n_inputs, out_features=1)

    def forward(self, x):
        """Return the sigmoid of the linear output, i.e. values in (0, 1)."""
        logits = self.linear(x)
        return logits.sigmoid()


# One input per feature column of the training tensor.
input_dim = X_train.size(1)

# Model, binary cross-entropy loss (expects probabilities, which the model's
# sigmoid provides) and a plain SGD optimizer.
model = LogisticRegressionModel(n_inputs=input_dim)
criterion = nn.BCELoss()
optimizer = optim.SGD(params=model.parameters(), lr=1e-3)



### Model Training 


In [None]:
# Full-batch training: each epoch performs exactly one optimizer step on the
# whole training tensor, then records the loss on the test tensor.
epochs = 1000
train_losses, test_losses = [], []

for epoch in range(epochs):
    # --- one gradient step on the training data ---
    model.train()
    optimizer.zero_grad()
    train_outputs = model(X_train)
    train_loss = criterion(train_outputs, y_train)
    train_loss.backward()
    optimizer.step()
    train_losses.append(train_loss.item())

    # --- loss on the held-out data, without tracking gradients ---
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test)
        test_loss = criterion(test_outputs, y_test)
    test_losses.append(test_loss.item())

    # Report progress every 100th epoch.
    if epoch % 100 == 99:
        print(
            f"Epoch [{epoch + 1}/{epochs}] "
            f"Train Loss: {train_losses[-1]:.4f} "
            f"Test Loss: {test_losses[-1]:.4f}"
        )

### Model Optimization and Evaluation 


In [None]:
from sklearn.metrics import classification_report

# Turn predicted probabilities into hard 0/1 labels with a 0.5 cut-off.
model.eval()
with torch.no_grad():
    train_pred = (model(X_train) >= 0.5).float()
    test_pred = (model(X_test) >= 0.5).float()

# Accuracy is the fraction of predictions that equal the true label.
train_acc = train_pred.eq(y_train).float().mean().item()
test_acc = test_pred.eq(y_test).float().mean().item()

print(f"Training Accuracy: {train_acc*100:.2f}%")
print(f"Testing Accuracy:  {test_acc*100:.2f}%")

# Per-class precision / recall / F1 on the test split.
print("\nClassification Report (Test Set):")
report = classification_report(
    y_test.numpy(),
    test_pred.numpy(),
    target_names=["Loss", "Win"],
)
print(report)

### Optimized Training (Momentum + Weight Decay)


In [None]:
# Swap in SGD with momentum and L2 weight decay. `model` is not re-created
# here, so this loop continues from whatever weights it currently holds.
optimizer = optim.SGD(
    model.parameters(), lr=1e-3, momentum=0.9, weight_decay=0.01
)

epochs = 1000

# Nothing inside the loop switches the mode, so setting it once is enough.
model.train()
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    # Report progress every 100th epoch.
    if epoch % 100 == 99:
        print(f"Epoch [{epoch + 1}/{epochs}] Loss: {loss.item():.4f}")

### Saving Model


In [None]:
# Persist only the learned parameters (the state_dict), not the whole module.
torch.save(model.state_dict(), "league_match_predictor.pth")

# Load model: rebuild the architecture, then restore the saved parameters.
loaded_model = LogisticRegressionModel(input_dim)
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict holds. It avoids executing arbitrary pickled code
# and the FutureWarning raised by PyTorch versions whose default is False.
loaded_model.load_state_dict(
    torch.load("league_match_predictor.pth", weights_only=True)
)
loaded_model.eval()

# Sanity check: the reloaded model is scored on the test split with the same
# 0.5 threshold used elsewhere in this notebook.
with torch.no_grad():
    reload_pred = (loaded_model(X_test) >= 0.5).float()
    reload_acc = (reload_pred == y_test).float().mean().item()

print(f"Reloaded Model Accuracy: {reload_acc*100:.2f}%")

### Hyperparameter Tuning 


In [None]:
# Learning-rate sweep.
#
# The learning rate is chosen on a validation slice carved out of the training
# data rather than on the test split: picking a hyperparameter by test accuracy
# leaks test information into model selection and makes the reported test
# score optimistic.
learning_rates = [0.01, 0.05, 0.1]
accuracies = {}

# Reproducible 80/20 fit/validation split of the training rows. A dedicated
# generator is used so the global RNG state is left untouched.
val_fraction = 0.2
n_train_rows = X_train.shape[0]
n_val = max(1, int(val_fraction * n_train_rows))
split_rng = torch.Generator().manual_seed(42)
shuffled = torch.randperm(n_train_rows, generator=split_rng)
val_idx, fit_idx = shuffled[:n_val], shuffled[n_val:]
X_fit, y_fit = X_train[fit_idx], y_train[fit_idx]
X_val, y_val = X_train[val_idx], y_train[val_idx]

for lr in learning_rates:
    # NOTE: this rebinds the notebook-level `model` and `optimizer` names to a
    # fresh, untrained candidate for each learning rate.
    model = LogisticRegressionModel(input_dim)
    optimizer = optim.SGD(
        model.parameters(), lr=lr, momentum=0.9, weight_decay=0.01
    )

    # Short full-batch training run on the fit portion only.
    model.train()
    for _ in range(100):
        optimizer.zero_grad()
        outputs = model(X_fit)
        loss = criterion(outputs, y_fit)
        loss.backward()
        optimizer.step()

    # Score the candidate on the validation slice with a 0.5 threshold.
    model.eval()
    with torch.no_grad():
        preds = (model(X_val) >= 0.5).float()
        acc = (preds == y_val).float().mean().item()
    accuracies[lr] = acc

    print(f"LR {lr}: Validation Accuracy = {acc*100:.2f}%")

# Ties resolve to the first learning rate in `learning_rates` order.
best_lr = max(accuracies, key=accuracies.get)
print(f"Best Learning Rate: {best_lr}")

### Feature Importance 


In [None]:
# Retrain a fresh model with the selected learning rate so its weights can be
# inspected.
best_model = LogisticRegressionModel(input_dim)
optimizer = optim.SGD(
    best_model.parameters(), lr=best_lr, momentum=0.9, weight_decay=0.01
)

for _ in range(100):
    optimizer.zero_grad()
    outputs = best_model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

# Extract feature importance
# Positive weights increase win probability
# Negative weights decrease win probability
# One weight per input column, flattened from the (1, n_features) matrix.
weights = best_model.linear.weight.detach().numpy().flatten()

# Rank by magnitude: a large negative weight matters as much as a large
# positive one, so sorting on the signed value would push the strongest
# negative features to the bottom of the table. The "Importance" column keeps
# the signed weight so the direction of the effect stays visible.
feature_importance = pd.DataFrame({
    "Feature": X.columns,
    "Importance": weights,
}).sort_values(by="Importance", key=lambda col: col.abs(), ascending=False)

print(feature_importance.head(10))



### Visualization


In [None]:
# Plot at most the first 20 rows of the importance table as horizontal bars.
top_features = feature_importance.head(20)

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(top_features["Feature"], top_features["Importance"])
# Flip the y-axis so the first row of the table is drawn at the top.
ax.invert_yaxis()
ax.set_title("Top 20 Feature Importances")
ax.set_xlabel("Importance")
plt.show()

