In [1]:
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, r2_score


device = 'mps' if torch.mps.is_available() else 'cpu'
print(device)

mps


In [2]:
data_df = pd.read_csv("second_iteration/clean_data.csv")
data_df.dropna(inplace=True)
original_df = data_df.copy()

In [None]:
x = data_df.drop(["blue_score", "red_score"], axis=1).values
y = data_df[["blue_score", "red_score"]].values
x

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15,)
x_train

In [None]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

In [6]:
class dataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.tensor(x, dtype=torch.float32).to(device)
        self.y = torch.tensor(y, dtype=torch.float32).to(device)
        
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]

In [7]:
train_data = dataset(x_train, y_train)
test_data = dataset(x_test, y_test)

In [8]:
BATCH_SIZE = 8

In [9]:
train_dl = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dl = DataLoader(test_data, batch_size=BATCH_SIZE, )

In [10]:
class MyNet(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2) 
        )

    def forward(self, x):
        return self.model(x)


In [11]:
INPUT_DIM = x_train.shape[1]
print(INPUT_DIM)
model = MyNet(INPUT_DIM).to(device)

78


In [12]:
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=1e-4)

In [13]:
def mean_absolute_error(y_true, y_pred):
    return torch.mean(torch.abs(y_true - y_pred)).item()

def regression_accuracy(y_true, y_pred, threshold=5.0):
    correct = torch.sum(torch.all(torch.abs(y_true - y_pred) < threshold, dim=1)).item()
    total = y_true.shape[0]
    return correct / total


In [None]:
EPOCHS = 150

total_loss_train_plot = []
total_loss_val_plot = []
total_acc_train_plot = []
total_acc_val_plot = []

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    running_mae = 0.0

    for inputs, labels in train_dl:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * inputs.size(0)
        running_mae += mean_absolute_error(labels, outputs) * inputs.size(0)
        
    epoch_loss = running_loss / len(train_dl.dataset)
    epoch_mae = running_mae / len(train_dl.dataset)
    
    print(f'Epoch {epoch+1}/{EPOCHS}, Loss (MSE): {epoch_loss:.4f}, MAE: {epoch_mae:.4f}')



In [19]:
model = MyNet(INPUT_DIM).to(device)
model.load_state_dict(torch.load("second_iteration/model.pt"))

<All keys matched successfully>

In [None]:
model.eval()
with torch.no_grad():
    test_loss = 0.0
    test_mae = 0.0
    all_true = []
    all_preds = []
    for inputs, labels in test_dl:
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * inputs.size(0)
        test_mae += mean_absolute_error(labels, outputs) * inputs.size(0)
        all_true.append(labels)
        all_preds.append(outputs)
        
    test_loss /= len(test_dl.dataset)
    test_mae /= len(test_dl.dataset)
    
      
    all_true = torch.cat(all_true, dim=0)
    all_preds = torch.cat(all_preds, dim=0)
    acc = regression_accuracy(all_true, all_preds, threshold=5.0)
    
    print(f'Test Loss (MSE): {test_loss:.4f}, Test MAE: {test_mae:.4f}, Regression Accuracy (error<{5.0}): {acc*100:.2f}%')


In [21]:
def inferencia(fila, model, scaler, threshold=5.0):
   

    features = fila.drop(labels=["blue_score", "red_score"]).values.reshape(1, -1)

    features_scaled = scaler.transform(features)

    tensor_features = torch.tensor(features_scaled, dtype=torch.float32).to(device)

    model.eval()
    with torch.no_grad():
        prediction = model(tensor_features)

    predicted_scores = prediction.cpu().numpy().flatten()

    true_scores = fila[["blue_score", "red_score"]].values.astype(np.float32)

    error_abs = np.abs(true_scores - predicted_scores)

    accuracy = 100.0 if (error_abs < threshold).all() else 0.0

    actual_winner = "blue" if true_scores[0] > true_scores[1] else "red"
    
    pred_winner = "blue" if predicted_scores[0] > predicted_scores[1] else "red"
    # print(
    #     f"Predicción -> blue_score: {predicted_scores[0]:.2f}, red_score: {predicted_scores[1]:.2f}"
    # )
    # print(
    #     f"Valor real -> blue_score: {true_scores[0]:.2f}, red_score: {true_scores[1]:.2f}"
    # )
    
    # print("Win prediction was", actual_winner == pred_winner)
    return actual_winner == pred_winner

In [None]:
fila_ejemplo = original_df.iloc[301]
print(type(fila_ejemplo))
inferencia(fila_ejemplo, model, scaler)

In [23]:
train_df, test_df = train_test_split(data_df, test_size=0.15, random_state=42)

correct_wins = 0
for i in range(len(test_df)):
    fila = test_df.iloc[i] 
    win = inferencia(fila, model, scaler, threshold=5.0)
    if win:
        correct_wins += 1
    
print(f"{((correct_wins / len(test_df))*100):.2f}%")


89.01%


In [238]:
torch.save(model.state_dict(), "model.pt")

In [None]:
import pandas as pd
import numpy as np

all_columns = data_df.columns.tolist()


target_cols = ["red_score", "blue_score"]


feature_cols = [col for col in all_columns if col not in target_cols]


from collections import defaultdict

grouped = defaultdict(lambda: {"red": [], "blue": []})

for col in feature_cols:
    if col.startswith("red"):
        var_base = "_".join(col.split("_")[1:])
        grouped[var_base]["red"].append(col)
    elif col.startswith("blue"):
        var_base = "_".join(col.split("_")[1:])
        grouped[var_base]["blue"].append(col)


new_data = pd.DataFrame()

for var, teams in grouped.items():
    if teams["red"]:
        new_data[f"red_{var}"] = data_df[teams["red"]].mean(axis=1)
    if teams["blue"]:
        new_data[f"blue_{var}"] = data_df[teams["blue"]].mean(axis=1)


new_data["red_score"] = data_df["red_score"]
new_data["blue_score"] = data_df["blue_score"]


new_data["winner"] = np.where(
    new_data["red_score"] > new_data["blue_score"],
    "red",
    np.where(new_data["red_score"] < new_data["blue_score"], "blue", "tie"),
)


print(new_data.head())

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt


sns.pairplot(new_data.sample(1000), hue='winner')
plt.show()