In [14]:
import random
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, log_loss
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
# Realizar Grid Search
from itertools import product



# Pin every RNG used by this notebook so a fresh-kernel run is repeatable.
seed_ = 935115
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed_)

# Probe CUDA once; the answer drives both the CUDA seeding and the device choice.
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    torch.cuda.manual_seed(seed_)
    torch.cuda.manual_seed_all(seed_)

# Ask cuDNN for deterministic kernels and switch off its auto-tuner.
import torch.backends.cudnn as cudnn
cudnn.deterministic = True
cudnn.benchmark = False

device = torch.device("cuda:0") if cuda_ready else torch.device("cpu")
print(device)

In [15]:
# Load the encoded TCH prediction table.
# NOTE(review): this rebinds `data`, shadowing the `torch.utils.data as data`
# import alias from the first cell.
data = pd.read_csv('../data/PROCESS/encoded_tch_prediction_data_zafrav3.2.csv')

# Boolean mask selecting the held-out ZAFRA.
# NOTE(review): despite the variable name, the rows selected are ZAFRA '23-24';
# the name is kept unchanged in case other cells reference it.
mask_22_23 = data['ZAFRA'].eq('23-24')

# Materialise each subset once and reuse it for features, target and export.
train_rows = data[~mask_22_23]
test_rows = data[mask_22_23]

# TCH is the target; ABS_IDCOMP and ZAFRA are removed from the feature matrix.
dropped_cols = ['TCH', 'ABS_IDCOMP', 'ZAFRA']
X_train = train_rows.drop(columns=dropped_cols)
y_train = train_rows['TCH']
X_test = test_rows.drop(columns=dropped_cols)
y_test = test_rows['TCH']

print("\nZAFRA Split:")
print(f"Training set shape: {X_train.shape}")
print(f"Testing set shape: {X_test.shape}")

# Persist the untouched (all-column) train/test rows.
train_rows.to_csv('../data/UP/TRAIN.csv', index=False)
test_rows.to_csv('../data/UP/TEST.csv', index=False)


In [16]:

# Turn the pandas splits into float32 tensors. Targets get a trailing axis so
# they are column vectors, matching the single-output network.
def _to_float32(frame):
    """Return the values of a pandas object as a float32 torch tensor."""
    return torch.tensor(frame.values, dtype=torch.float32)


X_train_tensor = _to_float32(X_train)
y_train_tensor = _to_float32(y_train)[:, None]
X_test_tensor = _to_float32(X_test)
y_test_tensor = _to_float32(y_test)[:, None]



# Custom Dataset class
class CaneDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    Args:
        X: Sized, indexable collection of feature rows.
        y: Sized, indexable collection of targets, aligned with ``X`` by position.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of samples.
    """

    def __init__(self, X, y):
        # Fail fast: a length mismatch would otherwise only surface as an
        # IndexError part-way through an epoch (or as silently unused targets).
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap the train/test tensors so a DataLoader can index them sample by sample.
train_dataset, test_dataset = (
    CaneDataset(X_train_tensor, y_train_tensor),
    CaneDataset(X_test_tensor, y_test_tensor),
)

# Mini-batches of 32; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Sanity check: print the shapes of the first training batch (if there is one).
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    X_batch, y_batch = first_batch
    print(f"X batch shape: {X_batch.shape}")
    print(f"y batch shape: {y_batch.shape}")

In [37]:
class RegressionNN(nn.Module):
    """Fully connected regressor mapping ``input_size`` features to one output.

    Hidden widths are 512, 256, 128, 64, 32, 16, 16, 32, 64. The first five
    hidden layers use Sigmoid, the remaining four use ReLU, and the final
    ``Linear(64, 1)`` has no activation.
    """

    def __init__(self, input_size):
        super().__init__()
        widths = [input_size, 512, 256, 128, 64, 32, 16, 16, 32, 64]
        activations = [nn.Sigmoid] * 5 + [nn.ReLU] * 4

        # Build Linear/activation pairs front to back so the layer order (and
        # therefore the Sequential indices) is the same as listing them by hand.
        stack = []
        for fan_in, fan_out, activation in zip(widths[:-1], widths[1:], activations):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(activation())
        stack.append(nn.Linear(widths[-1], 1))

        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw prediction."""
        return self.network(x)

In [38]:
# Notebook-level log of the mean training loss per epoch; train_model appends to it.
loss_history = list()  # DO NOT DELETE
# Feature count of the training frame, used as the width of the network input.
n_train_rows, input_size = X_train.shape


def train_model(model, train_loader, optimizer, criterion, epochs, history=None):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    After every epoch the mean batch loss is printed and appended to
    ``history``.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches. It does
            not need to implement ``__len__``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        epochs: Number of passes over ``train_loader``.
        history: Optional list receiving one mean loss per epoch. When omitted,
            the notebook-level ``loss_history`` list is used.

    Raises:
        ValueError: If ``train_loader`` yields no batches during an epoch.
    """
    if history is None:
        # Default keeps the original behaviour: log to the notebook-level list.
        history = loss_history

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        n_batches = 0
        for inputs, labels in train_loader:
            # Present every sample as a flat feature vector, deriving the width
            # from the batch itself instead of from a notebook global.
            if inputs.dim() == 1:
                inputs = inputs.unsqueeze(0)  # a single unbatched sample
            else:
                inputs = inputs.flatten(start_dim=1)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n_batches += 1

        if n_batches == 0:
            # Averaging over zero batches would otherwise be a ZeroDivisionError.
            raise ValueError("train_loader yielded no batches; nothing to train on")

        epoch_loss = running_loss / n_batches
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss}")
        history.append(epoch_loss)

    print("Training complete!")

In [57]:
# Fresh network sized to the feature count, trained with L1 (mean absolute
# error) loss and Adam.
# NOTE(review): train_model appends to the notebook-level loss_history, so
# re-running this cell without re-running the cell that creates that list
# extends the existing curve instead of starting a new one.
model = RegressionNN(input_size=input_size)
criterion = nn.L1Loss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

train_model(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    criterion=criterion,
    epochs=300,
)


In [58]:
# Draw the per-epoch mean training loss recorded in loss_history.
fig, ax = plt.subplots(figsize=(10, 6))
epoch_axis = range(1, len(loss_history) + 1)
ax.plot(epoch_axis, loss_history, marker='o')
ax.set_title("Training Loss Over Epochs")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.grid(True)
plt.show()

In [54]:
def test_model(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and report R2 and MAE.

    Args:
        model: Trained module; it is switched to evaluation mode.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        The R2 score over all batches. The MAE is printed but not returned.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()

    # Run inference wherever the model's parameters live, so this also works
    # when the model has been moved to a GPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    label_chunks = []
    output_chunks = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            if model_device is not None:
                inputs = inputs.to(model_device)
            outputs = model(inputs)
            # Bring everything back to host memory before converting to NumPy.
            label_chunks.append(labels.detach().cpu())
            output_chunks.append(outputs.detach().cpu())

    if not label_chunks:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    all_labels = torch.cat(label_chunks).numpy()
    all_outputs = torch.cat(output_chunks).numpy()
    r2 = r2_score(all_labels, all_outputs)
    mae = mean_absolute_error(all_labels, all_outputs)

    print(f"Test R2 Score: {r2:.4f} MAE: {mae:.4f}")
    return r2
test_model(model, test_loader)

In [59]:
# Predict the whole held-out set in one pass with gradient tracking disabled.
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor).detach().cpu().numpy()
y_test_np = y_test_tensor.detach().cpu().numpy()

# End points of the identity line (perfect prediction).
y_low, y_high = y_test_np.min(), y_test_np.max()

fig, ax = plt.subplots(figsize=(12, 6))
ax.scatter(y_test_np, predictions, alpha=0.5, color='blue', label='Predicciones')
ax.scatter(y_test_np, y_test_np, alpha=0.5, color='red', label='Valores reales')
ax.plot([y_low, y_high], [y_low, y_high], 'r', lw=2)
ax.set_title('Predicted vs Actual Values')
ax.set_xlabel('Actual Values')
ax.set_ylabel('Predicted Values')
ax.legend()
plt.show()