In [32]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn

# 1. Carga de datos
Se puede cambiar la fracción de valores a cargar para realizar los entrenamientos con un mayor % de valores.

In [33]:
folder_path = r"/home/alkain/Escritorio/Github_Repositorios/Microgrid/objective2_data_cleaned" 

csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]

df_list = []
for file in csv_files:
    file_path = os.path.join(folder_path, file)
    df = pd.read_csv(file_path, parse_dates=["Time Stamp (local standard time) yyyy-mm-ddThh:mm:ss"])
    
    # Selecciona aleatoriamente el 5% de las filas
    sample_df = df.sample(frac=0.05, random_state=42)  # Ajusta random_state para reproducibilidad
    
    df_list.append(sample_df)

df_merged = pd.concat(df_list, ignore_index=True)
df_merged = df_merged.drop(columns='Time Stamp (local standard time) yyyy-mm-ddThh:mm:ss')

In [34]:
print(df_merged.size)

1025560


# 2. Preprocesamiento de datos

## 2.1. División de datos

In [35]:
from sklearn.model_selection import train_test_split
import torch

# Cargar el dataset
import pandas as pd
df = df_merged

# Separar variables independientes y dependiente
X = df.drop(columns=['Pmp (W)'])
y = df['Pmp (W)']

# División en entrenamiento, validación y prueba (70/20/10)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.33, random_state=42)  # 20/10

# Verificar tamaños de conjuntos de datos
print("Tamaño del dataset completo:", len(df))
print("\nTamaño de particiones:")
print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("X_valid:", X_valid.shape, "y_valid:", y_valid.shape)
print("X_test:", X_test.shape, "y_test:", y_test.shape)
print("\nPorcentajes de particiones:")
print(f"Train: {len(X_train)/len(df)*100:.2f}%")
print(f"Valid: {len(X_valid)/len(df)*100:.2f}%")
print(f"Test: {len(X_test)/len(df)*100:.2f}%")

Tamaño del dataset completo: 102556

Tamaño de particiones:
X_train: (71789, 9) y_train: (71789,)
X_valid: (20613, 9) y_valid: (20613,)
X_test: (10154, 9) y_test: (10154,)

Porcentajes de particiones:
Train: 70.00%
Valid: 20.10%
Test: 9.90%



## 2.2. Creación de Datasets y Dataloaders

In [36]:
from torch.utils.data import Dataset, DataLoader

class SolarDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X.values, dtype=torch.float32)
        self.y = torch.tensor(y.values, dtype=torch.float32).unsqueeze(1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Crear datasets
train_dataset = SolarDataset(X_train, y_train)
valid_dataset = SolarDataset(X_valid, y_valid)
test_dataset = SolarDataset(X_test, y_test)

# Verificar tamaños de los datasets de PyTorch
print("\nVerificación de datasets de PyTorch:")
print("Tamaño train_dataset:", len(train_dataset))
print("Tamaño valid_dataset:", len(valid_dataset))
print("Tamaño test_dataset:", len(test_dataset))
print("Dimensiones de X en train_dataset:", train_dataset.X.shape)
print("Dimensiones de y en train_dataset:", train_dataset.y.shape)

# Crear dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Verificar tamaños de los dataloaders
print("\nVerificación de dataloaders:")
print("Número de batches en train_loader:", len(train_loader))
print("Número de batches en valid_loader:", len(valid_loader))
print("Número de batches en test_loader:", len(test_loader))

# Verificar un batch para asegurarse de que los datos están correctamente formateados
X_batch, y_batch = next(iter(train_loader))
print("\nVerificación de un batch de entrenamiento:")
print("Dimensiones de X_batch:", X_batch.shape)
print("Dimensiones de y_batch:", y_batch.shape)
print("Tipo de datos de X_batch:", X_batch.dtype)
print("Tipo de datos de y_batch:", y_batch.dtype)


Verificación de datasets de PyTorch:
Tamaño train_dataset: 71789
Tamaño valid_dataset: 20613
Tamaño test_dataset: 10154
Dimensiones de X en train_dataset: torch.Size([71789, 9])
Dimensiones de y en train_dataset: torch.Size([71789, 1])

Verificación de dataloaders:
Número de batches en train_loader: 2244
Número de batches en valid_loader: 645
Número de batches en test_loader: 318

Verificación de un batch de entrenamiento:
Dimensiones de X_batch: torch.Size([32, 9])
Dimensiones de y_batch: torch.Size([32, 1])
Tipo de datos de X_batch: torch.float32
Tipo de datos de y_batch: torch.float32


# 3. Definición de Modelos de Deep Learning

## 3.1. Red neuronal Densa (MLP)

In [37]:
class MLP(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, activation_fn=nn.ReLU):
        super(MLP, self).__init__()
        layers = []
        in_size = input_size
        
        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(in_size, hidden_size))
            layers.append(activation_fn())  # Se puede cambiar a ReLU, LeakyReLU o tanh
            in_size = hidden_size
        
        layers.append(nn.Linear(in_size, output_size))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

input_size = X_train.shape[1]
output_size = 1
hidden_sizes = [64, 32, 16]  # Modificar para probar distintas arquitecturas

model1 = MLP(input_size, hidden_sizes, output_size, activation_fn=nn.ReLU)

## 3.2. Redes recurrentes (RNN, LSTM, GRU)

In [38]:
class RecurrentModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, model_type='LSTM'):
        super(RecurrentModel, self).__init__()
        self.model_type = model_type
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        if model_type == 'LSTM':
            self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        elif model_type == 'GRU':
            self.rnn = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        else:
            self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x: (batch_size, seq_len, input_size)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device) if self.model_type == 'LSTM' else None

        if self.model_type == 'LSTM':
            out, _ = self.rnn(x, (h0, c0))
        else:
            out, _ = self.rnn(x, h0)

        out = self.fc(out[:, -1, :])  # Tomar la salida de la última capa
        return out

input_size = X_train.shape[1]  # Cambiado para datos tabulares
hidden_size = 64
output_size = 1
num_layers = 2

# Modelos disponibles: 'RNN', 'LSTM', 'GRU'
model2 = RecurrentModel(input_size, hidden_size, output_size, num_layers, model_type='LSTM')

## 3.3. Redes neuronales convolucionales (CNN)

In [39]:
class CNNModel(nn.Module):
    def __init__(self, input_channels, output_size):
        super(CNNModel, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv1d(input_channels, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),

            nn.Conv1d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )
        self.flatten = nn.Flatten()
        self.fc = nn.Sequential(
            nn.Linear(32 * (input_size // 4), 64),  # Ajustar según el tamaño final
            nn.ReLU(),
            nn.Linear(64, output_size)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

input_channels = 1  # Para datos tabulares, reorganizar a (batch_size, 1, input_size)
output_size = 1
input_size = X_train.shape[1]

model3 = CNNModel(input_channels, output_size)

In [None]:
# Funciones de pérdida
criterion_mse = nn.MSELoss()
criterion_smooth_l1 = nn.SmoothL1Loss()

# Optimizadores
optimizer_adam = torch.optim.Adam(model1.parameters(), lr=0.001)
optimizer_sgd = torch.optim.SGD(model2.parameters(), lr=0.01, momentum=0.9)
optimizer_rmsprop = torch.optim.RMSprop(model3.parameters(), lr=0.001)

# 4. Función de Pérdida y Optimizador

## 4.1. Funciones de pérdida

### 4.1.1. MSELoss

In [41]:
criterion_mse = nn.MSELoss()

### 4.1.2. SmoothL1Loss

In [42]:
criterion_smooth_l1 = nn.SmoothL1Loss()

## 4.2. Optimizadores

### 4.2.1. Adam

In [43]:
optimizer_adam = torch.optim.Adam(model1.parameters(), lr=0.001)
optimizer_adam = torch.optim.Adam(model2.parameters(), lr=0.001)
optimizer_adam = torch.optim.Adam(model3.parameters(), lr=0.001)

### 4.2.2. SGD

In [44]:
optimizer_sgd = torch.optim.SGD(model1.parameters(), lr=0.01, momentum=0.9)
optimizer_sgd = torch.optim.SGD(model2.parameters(), lr=0.01, momentum=0.9)
optimizer_sgd = torch.optim.SGD(model3.parameters(), lr=0.01, momentum=0.9)

### 4.2.3. RMSprop

In [45]:
optimizer_rmsprop = torch.optim.RMSprop(model1.parameters(), lr=0.001)
optimizer_rmsprop = torch.optim.RMSprop(model2.parameters(), lr=0.001)
optimizer_rmsprop = torch.optim.RMSprop(model3.parameters(), lr=0.001)


## 4.3 Ajuste de hiperparámetros

In [51]:
# Ajuste de hiperparámetros
def train_model(model, criterion, optimizer, num_epochs=20):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")

# Ajuste de modelos
print("Ajustando modelo MLP (model1) con MSE y Adam")
train_model(model1, criterion_mse, optimizer_adam)

#print("Ajustando modelo RNN (model2) con SmoothL1 y SGD")
#train_model(model2, criterion_smooth_l1, optimizer_sgd)

print("Ajustando modelo CNN (model3) con MSE y RMSprop")
train_model(model3, criterion_mse, optimizer_rmsprop)

Ajustando modelo MLP (model1) con MSE y Adam
Epoch [1/20], Loss: 7388.0097
Epoch [2/20], Loss: 7387.2551
Epoch [3/20], Loss: 7387.6215
Epoch [4/20], Loss: 7388.3173
Epoch [5/20], Loss: 7388.9312
Epoch [6/20], Loss: 7387.5117
Epoch [7/20], Loss: 7387.4679
Epoch [8/20], Loss: 7389.0268
Epoch [9/20], Loss: 7387.9006
Epoch [10/20], Loss: 7387.9845
Epoch [11/20], Loss: 7388.9768
Epoch [12/20], Loss: 7387.6352
Epoch [13/20], Loss: 7387.5778
Epoch [14/20], Loss: 7387.4921
Epoch [15/20], Loss: 7387.8160
Epoch [16/20], Loss: 7387.2549
Epoch [17/20], Loss: 7387.5245
Epoch [18/20], Loss: 7387.1531
Epoch [19/20], Loss: 7388.0366
Epoch [20/20], Loss: 7387.6469
Ajustando modelo CNN (model3) con MSE y RMSprop


RuntimeError: Given groups=1, weight of size [16, 1, 3], expected input[1, 32, 9] to have 1 channels, but got 32 channels instead

# 5. Entrenamiento y validación de los modelos

Se define un loop de entrenamiento

# 6. Evaluación y comparación de modelos

## 6.1. Comparando MSE

## 6.2. Comparando RMSE

## 6.3, Comparando MAE

## 6.4. Gráficas de predicciones vs valores reales

## 6.5. Comparar rendimiento de cada arquitectura