In [None]:
import pandas as pd
import torch
import torch.nn as nn

import numpy as np

from torch.utils.data import random_split

import sys

## Subir datasets de fallas de corriente

In [None]:
# Upload the training/validation file through the google.colab `files` helper.
# The returned value is kept in `uploaded`; later cells read the file from disk by name.
from google.colab import files
uploaded = files.upload()  # expected file: sensorless_tarea4_train.txt

In [None]:
# Upload the test file through the google.colab `files` helper.
# NOTE: this rebinds `uploaded`, so the result of the previous upload call is no longer referenced.
from google.colab import files
uploaded = files.upload()  # expected file: sensorless_tarea4_test.txt

## Leer datasets de fallas de corriente

In [2]:
# List the working directory to check that both dataset files are present.
!ls

sample_data  sensorless_tarea4_test.txt  sensorless_tarea4_train.txt


In [3]:
# Column layout used when reading the data files:
# 48 feature columns (feat0 .. feat47) followed by the "class" label column.
column_names = [f"feat{idx}" for idx in range(48)] + ["class"]

In [None]:
# Read the training/validation file, supplying the column names explicitly.
# The label column is shifted down by one (presumably from 1-based to 0-based
# class ids -- confirm against the raw data).
df_train_val = pd.read_csv('sensorless_tarea4_train.txt', names=column_names).assign(
    **{"class": lambda frame: frame["class"] - 1}
)
df_train_val

In [None]:
# Read the test file, supplying the column names explicitly.
# The label column is shifted down by one (presumably from 1-based to 0-based
# class ids -- confirm against the raw data).
df_test = pd.read_csv('sensorless_tarea4_test.txt', names=column_names).assign(
    **{"class": lambda frame: frame["class"] - 1}
)
df_test

## Crear modelo

In [6]:
# Linear classifier: one fully-connected layer from 48 inputs to 11 outputs.
model = nn.Sequential(nn.Linear(in_features=48, out_features=11))

In [7]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# runtime without CUDA (a hard-coded 'cuda' device fails at model.to()).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = model.to(device)

# Cross-entropy loss on the model outputs, optimised with Adam (lr = 1e-3).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

## Crear datasets y dataloaders para pytorch

In [8]:
# Build the datasets

# Convert each frame to NumPy only once; columns 0..47 hold the features and
# column 48 holds the label.
array_train_val = df_train_val.to_numpy()
feats_train_val = array_train_val[:, 0:48].astype(np.float32)
# Explicit int64: nn.CrossEntropyLoss expects 64-bit integer targets, while
# plain `int` maps to a 32-bit type on some platforms / NumPy versions.
labels_train_val = array_train_val[:, 48].astype(np.int64)
dataset_train_val = [
    {"features": feats, "labels": label}
    for feats, label in zip(feats_train_val, labels_train_val)
]

array_test = df_test.to_numpy()
feats_test = array_test[:, 0:48].astype(np.float32)
labels_test = array_test[:, 48].astype(np.int64)
dataset_test = [
    {"features": feats, "labels": label}
    for feats, label in zip(feats_test, labels_test)
]

# 85 % of the samples go to training, the remainder to validation.
n_train = int(df_train_val.shape[0] * 0.85)
n_val = df_train_val.shape[0] - n_train

# Seed the split so the train/validation partition (and the normalisation
# statistics derived from it) is the same on every Restart & Run All.
split_generator = torch.Generator().manual_seed(0)
dataset_train, dataset_val = random_split(
    dataset_train_val, [n_train, n_val], generator=split_generator
)

In [9]:
# Normalise the features

# The statistics are computed on the training split only and then applied to
# all three splits, so validation and test data do not influence them.
fdata = np.array([x['features'] for x in dataset_train])

fmean = np.mean(fdata, axis=0)
fstd = np.std(fdata, axis=0)

# A feature that is constant over the training split has std == 0, and dividing
# by it would produce NaN/inf. Use 1 instead so the feature is only centred.
# In-place assignment keeps the dtype of fstd unchanged.
fstd[fstd == 0] = 1.0

# Each sample is a dict, so rebinding x['features'] updates the stored sample.
for split in (dataset_train, dataset_val, dataset_test):
  for x in split:
    x['features'] = (x['features'] - fmean) / fstd


In [10]:
# Build the dataloaders
BATCH_SIZE = 128

# Only the training loader is shuffled. Shuffling the validation/test loaders
# does not change aggregate metrics; it only costs a permutation per pass and
# makes per-batch results non-reproducible.
dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

## Entrenamiento
El entrenamiento se puede interrumpir desde el menú: Entorno de ejecución -> Interrumpir ejecución.

In [None]:
# Train for up to 1000 epochs. After every epoch the sample-weighted mean
# training loss and validation loss are written on a single, overwritten line.
# A KeyboardInterrupt (kernel interrupt) stops training cleanly.
for epoch in range(1000):
    try:
      sys.stdout.write("\rÉpoca %d  "  %(epoch))
      model.train()

      # Train on the current epoch, accumulating the loss weighted by batch
      # size so a smaller final batch does not bias the epoch average.
      train_loss_sum = 0.0
      train_samples = 0
      for data in dataloader_train:
          inputs = data["features"].to(device)
          labels = data["labels"].to(device)

          # zero the parameter gradients
          optimizer.zero_grad()

          # forward + backward + optimize
          outputs = model(inputs)

          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()

          batch_size = labels.size(0)
          train_loss_sum += loss.item() * batch_size
          train_samples += batch_size

      # Compute the validation loss for the current epoch (no gradients needed)
      model.eval()
      val_loss_sum = 0.0
      val_samples = 0

      with torch.no_grad():
        for data in dataloader_val:
            inputs = data["features"].to(device)
            labels = data["labels"].to(device)

            outputs = model(inputs)

            batch_size = labels.size(0)
            val_loss_sum += criterion(outputs, labels).item() * batch_size
            val_samples += batch_size

      # max(..., 1) guards against an empty split producing a division by zero.
      train_loss = train_loss_sum / max(train_samples, 1)
      val_loss = val_loss_sum / max(val_samples, 1)

      # '\r' returns to the start of the line so the text is overwritten
      # instead of printing one line per epoch.
      sys.stdout.write("\rÉpoca %d  loss entrenamiento: %.4f  loss validación: %.4f  " % (epoch, train_loss, val_loss))
      sys.stdout.flush()

      # TODO: save checkpoints every 50 epochs (not on every epoch).

    except KeyboardInterrupt:
      print("\nEntrenamiento interrumpido")
      break

print('\nEntrenamiento finalizado')