In [None]:
import cv2
from Load_RGB import CASME2Dataset
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split

from torchvision.models.video import r3d_18

# 3D ResNet-18 video model (accepts input shaped [batch_size, C, T, H, W]).
model = r3d_18(pretrained=True)
# Replace the final fully connected layer with a 3-output head.
model.fc = nn.Linear(model.fc.in_features, 3)

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

database = CASME2Dataset()

# Number of samples in the dataset; use the len() builtin instead of calling
# the __len__ dunder directly.
total_size = len(database)


In [None]:
train_ratio = 0.8  # 80% of the samples for training, the remaining 20% for testing
train_size = int(train_ratio * total_size)
# Take the remainder so that train_size + test_size == total_size exactly.
test_size = total_size - train_size

# Split the dataset with an explicitly seeded generator so that the same
# train/test partition is produced every time the notebook is re-run.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    database, [train_size, test_size], generator=split_generator
)

In [None]:
batch_size = 2
# Options shared by both loaders.
loader_options = dict(batch_size=batch_size, num_workers=0)
# Only the training loader shuffles; the test loader keeps the subset order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [None]:
import torch.optim as optim

# Loss function: cross-entropy between the model outputs and the labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters.
learning_rate = 0.001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
num_epochs = 5
# A checkpoint is written on epochs whose 0-based index is a multiple of this
# value, and additionally after the final epoch (see below).
checkpoint_every = 5
# Raw string: this Windows path is full of backslashes, and sequences such as
# "\P" or "\m" are invalid escapes in a normal string literal.
checkpoint_path = r'D:\PythonCourse\ME_Recognition\Data\models\checkpoint.pth'

model.train()  # put the model in training mode

for epoch in range(num_epochs):

    running_loss = 0.0  # sum of the per-batch losses for this epoch

    for videos, labels in train_loader:
        # Move the batch to the same device as the model.
        videos, labels = videos.to(device), labels.to(device)

        # Reset the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        outputs = model(videos)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss of the epoch.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

    # With only `epoch % checkpoint_every == 0` and num_epochs == 5 the sole
    # checkpoint would be taken after the first epoch; also save after the
    # last epoch so the file on disk reflects the fully trained model.
    is_last_epoch = epoch == num_epochs - 1
    if epoch % checkpoint_every == 0 or is_last_epoch:
        checkpoint = {
            'epoch': epoch,  # 0-based index of the epoch that just finished
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Loss of the last batch, detached from the autograd graph and
            # moved to the CPU so the file does not depend on the device.
            'loss': loss.detach().cpu(),
        }
        torch.save(checkpoint, checkpoint_path)

In [None]:
model.eval()  # switch to evaluation mode
correct = 0  # number of correctly classified samples
total = 0    # number of samples seen

# Gradients are not needed while evaluating.
with torch.no_grad():
    for clips, targets in test_loader:
        clips = clips.to(device)
        targets = targets.to(device)
        logits = model(clips)
        # Predicted class = index of the largest output along dim 1.
        predicted = logits.argmax(dim=1)
        hits = predicted == targets
        total += targets.size(0)
        correct += hits.sum().item()

# Accuracy as a percentage of the evaluated samples.
accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")


In [None]:
# Save the indices used by the train/test split so the same partition can be
# rebuilt later with torch.utils.data.Subset.
train_indices = train_dataset.indices
test_indices = test_dataset.indices

# Raw string: the backslashes of this Windows path must not be read as escape
# sequences ("\P", "\s", ... are invalid escapes in a normal string literal).
split_indices_path = r'D:\PythonCourse\ME_Recognition\Data\models\split_indices.pth'
torch.save({'train': train_indices, 'test': test_indices}, split_indices_path)

In [6]:
# Manually write a checkpoint of the current training state.
checkpoint = {
    'epoch': epoch,  # value of the loop variable left by the training cell
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    # Last computed loss, detached from the autograd graph and moved to the
    # CPU so the file can be loaded regardless of the device it was saved on.
    'loss': loss.detach().cpu(),
}
# Raw string: the backslashes of this Windows path must not be read as escape
# sequences ("\P", "\c", ... are invalid escapes in a normal string literal).
checkpoint_path = r'D:\PythonCourse\ME_Recognition\Data\models\checkpoint.pth'
torch.save(checkpoint, checkpoint_path)

In [7]:
# Save only the model weights (state_dict). A raw string keeps every backslash
# of the Windows path literal, including the one before "r3d_18", which would
# otherwise need to be escaped by hand to avoid becoming a carriage return.
weights_path = r'D:\PythonCourse\ME_Recognition\Data\models\r3d_18_weights.pth'
torch.save(model.state_dict(), weights_path)

In [None]:
# # Cargar el archivo de video
# cap = cv2.VideoCapture(video_path)

# # Verificar si se pudo abrir correctamente
# if not cap.isOpened():
#     print("No se pudo abrir el archivo de video.")
#     exit()

# # Leer y mostrar cada cuadro del video
# while True:
#     ret, frame = cap.read()
    
#     # Si no hay más cuadros, salir del bucle
#     if not ret:
#         print("Fin del video o no se pudieron leer más cuadros.")
#         break

#     # Mostrar el cuadro en una ventana
#     cv2.imshow("Video", frame)
    
#     # Salir si se presiona la tecla 'q'
#     if cv2.waitKey(25) & 0xFF == ord('q'):
#         break

# # Liberar el recurso del video y cerrar las ventanas
# cap.release()
# cv2.destroyAllWindows()


In [1]:
import cv2
from Load_RGB import CASME2Dataset
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
import torch.optim as optim
from torchvision.models.video import r3d_18

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 3D ResNet-18 video model (accepts input shaped [batch_size, C, T, H, W]),
# with its last layer replaced by a 3-output linear head, moved to `device`.
model = r3d_18(pretrained=True)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=3)
model = model.to(device)

# Dataset
database = CASME2Dataset()  # TODO: does the constructor need additional arguments?

# Loss function
criterion = nn.CrossEntropyLoss()

# Load the checkpoint, remapping its tensors onto the current device.
# Raw string: the backslashes of this Windows path must not be read as escape
# sequences ("\P", "\c", ... are invalid escapes in a normal string literal).
checkpoint_path = r'D:\PythonCourse\ME_Recognition\Data\models\checkpoint.pth'
checkpoint = torch.load(checkpoint_path, map_location=device)

# Restore the model weights (this replaces the weights the model was built with).
model.load_state_dict(checkpoint['model_state_dict'])

# Re-create the optimizer and restore its state.
optimizer = optim.Adam(model.parameters(), lr=0.001)  # make sure lr matches the original run
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Recover the bookkeeping values stored alongside the weights.
start_epoch = checkpoint['epoch']
start_batch = checkpoint.get('batch', 0)   # optional key; defaults to 0 when absent
loss_value = checkpoint.get('loss', None)  # optional key; None when absent

# Load the saved train/test indices.
# Raw string: the backslashes of this Windows path must not be read as escape
# sequences ("\P", "\s", ... are invalid escapes in a normal string literal).
split_indices_path = r'D:\PythonCourse\ME_Recognition\Data\models\split_indices.pth'
split_indices = torch.load(split_indices_path)
train_indices = split_indices['train']
test_indices = split_indices['test']

# Rebuild the train/test subsets of the dataset from those indices.
train_dataset = Subset(database, train_indices)
test_dataset = Subset(database, test_indices)

# DataLoader configuration
batch_size = 2  # consider increasing this value if possible
torch.manual_seed(42)  # reproducibility
# Options shared by both loaders.
loader_options = dict(batch_size=batch_size, num_workers=2)
# Only the training loader shuffles; the test loader keeps the subset order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)




In [None]:
# Epoch stored in the most recent checkpoint dict (loaded from disk, or written
# by the previous run of a training cell). Reading it from `checkpoint` instead
# of the leftover loop variable `epoch` keeps this cell runnable on a fresh
# kernel, where `epoch` does not exist yet.
start_epoch = checkpoint['epoch']
num_epochs = 10
# Raw string: the backslashes of this Windows path must not be read as escape
# sequences ("\P", "\c", ... are invalid escapes in a normal string literal).
checkpoint_path = r'D:\PythonCourse\ME_Recognition\Data\models\checkpoint.pth'

model.train()  # put the model in training mode

for epoch in range(num_epochs):

    running_loss = 0.0  # sum of the per-batch losses for this epoch

    for videos, labels in train_loader:
        # Move the batch to the same device as the model.
        videos, labels = videos.to(device), labels.to(device)

        # Reset the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        outputs = model(videos)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss of the epoch.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

    # Checkpoint on every even epoch index and, additionally, after the final
    # epoch: with num_epochs == 10 the even-only rule stops at index 8 and
    # would leave the last epoch of training unsaved.
    is_last_epoch = epoch == num_epochs - 1
    if epoch % 2 == 0 or is_last_epoch:
        checkpoint = {
            # Offset by the epoch recorded in the checkpoint we resumed from.
            'epoch': epoch + start_epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Loss of the last batch, detached from the autograd graph and
            # moved to the CPU so the file does not depend on the device.
            'loss': loss.detach().cpu(),
        }
        torch.save(checkpoint, checkpoint_path)

Epoch [1/5], Loss: 1.0246
Epoch [2/5], Loss: 1.0021
Epoch [3/5], Loss: 1.0088
Epoch [4/5], Loss: 1.0068
Epoch [5/5], Loss: 0.9771


In [5]:
model.eval()  # switch to evaluation mode
correct, total = 0, 0  # correctly classified samples / samples seen

# Gradients are not needed while evaluating.
with torch.no_grad():
    for batch_videos, batch_labels in test_loader:
        batch_videos = batch_videos.to(device)
        batch_labels = batch_labels.to(device)
        scores = model(batch_videos)
        # Predicted class = index of the largest output along dim 1.
        predicted = torch.argmax(scores, dim=1)
        total += batch_labels.size(0)
        correct += torch.eq(predicted, batch_labels).sum().item()

# Accuracy as a percentage of the evaluated samples.
accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")


Accuracy: 54.90%
