In [1]:
import torch

# Report the CUDA environment of this kernel.
# The device name is only queried when a GPU is actually present, because
# torch.cuda.get_device_name(0) raises on a machine without a CUDA device.
print("¿CUDA disponible?:", torch.cuda.is_available())
print("Número de GPUs:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("Nombre GPU:", torch.cuda.get_device_name(0))
else:
    print("Nombre GPU: ninguna (se usará la CPU)")

¿CUDA disponible?: True
Número de GPUs: 1
Nombre GPU: NVIDIA GeForce RTX 3090


In [5]:
!pip install av

Collecting av
  Downloading av-14.4.0-cp311-cp311-win_amd64.whl.metadata (4.7 kB)
Downloading av-14.4.0-cp311-cp311-win_amd64.whl (27.9 MB)
   ---------------------------------------- 0.0/27.9 MB ? eta -:--:--
   - -------------------------------------- 1.0/27.9 MB 10.1 MB/s eta 0:00:03
   ----- ---------------------------------- 3.7/27.9 MB 10.9 MB/s eta 0:00:03
   -------- ------------------------------- 6.0/27.9 MB 11.2 MB/s eta 0:00:02
   ------------ --------------------------- 8.7/27.9 MB 11.4 MB/s eta 0:00:02
   --------------- ------------------------ 11.0/27.9 MB 11.5 MB/s eta 0:00:02
   ------------------- -------------------- 13.6/27.9 MB 11.4 MB/s eta 0:00:02
   ---------------------- ----------------- 16.0/27.9 MB 11.4 MB/s eta 0:00:02
   -------------------------- ------------- 18.6/27.9 MB 11.5 MB/s eta 0:00:01
   ------------------------------ --------- 21.0/27.9 MB 11.5 MB/s eta 0:00:01
   --------------------------------- ------ 23.3/27.9 MB 11.5 MB/s eta 0:00:01
   -

In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_video
from torchvision.models.video import r2plus1d_18

# ——————————————————————————————
# 1) CONFIGURATION
# ——————————————————————————————
INPUT_DIR    = os.path.join("..", "clips_prueba")
FPS          = 30
CLIP_FRAMES  = 32    # every clip has 32 frames (~1 s)
BATCH_SIZE   = 4
NUM_EPOCHS   = 10
LEARNING_RATE= 1e-4
NUM_CLASSES  = 2     # 0 = truth, 1 = lie
SEED         = 42    # fixed seed so that runs can be repeated

# Seed PyTorch's random number generators so that DataLoader shuffling and the
# initialisation of newly created layers repeat across runs.
# NOTE(review): some GPU kernels may still be non-deterministic; this does not
# by itself guarantee bit-identical training results.
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# ——————————————————————————————
# 2) BUILD PATHS AND LABELS
# ——————————————————————————————
# Assumes INPUT_DIR contains two subfolders, one per class:
truth_dir = os.path.join(INPUT_DIR, "verdad")
lie_dir   = os.path.join(INPUT_DIR, "mentira")


def _list_mp4(folder):
    """Return the .mp4 files directly inside `folder` as a sorted list of paths.

    The extension check is case-insensitive (so "clip.MP4" is included), and
    the result is sorted so it does not depend on the arbitrary order in which
    os.listdir yields directory entries.
    """
    return sorted(os.path.join(folder, f)
                  for f in os.listdir(folder) if f.lower().endswith(".mp4"))


truth_paths = _list_mp4(truth_dir)
lie_paths   = _list_mp4(lie_dir)

clip_paths = truth_paths + lie_paths
labels     = [0]*len(truth_paths) + [1]*len(lie_paths)   # 0 = truth, 1 = lie

# Fail early with a clear message: an empty dataset would otherwise only show
# up later as a division by zero when the per-epoch averages are computed.
if not clip_paths:
    raise FileNotFoundError(
        f"No se encontraron clips .mp4 en {truth_dir} ni en {lie_dir}")


# ——————————————————————————————
# 3) DATASET + DATALOADER
# ——————————————————————————————
class ClipDataset(Dataset):
    """Map-style dataset that yields fixed-length video clips with a class label.

    Each item is decoded with ``read_video``, truncated or zero-padded along
    the time axis to ``clip_len`` frames, and returned as a float tensor in
    [C, T, H, W] layout with pixel values divided by 255.

    Args:
        paths: Video file paths, one per sample.
        labels: Integer class labels, aligned index-by-index with ``paths``.
        clip_len: Number of frames every returned clip has.

    Raises:
        ValueError: If ``paths`` and ``labels`` have different lengths.
    """

    def __init__(self, paths, labels, clip_len=CLIP_FRAMES):
        # A length mismatch would otherwise surface as an IndexError in the
        # middle of training, or silently leave trailing labels unused.
        if len(paths) != len(labels):
            raise ValueError(
                f"paths ({len(paths)}) y labels ({len(labels)}) deben tener la misma longitud")
        self.paths     = paths
        self.labels    = labels
        self.clip_len  = clip_len

    def __len__(self):
        """Return the number of clips in the dataset."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Decode clip ``idx`` and return ``(frames, label)``.

        Returns:
            frames: float tensor [C, clip_len, H, W], values divided by 255.
            label: scalar ``torch.long`` tensor holding the class index.

        Raises:
            RuntimeError: If no frame could be decoded from the file.
        """
        path = self.paths[idx]
        video, _, _ = read_video(path, pts_unit="sec")
        # video: [T, H, W, C]
        if video.shape[0] == 0:
            # Without this check the clip would be padded from an empty tensor
            # and fail later with an unrelated shape error.
            raise RuntimeError(f"No se pudo decodificar ningún frame de {path}")

        # Keep the first clip_len frames; zero-pad at the end if the video is shorter.
        frames = video[:self.clip_len]
        if frames.shape[0] < self.clip_len:
            pad_len = self.clip_len - frames.shape[0]
            pad = torch.zeros((pad_len, *frames.shape[1:]), dtype=frames.dtype)
            frames = torch.cat([frames, pad], dim=0)

        # Reorder to [C, T, H, W] and scale to floats.
        # NOTE(review): no resizing is done here, so batching presumably relies
        # on every clip sharing the same H x W; confirm against the data.
        frames = frames.permute(3,0,1,2).float() / 255.0

        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return frames, label

dataset   = ClipDataset(clip_paths, labels)
# Pinned host memory only benefits host-to-GPU copies, so it is requested only
# when CUDA is present; on a CPU-only machine pin_memory=True just emits a warning.
dataloader= DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0,
                       pin_memory=torch.cuda.is_available())


# ——————————————————————————————
# 4) MODEL (pre-trained R(2+1)D)
# ——————————————————————————————
# `pretrained=True` is deprecated in torchvision; naming the checkpoint
# explicitly loads the same Kinetics-400 weights and keeps the choice pinned.
model = r2plus1d_18(weights="KINETICS400_V1")
# Replace the final fully connected layer with one that outputs NUM_CLASSES logits.
in_feat = model.fc.in_features
model.fc = nn.Linear(in_feat, NUM_CLASSES)
model = model.to(device)


# ——————————————————————————————
# 5) LOSS AND OPTIMIZER
# ——————————————————————————————
# Adam updates every parameter of the model at the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
# Cross-entropy computed from the logits the model outputs.
criterion = nn.CrossEntropyLoss()


# ——————————————————————————————
# 6) TRAINING LOOP
# ——————————————————————————————
for epoch in range(1, NUM_EPOCHS+1):
    model.train()
    # Per-epoch accumulators: summed loss, correct predictions, samples seen.
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for clips, y in dataloader:
        clips = clips.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        logits = model(clips)               # [B, 2]
        batch_loss = criterion(logits, y)
        batch_loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # the epoch average stays correct when the last batch is smaller.
        loss_sum += batch_loss.item() * clips.shape[0]
        preds = logits.argmax(dim=1)
        n_correct += (preds == y).sum().item()
        n_seen += y.shape[0]

    epoch_loss = loss_sum / n_seen
    epoch_acc  = 100 * n_correct / n_seen
    print(f"Epoch {epoch}/{NUM_EPOCHS} — "
          f"Loss: {epoch_loss:.4f} — Acc: {epoch_acc:.1f}%")



Epoch 1/10 — Loss: 0.6188 — Acc: 62.9%
Epoch 2/10 — Loss: 0.4363 — Acc: 77.1%
Epoch 3/10 — Loss: 0.3225 — Acc: 85.6%
Epoch 4/10 — Loss: 0.2628 — Acc: 89.2%
Epoch 5/10 — Loss: 0.2361 — Acc: 89.8%
Epoch 6/10 — Loss: 0.2166 — Acc: 91.5%
Epoch 7/10 — Loss: 0.1274 — Acc: 96.6%
Epoch 8/10 — Loss: 0.1264 — Acc: 94.6%
Epoch 9/10 — Loss: 0.1142 — Acc: 96.3%
Epoch 10/10 — Loss: 0.0951 — Acc: 97.2%


In [2]:
# Run after all epochs have finished: store the model's state_dict
# (its parameters and buffers) rather than the module object itself.
save_path = "r2plus1d_lie_detector.pth"
torch.save(obj=model.state_dict(), f=save_path)
print(f"Modelo guardado en {save_path}")

Modelo guardado en r2plus1d_lie_detector.pth


# Para cargar el modelo

In [None]:
# 1) Rebuild the same architecture that was used for training.
#    weights=None replaces the deprecated pretrained=False. No pre-trained
#    weights are needed here because step 2 overwrites every parameter.
model = r2plus1d_18(weights=None)
in_feat = model.fc.in_features
model.fc = nn.Linear(in_feat, NUM_CLASSES)
model = model.to(device)

# 2) Load the saved weights.
#    weights_only=True restricts unpickling to tensors and plain containers,
#    so a tampered checkpoint file cannot execute arbitrary code on load.
checkpoint = torch.load("r2plus1d_lie_detector.pth", map_location=device, weights_only=True)
model.load_state_dict(checkpoint)
model.eval()   # required before inference: puts layers such as BatchNorm in evaluation mode