In [None]:
import os
import pandas as pd
import numpy as np
import ast
import random
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, classification_report
from torch.utils.data import Dataset, DataLoader

def set_seed(seed=42):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU plus the CUDA generators), then switches cuDNN to
    deterministic mode with auto-tuning disabled.

    Args:
        seed: Integer seed applied to all generators. Defaults to 42.
    """
    sembradores = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for sembrar in sembradores:
        sembrar(seed)

    # Deterministic cuDNN kernels and no benchmark-based algorithm selection.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Mount Google Drive at /content/drive so the dataset paths used in the cells
# below resolve. force_remount=True is passed so an existing mount is redone
# when this cell is re-run.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
def cargar_embeddings(csv_path):
    """Read an embeddings CSV and return its three working columns.

    Only ``video_id``, ``features`` and ``label`` are kept. Label value 2 is
    replaced by 1, and each ``features`` cell (a Python-literal string) is
    parsed with ``ast.literal_eval`` and converted to a NumPy array.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A new DataFrame with columns ``video_id``, ``features``, ``label``.
    """
    crudo = pd.read_csv(csv_path)
    columnas = ['video_id', 'features', 'label']
    return crudo[columnas].assign(
        label=lambda tabla: tabla['label'].replace(2, 1),
        features=lambda tabla: tabla['features'].apply(
            lambda texto: np.array(ast.literal_eval(texto))
        ),
    )

def cargar_folds(folds_path):
    """Read the fold-assignment CSV and give its columns fixed names.

    The columns are renamed positionally to ``fold``, ``video_id`` and
    ``label``, so the file must contain exactly three columns in that order;
    any other column count raises ``ValueError``.

    Args:
        folds_path: Path of the folds CSV file.

    Returns:
        DataFrame with columns ``fold``, ``video_id``, ``label``.
    """
    nombres = ['fold', 'video_id', 'label']
    return pd.read_csv(folds_path).set_axis(nombres, axis='columns')

def datos_completos(df_embeddings, df_folds):
    """Attach the fold assignment to each embedding row.

    Performs an inner join on ``video_id``, so rows whose ``video_id`` is
    missing from either frame are dropped from the result.

    Args:
        df_embeddings: DataFrame with at least a ``video_id`` column.
        df_folds: DataFrame with at least ``video_id`` and ``fold`` columns.

    Returns:
        ``df_embeddings`` columns plus a ``fold`` column.
    """
    asignacion_folds = df_folds.loc[:, ['video_id', 'fold']]
    return df_embeddings.merge(asignacion_folds, how='inner', on='video_id')

In [None]:
def guardar_dataset_completo(df, nombre_embedding, carpeta,
                             base_output_dir='/content/drive/MyDrive/DatosTFG/embeddings_normalizados'):
    """Save features, labels and fold ids of ``df`` as three ``.npy`` files.

    The files ``X.npy`` (stacked ``features``), ``y.npy`` (``label``) and
    ``folds.npy`` (``fold``) are written to
    ``<base_output_dir>/<carpeta>/<nombre_embedding>``, which is created if it
    does not exist. The arrays are stored as loaded; this function applies no
    normalization.

    Args:
        df: DataFrame with ``features``, ``label`` and ``fold`` columns.
        nombre_embedding: Name of the innermost output directory.
        carpeta: Name of the intermediate output directory.
        base_output_dir: Root directory for the saved datasets.
    """
    destino = os.path.join(base_output_dir, carpeta, nombre_embedding)
    os.makedirs(destino, exist_ok=True)

    # Build every array first so a missing column fails before anything is written.
    arreglos = {
        'X.npy': np.stack(df['features'].values),
        'y.npy': df['label'].values,
        'folds.npy': df['fold'].values,
    }
    for nombre_archivo, arreglo in arreglos.items():
        np.save(os.path.join(destino, nombre_archivo), arreglo)

    print(f"Guardado dataset completo en {destino}")

### Generar y guardar folds de embeddings individualmente

In [None]:
# Paths
carpeta = '8f_0o'   # Name of the folder holding the raw embeddings: 32f_0o, 16f_0o, 8f_0o, 32f_2o, 32f_4o or 32f_6o
embedding_path = f'/content/drive/MyDrive/DatosTFG/{carpeta}/slow_8x8_embeddings.csv'    # Path of the embedding CSV to process
folds_path = '/content/drive/MyDrive/DatosTFG/experiment_folds.csv'
nombre_embedding = 'slow_8x8'   # Name of the output folder where this embedding's dataset is saved

# Run: load the embeddings and the fold table, join them on video_id and save
# the result as X.npy / y.npy / folds.npy.
set_seed(42)
df_embeddings = cargar_embeddings(embedding_path)
df_folds = cargar_folds(folds_path)
df_completo = datos_completos(df_embeddings, df_folds)
guardar_dataset_completo(df_completo, nombre_embedding, carpeta)

### Generar y guardar folds de dos embeddings fusionados

In [None]:
def fusionar_embeddings(df1, df2, feat_col_1='features', feat_col_2='features'):
    """Fuse two embedding tables by concatenating their feature arrays per video.

    Both frames are sorted by ``video_id`` and must then contain exactly the
    same ids in the same order. For each video the two feature arrays are
    joined along the feature axis: axis 0 for 1-D vectors, axis 1 for arrays of
    rank 2 or higher (for example shape ``(1, D)``).

    Args:
        df1: First DataFrame; needs ``video_id``, ``label`` and ``feat_col_1``.
        df2: Second DataFrame; needs ``video_id`` and ``feat_col_2``.
        feat_col_1: Name of the feature column in ``df1``.
        feat_col_2: Name of the feature column in ``df2``.

    Returns:
        DataFrame with columns ``video_id``, ``features`` (the fused arrays)
        and ``label``. Labels are taken from ``df1``; those of ``df2`` are
        not consulted.

    Raises:
        AssertionError: If the sorted ``video_id`` lists of both frames differ.
    """
    df1 = df1.sort_values('video_id').reset_index(drop=True)
    df2 = df2.sort_values('video_id').reset_index(drop=True)
    assert df1['video_id'].tolist() == df2['video_id'].tolist(), "video_id desalineados"

    concatenados = []
    for f1, f2 in zip(df1[feat_col_1], df2[feat_col_2]):
        f1 = np.asarray(f1)
        f2 = np.asarray(f2)
        # A 1-D vector has no axis 1, so the previous hard-coded axis=1 raised
        # AxisError for flat embeddings. Rank >= 2 keeps the original axis.
        eje = 1 if f1.ndim > 1 else 0
        concatenados.append(np.concatenate([f1, f2], axis=eje))

    return pd.DataFrame({
        'video_id': df1['video_id'],
        'features': concatenados,
        'label': df1['label']
    })

In [None]:
carpeta = '32f_6o'    # Name of the folder holding the raw embeddings: 32f_0o, 16f_0o, 8f_0o, 32f_2o, 32f_4o or 32f_6o
csv_path_1 = f'/content/drive/MyDrive/DatosTFG/{carpeta}/slowfast_4x16_nln_embeddings.csv'    # Path of the first embedding in the fusion
csv_path_2 = f'/content/drive/MyDrive/DatosTFG/{carpeta}/i3d_nln_101_embeddings.csv'    # Path of the second embedding in the fusion
folds_path = '/content/drive/MyDrive/DatosTFG/experiment_folds.csv'
nombre_embedding = 'fusion_slowfast4x16nln_i3dnln101'   # Name of the output folder where the fused dataset is saved

set_seed(42)

# Load the fold table and both embedding tables.
df_folds = cargar_folds(folds_path)
df_1 = cargar_embeddings(csv_path_1)
df_2 = cargar_embeddings(csv_path_2)

# Concatenate the two embeddings per video, then attach the fold of each video.
df_fusion = fusionar_embeddings(df_1, df_2)
df_fusion_completo = datos_completos(df_fusion, df_folds)

# Write X.npy / y.npy / folds.npy for the fused dataset.
guardar_dataset_completo(df_fusion_completo, nombre_embedding, carpeta)

Guardado dataset completo en /content/drive/MyDrive/DatosTFG/embeddings_normalizados/32f_6o/fusion_slowfast4x16nln_i3dnln101
