# Avaliação Automatizada de Suturas Cirúrgicas com Deep Learning

### Imports

In [231]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler

In [232]:
# OSATS target columns: one regression output per column.
osats_cols = [
    'OSATS_RESPECT', 'OSATS_MOTION', 'OSATS_INSTRUMENT', 'OSATS_SUTURE',
    'OSATS_FLOW', 'OSATS_KNOWLEDGE', 'OSATS_PERFORMANCE', 'OSATS_FINAL_QUALITY'
]

# Load the train/test splits.
train_df = pd.read_csv("dataframes_por_inspetor/train_B.csv")
test_df = pd.read_csv("dataframes_por_inspetor/test_B.csv")

y_train = train_df[osats_cols]
# Every column that is not the video id, the global rating or a target is
# used as an input feature.
colunas_a_excluir = ['video', 'GLOBA_RATING_SCORE'] + osats_cols
features = [col for col in train_df.columns if col not in colunas_a_excluir]

# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
# (StandardScaler is already imported in the imports cell; the duplicate
# mid-cell import was removed.)
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(train_df[features]), columns=features)
X_test = pd.DataFrame(scaler.transform(test_df[features]), columns=features)

In [233]:
# Dataset
class OSATSDataset(Dataset):
    """In-memory dataset pairing each feature row with its target row.

    Both ``X`` and ``y`` are read through their ``.values`` attribute and
    stored as ``float32`` tensors when the dataset is constructed.
    """

    def __init__(self, X, y):
        # Convert features first, then targets, to float32 tensors.
        features, targets = (
            torch.tensor(frame.values, dtype=torch.float32) for frame in (X, y)
        )
        self.X = features
        self.y = targets

    def __len__(self):
        # The sample count is the number of feature rows.
        return len(self.X)

    def __getitem__(self, idx):
        # Return the (features, targets) pair stored at position ``idx``.
        sample = (self.X[idx], self.y[idx])
        return sample

# Wrap the scaled training features and the OSATS targets, then serve them
# in shuffled mini-batches of 64 samples.
train_dataset = OSATSDataset(X=X_train, y=y_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

## Modelos

In [234]:
# Modelo MLP multivariado
class MLPRegressorOSATS(nn.Module):
    """Multi-output MLP regressor (256 -> 128 -> 64 -> ``output_size``).

    The first two hidden layers are followed by BatchNorm, ReLU and Dropout;
    the third hidden layer by ReLU only.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of regression outputs. Defaults to 8, one per
            OSATS column, which is what the original hard-coded.
        dropout: Dropout probability used after the first two hidden layers.
            Defaults to 0.3, the original hard-coded value.

    The layer order inside ``self.network`` is unchanged, so state_dict keys
    (``network.0.weight`` ...) stay compatible with existing checkpoints.
    """

    def __init__(self, input_size, output_size=8, dropout=0.3):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, output_size),  # one output per target column
        )

    def forward(self, x):
        """Return the raw (unrounded, unclipped) predictions for ``x``."""
        return self.network(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLPRegressorOSATS(input_size=X_train.shape[1]).to(device)
criterion = nn.SmoothL1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train for a fixed number of epochs, checkpointing whenever the epoch's
# summed training loss improves on the best value seen so far.
best_loss = float('inf')
for epoch in range(100):
    # Enter training mode at the start of every epoch, as the other training
    # cells in this notebook do, so Dropout/BatchNorm are in the right mode
    # even if evaluation code is later added inside the loop.
    model.train()
    total_loss = 0  # sum of per-batch mean losses (not averaged over batches)
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")
    # NOTE(review): model selection uses the training loss; there is no
    # validation split in this cell.
    if total_loss < best_loss:
        best_loss = total_loss
        torch.save(model.state_dict(), "best_model_task2.pt")

# Prediction with the best checkpoint. map_location keeps the load working
# when the checkpoint was written on a different device than the current one.
model.load_state_dict(torch.load("best_model_task2.pt", map_location=device))
model.eval()
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32).to(device)
with torch.no_grad():
    preds = model(X_test_tensor).cpu().numpy()

# Round to the nearest integer and clip to [0, 4].
# NOTE(review): the reference labels printed by the evaluation cell include
# the value 5, which can never be predicted after this clip - confirm the
# intended score range / label offset.
preds = np.clip(np.round(preds), 0, 4).astype(int)

# Save the predictions as CSV, with the video id as the first column.
df_pred_task2 = pd.DataFrame(preds, columns=osats_cols)
df_pred_task2.insert(0, "VIDEO", test_df["video"])
df_pred_task2.to_csv("task2_predicoes_mlp.csv", index=False)
print("✅ CSV gerado: task2_predicoes_mlp.csv")

Epoch 1, Loss: 9.3472
Epoch 2, Loss: 8.6555
Epoch 3, Loss: 7.8352
Epoch 4, Loss: 7.0220
Epoch 5, Loss: 5.9119
Epoch 6, Loss: 4.5245
Epoch 7, Loss: 3.1242
Epoch 8, Loss: 2.1379
Epoch 9, Loss: 1.8304
Epoch 10, Loss: 1.5862
Epoch 11, Loss: 1.3129
Epoch 12, Loss: 1.0701
Epoch 13, Loss: 1.0745
Epoch 14, Loss: 1.1788
Epoch 15, Loss: 1.0620
Epoch 16, Loss: 1.1850
Epoch 17, Loss: 0.9046
Epoch 18, Loss: 0.9522
Epoch 19, Loss: 0.9922
Epoch 20, Loss: 0.9062
Epoch 21, Loss: 0.8749
Epoch 22, Loss: 0.8934
Epoch 23, Loss: 0.8749
Epoch 24, Loss: 0.8293
Epoch 25, Loss: 0.7270
Epoch 26, Loss: 0.8987
Epoch 27, Loss: 0.8750
Epoch 28, Loss: 0.7901
Epoch 29, Loss: 0.8004
Epoch 30, Loss: 0.8029
Epoch 31, Loss: 0.8018
Epoch 32, Loss: 0.6903
Epoch 33, Loss: 0.7050
Epoch 34, Loss: 0.8547
Epoch 35, Loss: 0.7815
Epoch 36, Loss: 0.7102
Epoch 37, Loss: 0.7196
Epoch 38, Loss: 0.8055
Epoch 39, Loss: 0.7311
Epoch 40, Loss: 0.7823
Epoch 41, Loss: 0.6822
Epoch 42, Loss: 0.6656
Epoch 43, Loss: 0.7724
Epoch 44, Loss: 0.72

## Cálculo das métricas

In [235]:
from sklearn.metrics import f1_score, accuracy_score
# Location of the reference OSATS annotations.
CSV_PATH = r"../../OSS_dataset/Train/OSATS.csv"

# Align the prediction frame with the annotation file: lower-case column
# name for the video id, and stripped, lower-cased id values.
df_pred_task2.rename(columns={"VIDEO": "video"}, inplace=True)
df_pred_task2["video"] = df_pred_task2["video"].astype(str).str.strip().str.lower()

# Read the original annotations and normalise their video ids the same way.
df_osats = pd.read_csv(CSV_PATH, sep=";")
df_osats["VIDEO"] = df_osats["VIDEO"].astype(str).str.strip().str.lower()

# Keep investigator B only, restricted to videos that have a prediction.
df_osats_b = df_osats[df_osats["INVESTIGATOR"] == "B"].copy()
videos_preditos = set(df_pred_task2["video"])
df_osats_b = df_osats_b[df_osats_b["VIDEO"].isin(videos_preditos)]

# One ground-truth row per video: mean over repeated entries, rounded to int.
df_truth_media = (
    df_osats_b.groupby("VIDEO")[osats_cols]
    .mean()
    .round()
    .astype(int)
    .reset_index()
    .rename(columns={"VIDEO": "video"})
)

# Join predictions and ground truth on the video id; overlapping OSATS
# columns get the "_pred" / "_true" suffixes.
df_merged = df_pred_task2.merge(df_truth_media, on="video", suffixes=("_pred", "_true"))
print(f"📦 Total de amostras para avaliação: {len(df_merged)}\n")

# Per-column evaluation.
f1_scores = []
accuracies = []

for col in osats_cols:
    y_true = df_merged[f"{col}_true"]
    y_pred = df_merged[f"{col}_pred"]

    # Show up to five predicted-vs-real examples for this column.
    print(f"\n🔎 {col}: Exemplo (previsto vs real):")
    i = 0
    while i < min(5, len(df_merged)):
        print(f"  🎥 {df_merged['video'].iloc[i]} → Pred: {y_pred.iloc[i]}, Real: {y_true.iloc[i]}")
        i += 1

    f1 = f1_score(y_true, y_pred, average='macro')
    acc = accuracy_score(y_true, y_pred)
    f1_scores.append(f1)
    accuracies.append(acc)
    print(f"📊 {col}: F1-score = {f1:.4f}, Accuracy = {acc:.4f}")

# SmoothL1Loss over all OSATS columns at once (uses the global `criterion`
# and `device` defined by the training cell).
y_true_tensor = torch.tensor(
    df_merged[[f"{col}_true" for col in osats_cols]].values, dtype=torch.float32
).to(device)
y_pred_tensor = torch.tensor(
    df_merged[[f"{col}_pred" for col in osats_cols]].values, dtype=torch.float32
).to(device)
loss = criterion(y_pred_tensor, y_true_tensor).item()

# Global results.
print("\n📈 MÉTRICAS GLOBAIS (Task 2):")
print(f"🎯 F1-score médio (macro): {np.mean(f1_scores):.4f}")
print(f"📊 Accuracy média: {np.mean(accuracies):.4f}")
print(f"💥 SmoothL1Loss final: {loss:.4f}")


📦 Total de amostras para avaliação: 95


🔎 OSATS_RESPECT: Exemplo (previsto vs real):
  🎥 j88t → Pred: 2, Real: 5
  🎥 o38n → Pred: 2, Real: 3
  🎥 k55r → Pred: 3, Real: 2
  🎥 g11v → Pred: 3, Real: 3
  🎥 z49x → Pred: 4, Real: 3
📊 OSATS_RESPECT: F1-score = 0.1983, Accuracy = 0.2526

🔎 OSATS_MOTION: Exemplo (previsto vs real):
  🎥 j88t → Pred: 2, Real: 4
  🎥 o38n → Pred: 2, Real: 2
  🎥 k55r → Pred: 2, Real: 3
  🎥 g11v → Pred: 1, Real: 3
  🎥 z49x → Pred: 4, Real: 3
📊 OSATS_MOTION: F1-score = 0.2370, Accuracy = 0.3789

🔎 OSATS_INSTRUMENT: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 4
  🎥 o38n → Pred: 2, Real: 1
  🎥 k55r → Pred: 3, Real: 3
  🎥 g11v → Pred: 2, Real: 4
  🎥 z49x → Pred: 4, Real: 3
📊 OSATS_INSTRUMENT: F1-score = 0.1685, Accuracy = 0.2632

🔎 OSATS_SUTURE: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 4
  🎥 o38n → Pred: 2, Real: 2
  🎥 k55r → Pred: 2, Real: 3
  🎥 g11v → Pred: 2, Real: 4
  🎥 z49x → Pred: 4, Real: 3
📊 OSATS_SUTURE: F1-score = 0.2259, Accuracy = 0.347

## Correlação entre as features e OSATS

In [236]:
import pandas as pd

# File locations.
CSV_PATH = r"../../OSS_dataset/Train/OSATS.csv"
FEATURES_PATH = "datagrama_features_joao.csv"

# OSATS columns to analyse.
osats_cols = [
    'OSATS_RESPECT', 'OSATS_MOTION', 'OSATS_INSTRUMENT', 'OSATS_SUTURE',
    'OSATS_FLOW', 'OSATS_KNOWLEDGE', 'OSATS_PERFORMANCE', 'OSATS_FINAL_QUALITY'
]

# Load OSATS.csv, normalise the video ids and keep investigator B only.
df_osats = pd.read_csv(CSV_PATH, sep=';')
df_osats['VIDEO'] = df_osats['VIDEO'].astype(str).str.strip().str.lower()
df_b = df_osats[df_osats['INVESTIGATOR'] == 'B'].copy()

# Replace decimal commas with dots and convert to float: SUTURES first,
# then every OSATS column.
for col in ['SUTURES'] + osats_cols:
    as_text = df_b[col].astype(str)
    df_b[col] = as_text.str.replace(',', '.').astype(float)

# Load the visual features and normalise their video ids the same way.
df_feats = pd.read_csv(FEATURES_PATH)
df_feats['video'] = df_feats['video'].astype(str).str.strip().str.lower()

# Inner-join annotations and features on the video id.
df_merged = df_b.merge(df_feats, left_on='VIDEO', right_on='video', how='inner')

# Features to correlate with the OSATS columns.
features = ['SUTURES', 'num_pontos', 'densidade_fios', 'angulo_medio_fios',
            'num_linhas', 'simetria_horizontal', 'complexidade_visual']

# Full correlation matrix over features + OSATS columns.
df_corr = df_merged[features + osats_cols].corr()

# Keep only the feature-vs-OSATS block, transposed so OSATS columns are rows.
correlacoes = df_corr.loc[features, osats_cols].T

# Print the table rounded to four decimals (rows are not re-sorted).
print("📊 Correlação entre features e variáveis OSATS (Investigator B):")
print(correlacoes.round(4))



📊 Correlação entre features e variáveis OSATS (Investigator B):
                     SUTURES  num_pontos  densidade_fios  angulo_medio_fios  \
OSATS_RESPECT         0.4013     -0.0326         -0.1180            -0.0519   
OSATS_MOTION          0.7451      0.2074         -0.0418            -0.0959   
OSATS_INSTRUMENT      0.6449      0.1638         -0.0278            -0.1076   
OSATS_SUTURE          0.6560      0.1701         -0.0712            -0.1131   
OSATS_FLOW            0.7129      0.2113         -0.0290            -0.1145   
OSATS_KNOWLEDGE       0.7979      0.1008         -0.1326            -0.1640   
OSATS_PERFORMANCE     0.7516      0.1359         -0.1282            -0.1621   
OSATS_FINAL_QUALITY   0.7022      0.0708         -0.1285            -0.1542   

                     num_linhas  simetria_horizontal  complexidade_visual  
OSATS_RESPECT           -0.0267               0.1696              -0.0064  
OSATS_MOTION             0.0137               0.0971               0.131

## Modelo a usar apenas a feature SUTURES

In [237]:
# OSATS target columns.
osats_cols = [
    'OSATS_RESPECT', 'OSATS_MOTION', 'OSATS_INSTRUMENT', 'OSATS_SUTURE',
    'OSATS_FLOW', 'OSATS_KNOWLEDGE', 'OSATS_PERFORMANCE', 'OSATS_FINAL_QUALITY'
]

# Load the train/test splits.
train_df = pd.read_csv("dataframes_por_inspetor/train_B.csv")
test_df = pd.read_csv("dataframes_por_inspetor/test_B.csv")

# Inputs are the single SUTURES column (kept as one-column frames);
# targets are the eight OSATS columns.
y_train = train_df[osats_cols]
X_train = train_df[["SUTURES"]]
X_test = test_df[["SUTURES"]]

# Standardise SUTURES with statistics estimated on the training split only.
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns=["SUTURES"],
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test),
    columns=["SUTURES"],
)

# Dataset PyTorch
class SuturesDataset(Dataset):
    """Dataset of (features, targets) pairs held as ``float32`` tensors.

    ``X`` and ``y`` are both read via ``.values`` at construction time.
    """

    def __init__(self, X, y):
        as_float = torch.float32
        self.X = torch.tensor(X.values, dtype=as_float)
        self.y = torch.tensor(y.values, dtype=as_float)

    def __len__(self):
        # One sample per row of the feature tensor.
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        # Look up the feature row and the matching target row.
        features = self.X[idx]
        targets = self.y[idx]
        return features, targets

# Wrap the scaled SUTURES feature and the OSATS targets, then serve them in
# shuffled mini-batches of 32 samples.
train_dataset = SuturesDataset(X=X_train_scaled, y=y_train)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)

# Modelo simples
class SimpleSuturesModel(nn.Module):
    """Plain MLP mapping one input feature to eight outputs (1 -> 64 -> 32 -> 8)."""

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as they are applied.
        layers = [
            nn.Linear(1, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 8),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the raw outputs."""
        prediction = self.network(x)
        return prediction

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = SimpleSuturesModel()
model = model.to(device)
criterion = nn.SmoothL1Loss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

# Training: 100 epochs, keeping the weights of the epoch with the lowest
# summed training loss.
best_loss = float('inf')
for epoch in range(100):
    model.train()
    total_loss = 0  # sum of the per-batch mean losses of this epoch
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")
    if not (total_loss < best_loss):
        continue
    # New best epoch: remember its loss and checkpoint the weights.
    best_loss = total_loss
    torch.save(model.state_dict(), "best_model_task2_sutures.pt")

# Prediction with the best checkpoint.
best_state = torch.load("best_model_task2_sutures.pt")
model.load_state_dict(best_state)
model.eval()
X_test_tensor = torch.tensor(X_test_scaled.values, dtype=torch.float32).to(device)
with torch.no_grad():
    preds = model(X_test_tensor).cpu().numpy()

# Round to the nearest integer, then clip to [0, 4].
preds = np.clip(np.round(preds), 0, 4)
preds = preds.astype(int)

# Save the predictions as CSV, with the video id as the first column.
df_pred = pd.DataFrame(preds, columns=osats_cols)
df_pred.insert(0, "VIDEO", test_df["video"])
df_pred.to_csv("task2_predicoes_sutures.csv", index=False)
print("✅ CSV gerado: task2_predicoes_sutures.csv")

Epoch 1, Loss: 16.4123
Epoch 2, Loss: 15.7379
Epoch 3, Loss: 14.8363
Epoch 4, Loss: 13.6892
Epoch 5, Loss: 12.1772
Epoch 6, Loss: 10.2995
Epoch 7, Loss: 8.2131
Epoch 8, Loss: 6.3651
Epoch 9, Loss: 4.9867
Epoch 10, Loss: 4.1297
Epoch 11, Loss: 3.7561
Epoch 12, Loss: 3.3538
Epoch 13, Loss: 3.0714
Epoch 14, Loss: 2.8667
Epoch 15, Loss: 2.7244
Epoch 16, Loss: 2.6001
Epoch 17, Loss: 2.4863
Epoch 18, Loss: 2.3863
Epoch 19, Loss: 2.2854
Epoch 20, Loss: 2.2428
Epoch 21, Loss: 2.1736
Epoch 22, Loss: 2.1232
Epoch 23, Loss: 2.0846
Epoch 24, Loss: 2.0518
Epoch 25, Loss: 2.0234
Epoch 26, Loss: 2.0258
Epoch 27, Loss: 1.9933
Epoch 28, Loss: 1.9805
Epoch 29, Loss: 1.9642
Epoch 30, Loss: 1.9573
Epoch 31, Loss: 1.9571
Epoch 32, Loss: 1.9557
Epoch 33, Loss: 1.9310
Epoch 34, Loss: 1.9223
Epoch 35, Loss: 1.9192
Epoch 36, Loss: 1.9189
Epoch 37, Loss: 1.9127
Epoch 38, Loss: 1.9381
Epoch 39, Loss: 1.9193
Epoch 40, Loss: 1.8931
Epoch 41, Loss: 1.9207
Epoch 42, Loss: 1.8947
Epoch 43, Loss: 1.9012
Epoch 44, Loss

## Avaliação do Modelo

In [238]:
# Path to the annotation file.
CSV_PATH = r"../../OSS_dataset/Train/OSATS.csv"

# Columns to predict.
osats_cols = [
    'OSATS_RESPECT', 'OSATS_MOTION', 'OSATS_INSTRUMENT', 'OSATS_SUTURE',
    'OSATS_FLOW', 'OSATS_KNOWLEDGE', 'OSATS_PERFORMANCE', 'OSATS_FINAL_QUALITY'
]

# Load the saved predictions and normalise the video ids.
df_pred = pd.read_csv("task2_predicoes_sutures.csv")
df_pred.rename(columns={"VIDEO": "video"}, inplace=True)
df_pred["video"] = df_pred["video"].astype(str).str.strip().str.lower()

# Load the reference annotations (investigator B only).
df_osats = pd.read_csv(CSV_PATH, sep=";")
df_osats["VIDEO"] = df_osats["VIDEO"].astype(str).str.strip().str.lower()
df_osats_b = df_osats[df_osats["INVESTIGATOR"] == "B"].copy()

# Keep only videos that have a prediction.
videos_preditos = set(df_pred["video"])
df_osats_b = df_osats_b[df_osats_b["VIDEO"].isin(videos_preditos)]

# Aggregate the reference values per video (mean, rounded to int).
df_truth = df_osats_b.groupby("VIDEO")[osats_cols].mean().round().astype(int).reset_index()
df_truth.rename(columns={"VIDEO": "video"}, inplace=True)

# Join predictions and reference values.
df_merged = pd.merge(df_pred, df_truth, on="video", suffixes=("_pred", "_true"))
print(f"📦 Total de amostras para avaliação: {len(df_merged)}\n")

# Evaluation loss and device. These are created BEFORE the per-column loop:
# the loop used to pick up whatever `criterion`/`device` an earlier cell had
# left in the kernel, which breaks (or silently changes the metric) once
# `criterion` has been rebound, e.g. to a weighted training loss.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = torch.nn.SmoothL1Loss()

# Per-column metrics.
f1_scores = []
accuracies = []
col_losses = []

for col in osats_cols:
    y_true = df_merged[f"{col}_true"]
    y_pred = df_merged[f"{col}_pred"]
    y_true_col = torch.tensor(df_merged[f"{col}_true"].values, dtype=torch.float32).to(device)
    y_pred_col = torch.tensor(df_merged[f"{col}_pred"].values, dtype=torch.float32).to(device)
    col_loss = criterion(y_pred_col, y_true_col).item()
    col_losses.append(col_loss)

    # Show up to five predicted-vs-real examples for this column.
    print(f"\n🔎 {col}: Exemplo (previsto vs real):")
    for i in range(min(5, len(df_merged))):
        print(f"  🎥 {df_merged['video'].iloc[i]} → Pred: {y_pred.iloc[i]}, Real: {y_true.iloc[i]}")

    f1 = f1_score(y_true, y_pred, average='macro')
    acc = accuracy_score(y_true, y_pred)
    f1_scores.append(f1)
    accuracies.append(acc)
    print(f"📊 {col}: F1-score = {f1:.4f}, Accuracy = {acc:.4f}")

# SmoothL1Loss over all OSATS columns at once.
y_true_tensor = torch.tensor(df_merged[[f"{col}_true" for col in osats_cols]].values, dtype=torch.float32).to(device)
y_pred_tensor = torch.tensor(df_merged[[f"{col}_pred" for col in osats_cols]].values, dtype=torch.float32).to(device)
loss = criterion(y_pred_tensor, y_true_tensor).item()

# Task 1: GRS
def grs_class(score):
    """Map a Global Rating Score to its class index (0-3).

    Class boundaries are inclusive upper bounds: ``<= 15`` -> 0,
    ``<= 23`` -> 1, ``<= 31`` -> 2, anything else -> 3.
    """
    upper_bounds = (15, 23, 31)
    for class_idx, upper in enumerate(upper_bounds):
        if score <= upper:
            return class_idx
    # No bound matched: the score falls in the top class.
    return len(upper_bounds)

# Make sure the prediction ids are normalised (a no-op if already done).
df_pred.rename(columns={"VIDEO": "video"}, inplace=True)
df_pred["video"] = df_pred["video"].astype(str).str.strip().str.lower()

# Load the reference annotations (investigator B only).
df_osats = pd.read_csv(CSV_PATH, sep=";")
df_osats["VIDEO"] = df_osats["VIDEO"].astype(str).str.strip().str.lower()
df_osats_b = df_osats[df_osats["INVESTIGATOR"] == "B"].copy()

# Keep only videos that have a prediction.
df_osats_b = df_osats_b[df_osats_b["VIDEO"].isin(df_pred["video"])]

# Aggregate the reference values per video (mean over repeated entries).
df_truth = df_osats_b.groupby("VIDEO")[osats_cols + ["GLOBA_RATING_SCORE"]].mean().reset_index()
df_truth.rename(columns={"VIDEO": "video"}, inplace=True)

# Join predictions and reference values.
df_merged = pd.merge(df_pred, df_truth, on="video", suffixes=("_pred", "_true"))

# True GRS: the per-video mean is ROUNDED before the int cast, matching how
# the OSATS reference values are aggregated; a bare astype(int) truncates
# fractional means towards zero. Predicted GRS is the sum of the eight
# predicted OSATS scores.
df_merged["GRS_true"] = df_merged["GLOBA_RATING_SCORE"].round().astype(int)
df_merged["GRS_pred"] = df_merged[[f"{col}_pred" for col in osats_cols]].sum(axis=1).astype(int)

# Map both scores to their GRS class.
df_merged["GRS_class_true"] = df_merged["GRS_true"].apply(grs_class)
df_merged["GRS_class_pred"] = df_merged["GRS_pred"].apply(grs_class)

# Evaluate the class predictions.
f1_grs = f1_score(df_merged["GRS_class_true"], df_merged["GRS_class_pred"], average='macro')
acc_grs = accuracy_score(df_merged["GRS_class_true"], df_merged["GRS_class_pred"])

print("\n📊 MÉTRICAS GRS (Task 1 com apenas SUTURES):")
print(f"🎯 F1-score macro: {f1_grs:.4f}")
print(f"📊 Accuracy: {acc_grs:.4f}")

# Save the Task 1 submission CSV (video id + predicted GRS class).
df_grs_submission = df_merged[["video", "GRS_class_pred"]].rename(columns={"GRS_class_pred": "GRS"})
df_grs_submission.to_csv("task1_predicoes_sutures.csv", index=False)
print("✅ CSV gerado: task1_predicoes_sutures.csv")

# Global results (per-column lists and `loss` come from the first half of
# this cell).
print("\n📈 MÉTRICAS GLOBAIS (modelo com apenas SUTURES):")
print(f"🎯 F1-score médio (macro): {np.mean(f1_scores):.4f}")
print(f"📊 Accuracy média: {np.mean(accuracies):.4f}")
print(f"💥 SmoothL1Loss final: {loss:.4f}")
print(f"📈 GRS (Task 1) - F1-score: {f1_grs:.4f}, Accuracy: {acc_grs:.4f}")


📦 Total de amostras para avaliação: 95


🔎 OSATS_RESPECT: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 5
  🎥 o38n → Pred: 2, Real: 3
  🎥 k55r → Pred: 3, Real: 2
  🎥 g11v → Pred: 3, Real: 3
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_RESPECT: F1-score = 0.1778, Accuracy = 0.3368

🔎 OSATS_MOTION: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 4
  🎥 o38n → Pred: 2, Real: 2
  🎥 k55r → Pred: 3, Real: 3
  🎥 g11v → Pred: 3, Real: 3
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_MOTION: F1-score = 0.4079, Accuracy = 0.6737

🔎 OSATS_INSTRUMENT: Exemplo (previsto vs real):
  🎥 j88t → Pred: 4, Real: 4
  🎥 o38n → Pred: 2, Real: 1
  🎥 k55r → Pred: 4, Real: 3
  🎥 g11v → Pred: 4, Real: 4
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_INSTRUMENT: F1-score = 0.3219, Accuracy = 0.5263

🔎 OSATS_SUTURE: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 4
  🎥 o38n → Pred: 2, Real: 2
  🎥 k55r → Pred: 3, Real: 3
  🎥 g11v → Pred: 3, Real: 4
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_SUTURE: F1-score = 0.2687, Accuracy = 0.421

### Tabela das métricas

In [239]:
# Base table: one row per OSATS column with its F1, accuracy and loss.
metricas_df = pd.DataFrame(
    {
        "Métrica": osats_cols,
        "F1-score": f1_scores,
        "Accuracy": accuracies,
        "Loss": col_losses,
    }
)

# Loss between predicted and true GRS (Task 1), using the global criterion.
grs_pred_tensor = torch.tensor(df_merged["GRS_pred"].values, dtype=torch.float32).to(device)
grs_true_tensor = torch.tensor(df_merged["GRS_true"].values, dtype=torch.float32).to(device)
loss_grs = criterion(grs_pred_tensor, grs_true_tensor).item()

# Mean of the per-column losses.
media_col_loss = np.mean(col_losses)

# Summary rows, appended in order: GRS (Task 1), OSATS averages, overall loss.
summary_rows = (
    {
        "Métrica": "GRS (Task 1)",
        "F1-score": f1_grs,
        "Accuracy": acc_grs,
        "Loss": loss_grs,
    },
    {
        "Métrica": "Média OSATS",
        "F1-score": np.mean(f1_scores),
        "Accuracy": np.mean(accuracies),
        "Loss": media_col_loss,
    },
    {
        "Métrica": "SmoothL1Loss",
        "F1-score": np.nan,
        "Accuracy": np.nan,
        "Loss": loss,
    },
)
for summary_row in summary_rows:
    metricas_df.loc[len(metricas_df)] = summary_row

# Show the table.
print("\n📋 Tabela de Métricas Completas:")
print(metricas_df.to_string(index=False))


📋 Tabela de Métricas Completas:
            Métrica  F1-score  Accuracy     Loss
      OSATS_RESPECT  0.177778  0.336842 0.500000
       OSATS_MOTION  0.407893  0.673684 0.184211
   OSATS_INSTRUMENT  0.321947  0.526316 0.268421
       OSATS_SUTURE  0.268732  0.421053 0.342105
         OSATS_FLOW  0.387062  0.621053 0.221053
    OSATS_KNOWLEDGE  0.424376  0.473684 0.294737
  OSATS_PERFORMANCE  0.351376  0.536842 0.284211
OSATS_FINAL_QUALITY  0.417714  0.494737 0.410526
       GRS (Task 1)  0.462300  0.515789 3.368421
        Média OSATS  0.344610  0.510526 0.313158
       SmoothL1Loss       NaN       NaN 0.313158


### Modelo 2 com feature SUTURES

In [240]:
class ImprovedSuturesModel(nn.Module):
    """MLP from one input feature to eight outputs (1 -> 128 -> 64 -> 8).

    The first hidden layer is followed by BatchNorm, ReLU and Dropout(0.3);
    the second by ReLU only.
    """

    def __init__(self):
        super().__init__()
        first_block = [
            nn.Linear(1, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
        ]
        second_block = [nn.Linear(128, 64), nn.ReLU()]
        output_layer = [nn.Linear(64, 8)]
        self.network = nn.Sequential(*first_block, *second_block, *output_layer)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the raw outputs."""
        out = self.network(x)
        return out

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = ImprovedSuturesModel()
model = model.to(device)
criterion = nn.SmoothL1Loss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

# Training: 100 epochs over the global `train_loader`, keeping the weights
# of the epoch with the lowest summed training loss.
best_loss = float('inf')
for epoch in range(100):
    model.train()
    total_loss = 0  # sum of the per-batch mean losses of this epoch
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")
    if not (total_loss < best_loss):
        continue
    # New best epoch: remember its loss and checkpoint the weights.
    best_loss = total_loss
    torch.save(model.state_dict(), "best_model_task2_sutures2.pt")


Epoch 1, Loss: 15.1091
Epoch 2, Loss: 12.3955
Epoch 3, Loss: 9.9362
Epoch 4, Loss: 8.1252
Epoch 5, Loss: 7.7097
Epoch 6, Loss: 7.5933
Epoch 7, Loss: 7.1771
Epoch 8, Loss: 7.2560
Epoch 9, Loss: 6.9330
Epoch 10, Loss: 6.7805
Epoch 11, Loss: 7.0921
Epoch 12, Loss: 6.7264
Epoch 13, Loss: 6.4227
Epoch 14, Loss: 6.6486
Epoch 15, Loss: 6.4514
Epoch 16, Loss: 6.2385
Epoch 17, Loss: 6.2190
Epoch 18, Loss: 6.1273
Epoch 19, Loss: 6.0291
Epoch 20, Loss: 5.8963
Epoch 21, Loss: 5.8338
Epoch 22, Loss: 5.5937
Epoch 23, Loss: 5.5520
Epoch 24, Loss: 5.4117
Epoch 25, Loss: 5.5089
Epoch 26, Loss: 5.2922
Epoch 27, Loss: 5.0772
Epoch 28, Loss: 4.7596
Epoch 29, Loss: 4.5997
Epoch 30, Loss: 4.4346
Epoch 31, Loss: 4.2180
Epoch 32, Loss: 4.2323
Epoch 33, Loss: 4.1451
Epoch 34, Loss: 3.9470
Epoch 35, Loss: 3.7840
Epoch 36, Loss: 3.7031
Epoch 37, Loss: 3.4601
Epoch 38, Loss: 3.1001
Epoch 39, Loss: 3.0533
Epoch 40, Loss: 2.8904
Epoch 41, Loss: 2.6547
Epoch 42, Loss: 2.7437
Epoch 43, Loss: 2.4252
Epoch 44, Loss: 2.

## Previsão e avaliação

In [241]:
# Prediction with the best checkpoint of the second SUTURES model.
# map_location keeps the load working when this cell is re-run on a machine
# whose device differs from the one the checkpoint was saved on (e.g. a
# checkpoint written on CUDA and loaded on a CPU-only kernel).
model.load_state_dict(torch.load("best_model_task2_sutures2.pt", map_location=device))
model.eval()
X_test_tensor = torch.tensor(X_test_scaled.values, dtype=torch.float32).to(device)
with torch.no_grad():
    preds = model(X_test_tensor).cpu().numpy()

# Round to the nearest integer and clip to [0, 4].
# NOTE(review): the reference labels printed by the evaluation cells include
# the value 5, which can never be predicted after this clip - confirm the
# intended score range / label offset.
preds = np.clip(np.round(preds), 0, 4).astype(int)

# Save the predictions as CSV, with the video id as the first column.
df_pred = pd.DataFrame(preds, columns=osats_cols)
df_pred.insert(0, "VIDEO", test_df["video"])
df_pred.to_csv("task2_predicoes_sutures2.csv", index=False)
print("✅ CSV gerado: task2_predicoes_sutures2.csv")

✅ CSV gerado: task2_predicoes_sutures2.csv


### Calcular métricas

In [242]:
# Path to the annotation file.
CSV_PATH = r"../../OSS_dataset/Train/OSATS.csv"

# Columns to predict.
osats_cols = [
    'OSATS_RESPECT', 'OSATS_MOTION', 'OSATS_INSTRUMENT', 'OSATS_SUTURE',
    'OSATS_FLOW', 'OSATS_KNOWLEDGE', 'OSATS_PERFORMANCE', 'OSATS_FINAL_QUALITY'
]

# Load the saved predictions and normalise the video ids.
df_pred = pd.read_csv("task2_predicoes_sutures2.csv")
df_pred.rename(columns={"VIDEO": "video"}, inplace=True)
df_pred["video"] = df_pred["video"].astype(str).str.strip().str.lower()

# Load the reference annotations (investigator B only).
df_osats = pd.read_csv(CSV_PATH, sep=";")
df_osats["VIDEO"] = df_osats["VIDEO"].astype(str).str.strip().str.lower()
df_osats_b = df_osats[df_osats["INVESTIGATOR"] == "B"].copy()

# Keep only videos that have a prediction.
videos_preditos = set(df_pred["video"])
df_osats_b = df_osats_b[df_osats_b["VIDEO"].isin(videos_preditos)]

# Aggregate the reference values per video (mean, rounded to int).
df_truth = df_osats_b.groupby("VIDEO")[osats_cols].mean().round().astype(int).reset_index()
df_truth.rename(columns={"VIDEO": "video"}, inplace=True)

# Join predictions and reference values.
df_merged = pd.merge(df_pred, df_truth, on="video", suffixes=("_pred", "_true"))
print(f"📦 Total de amostras para avaliação: {len(df_merged)}\n")

# Evaluation loss and device. These are created BEFORE the per-column loop:
# the loop used to pick up whatever `criterion`/`device` an earlier cell had
# left in the kernel, which breaks (or silently changes the metric) once
# `criterion` has been rebound, e.g. to a weighted training loss.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = torch.nn.SmoothL1Loss()

# Per-column metrics.
f1_scores = []
accuracies = []
col_losses = []

for col in osats_cols:
    y_true = df_merged[f"{col}_true"]
    y_pred = df_merged[f"{col}_pred"]
    y_true_col = torch.tensor(df_merged[f"{col}_true"].values, dtype=torch.float32).to(device)
    y_pred_col = torch.tensor(df_merged[f"{col}_pred"].values, dtype=torch.float32).to(device)
    col_loss = criterion(y_pred_col, y_true_col).item()
    col_losses.append(col_loss)

    # Show up to five predicted-vs-real examples for this column.
    print(f"\n🔎 {col}: Exemplo (previsto vs real):")
    for i in range(min(5, len(df_merged))):
        print(f"  🎥 {df_merged['video'].iloc[i]} → Pred: {y_pred.iloc[i]}, Real: {y_true.iloc[i]}")

    f1 = f1_score(y_true, y_pred, average='macro')
    acc = accuracy_score(y_true, y_pred)
    f1_scores.append(f1)
    accuracies.append(acc)
    print(f"📊 {col}: F1-score = {f1:.4f}, Accuracy = {acc:.4f}")

# SmoothL1Loss over all OSATS columns at once.
y_true_tensor = torch.tensor(df_merged[[f"{col}_true" for col in osats_cols]].values, dtype=torch.float32).to(device)
y_pred_tensor = torch.tensor(df_merged[[f"{col}_pred" for col in osats_cols]].values, dtype=torch.float32).to(device)
loss = criterion(y_pred_tensor, y_true_tensor).item()

# Task 1: GRS
def grs_class(score):
    """Return the GRS class index (0-3) for a Global Rating Score.

    The first inclusive upper bound that ``score`` does not exceed decides
    the class: ``<= 15`` -> 0, ``<= 23`` -> 1, ``<= 31`` -> 2; a score above
    every bound is class 3.
    """
    limits = (15, 23, 31)
    matches = (index for index, limit in enumerate(limits) if score <= limit)
    return next(matches, 3)

# Make sure the prediction ids are normalised (a no-op if already done).
df_pred.rename(columns={"VIDEO": "video"}, inplace=True)
df_pred["video"] = df_pred["video"].astype(str).str.strip().str.lower()

# Load the reference annotations (investigator B only).
df_osats = pd.read_csv(CSV_PATH, sep=";")
df_osats["VIDEO"] = df_osats["VIDEO"].astype(str).str.strip().str.lower()
df_osats_b = df_osats[df_osats["INVESTIGATOR"] == "B"].copy()

# Keep only videos that have a prediction.
df_osats_b = df_osats_b[df_osats_b["VIDEO"].isin(df_pred["video"])]

# Aggregate the reference values per video (mean over repeated entries).
df_truth = df_osats_b.groupby("VIDEO")[osats_cols + ["GLOBA_RATING_SCORE"]].mean().reset_index()
df_truth.rename(columns={"VIDEO": "video"}, inplace=True)

# Join predictions and reference values.
df_merged = pd.merge(df_pred, df_truth, on="video", suffixes=("_pred", "_true"))

# True GRS: the per-video mean is ROUNDED before the int cast, matching how
# the OSATS reference values are aggregated; a bare astype(int) truncates
# fractional means towards zero. Predicted GRS is the sum of the eight
# predicted OSATS scores.
df_merged["GRS_true"] = df_merged["GLOBA_RATING_SCORE"].round().astype(int)
df_merged["GRS_pred"] = df_merged[[f"{col}_pred" for col in osats_cols]].sum(axis=1).astype(int)

# Map both scores to their GRS class.
df_merged["GRS_class_true"] = df_merged["GRS_true"].apply(grs_class)
df_merged["GRS_class_pred"] = df_merged["GRS_pred"].apply(grs_class)

# Evaluate the class predictions.
f1_grs = f1_score(df_merged["GRS_class_true"], df_merged["GRS_class_pred"], average='macro')
acc_grs = accuracy_score(df_merged["GRS_class_true"], df_merged["GRS_class_pred"])

print("\n📊 MÉTRICAS GRS (Task 1 com apenas SUTURES):")
print(f"🎯 F1-score macro: {f1_grs:.4f}")
print(f"📊 Accuracy: {acc_grs:.4f}")

# Save the Task 1 submission CSV (video id + predicted GRS class).
# This cell evaluates the second SUTURES model (it reads
# task2_predicoes_sutures2.csv), so it writes to its own "...sutures2" file
# instead of overwriting the first model's task1_predicoes_sutures.csv.
df_grs_submission = df_merged[["video", "GRS_class_pred"]].rename(columns={"GRS_class_pred": "GRS"})
df_grs_submission.to_csv("task1_predicoes_sutures2.csv", index=False)
print("✅ CSV gerado: task1_predicoes_sutures2.csv")

# Global results (per-column lists and `loss` come from the first half of
# this cell).
print("\n📈 MÉTRICAS GLOBAIS (modelo com apenas SUTURES):")
print(f"🎯 F1-score médio (macro): {np.mean(f1_scores):.4f}")
print(f"📊 Accuracy média: {np.mean(accuracies):.4f}")
print(f"💥 SmoothL1Loss final: {loss:.4f}")
print(f"📈 GRS (Task 1) - F1-score: {f1_grs:.4f}, Accuracy: {acc_grs:.4f}")


📦 Total de amostras para avaliação: 95


🔎 OSATS_RESPECT: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 5
  🎥 o38n → Pred: 2, Real: 3
  🎥 k55r → Pred: 3, Real: 2
  🎥 g11v → Pred: 3, Real: 3
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_RESPECT: F1-score = 0.2168, Accuracy = 0.3684

🔎 OSATS_MOTION: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 4
  🎥 o38n → Pred: 1, Real: 2
  🎥 k55r → Pred: 3, Real: 3
  🎥 g11v → Pred: 3, Real: 3
  🎥 z49x → Pred: 2, Real: 3
📊 OSATS_MOTION: F1-score = 0.3534, Accuracy = 0.4842

🔎 OSATS_INSTRUMENT: Exemplo (previsto vs real):
  🎥 j88t → Pred: 4, Real: 4
  🎥 o38n → Pred: 2, Real: 1
  🎥 k55r → Pred: 3, Real: 3
  🎥 g11v → Pred: 4, Real: 4
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_INSTRUMENT: F1-score = 0.2926, Accuracy = 0.4526

🔎 OSATS_SUTURE: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 4
  🎥 o38n → Pred: 2, Real: 2
  🎥 k55r → Pred: 3, Real: 3
  🎥 g11v → Pred: 3, Real: 4
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_SUTURE: F1-score = 0.2417, Accuracy = 0.400

## Criar tabela

In [243]:
# Base table: one row per OSATS column with its F1, accuracy and loss.
metricas_df = pd.DataFrame(
    {
        "Métrica": osats_cols,
        "F1-score": f1_scores,
        "Accuracy": accuracies,
        "Loss": col_losses,
    }
)


def _as_device_tensor(column):
    # Convert one df_merged column to a float32 tensor on the global device.
    return torch.tensor(df_merged[column].values, dtype=torch.float32).to(device)


# Loss between predicted and true GRS (Task 1), using the global criterion.
loss_grs = criterion(_as_device_tensor("GRS_pred"), _as_device_tensor("GRS_true")).item()

# Row for GRS (Task 1).
metricas_df.loc[len(metricas_df)] = dict(
    [("Métrica", "GRS (Task 1)"), ("F1-score", f1_grs), ("Accuracy", acc_grs), ("Loss", loss_grs)]
)

# Row with the averages over the OSATS columns.
media_col_loss = np.mean(col_losses)
metricas_df.loc[len(metricas_df)] = dict(
    [
        ("Métrica", "Média OSATS"),
        ("F1-score", np.mean(f1_scores)),
        ("Accuracy", np.mean(accuracies)),
        ("Loss", media_col_loss),
    ]
)

# Row with the overall SmoothL1Loss (no F1/accuracy for this row).
metricas_df.loc[len(metricas_df)] = dict(
    [("Métrica", "SmoothL1Loss"), ("F1-score", np.nan), ("Accuracy", np.nan), ("Loss", loss)]
)

# Show the table.
print("\n📋 Tabela de Métricas Completas:")
print(metricas_df.to_string(index=False))


📋 Tabela de Métricas Completas:
            Métrica  F1-score  Accuracy     Loss
      OSATS_RESPECT  0.216815  0.368421 0.494737
       OSATS_MOTION  0.353381  0.484211 0.289474
   OSATS_INSTRUMENT  0.292572  0.452632 0.315789
       OSATS_SUTURE  0.241674  0.400000 0.363158
         OSATS_FLOW  0.348144  0.589474 0.236842
    OSATS_KNOWLEDGE  0.465365  0.515789 0.273684
  OSATS_PERFORMANCE  0.370927  0.526316 0.289474
OSATS_FINAL_QUALITY  0.264299  0.421053 0.478947
       GRS (Task 1)  0.448868  0.494737 3.778948
        Média OSATS  0.319147  0.469737 0.342763
       SmoothL1Loss       NaN       NaN 0.342763


## Modelo 3

In [244]:
class BalancedSuturesModel(nn.Module):
    """Small MLP that maps one input feature to 8 regression outputs.

    Layer stack, in order: Linear(1, 64) -> BatchNorm1d(64) -> ReLU ->
    Dropout(0.2) -> Linear(64, 32) -> ReLU -> Linear(32, 8).
    """

    def __init__(self):
        super(BalancedSuturesModel, self).__init__()
        first_stage = [
            nn.Linear(1, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2),
        ]
        second_stage = [
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 8),
        ]
        # Unpacked into one flat Sequential so the sub-modules keep the
        # indices 0..6 (and therefore the same state_dict keys).
        self.network = nn.Sequential(*first_stage, *second_stage)

    def forward(self, x):
        """Feed `x` through the sequential stack and return the result."""
        out = self.network(x)
        return out

# Weighted loss (this notebook uses weight 2 for OSATS_SUTURE)
class WeightedLoss(nn.Module):
    """Element-wise SmoothL1 loss scaled by per-output weights, then averaged.

    Args:
        weights: sequence of per-output weights. It is multiplied with the
            un-reduced loss, so it must broadcast against the tensors given
            to `forward`.
    """
    def __init__(self, weights):
        super().__init__()
        # Registered as a buffer so that `.to(device)` / `.cuda()` on the
        # module moves the weights too. Non-persistent: no new state_dict key.
        self.register_buffer(
            "weights",
            torch.tensor(weights, dtype=torch.float32),
            persistent=False,
        )
        self.criterion = nn.SmoothL1Loss(reduction='none')

    def forward(self, output, target):
        """Return the mean of the weighted element-wise SmoothL1 loss."""
        elementwise = self.criterion(output, target)
        # Kept for callers that never move the criterion itself to the device.
        weights = self.weights.to(output.device)
        return (elementwise * weights).mean()

# Use the GPU when one is available; the model and every batch are moved to it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BalancedSuturesModel().to(device)

# Higher weight for OSATS_SUTURE (index 3)
criterion = WeightedLoss([1, 1, 1, 2, 1, 1, 1, 1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training: 100 epochs, saving a checkpoint whenever the epoch loss improves.
# NOTE(review): the checkpoint is selected on the training loss; only
# `train_loader` is iterated here, no validation data is involved.
best_loss = float('inf')
for epoch in range(100):
    model.train()
    total_loss = 0.0  # sum of the per-batch losses of this epoch
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")
    if total_loss < best_loss:
        best_loss = total_loss
        torch.save(model.state_dict(), "best_model_task2_focused.pt")

# Prediction: reload the best checkpoint. `map_location` keeps the load working
# when the checkpoint tensors were saved from a different device.
model.load_state_dict(torch.load("best_model_task2_focused.pt", map_location=device))
model.eval()
# `X_test_scaled` is not created in this cell; it must already exist in the kernel.
X_test_tensor = torch.tensor(X_test_scaled.values, dtype=torch.float32).to(device)
with torch.no_grad():
    preds = model(X_test_tensor).cpu().numpy()

# Round to the nearest integer and clip to [0, 4]
# NOTE(review): the evaluation output of this notebook shows a real score of 5,
# which a prediction clipped to 4 can never match. Confirm the intended range.
preds = np.clip(np.round(preds), 0, 4).astype(int)

# Save CSV
df_pred = pd.DataFrame(preds, columns=osats_cols)
# Attach the video ids by position (row i of `preds` <-> row i of `test_df`),
# so a non-default index on `test_df` cannot misalign or blank out the ids.
df_pred.insert(0, "VIDEO", test_df["video"].to_numpy())
df_pred.to_csv("task2_predicoes_sutures_focused.csv", index=False)
print("✅ CSV gerado: task2_predicoes_sutures_focused.csv")

Epoch 1, Loss: 18.2246
Epoch 2, Loss: 17.4077
Epoch 3, Loss: 16.5890
Epoch 4, Loss: 15.4548
Epoch 5, Loss: 14.0735
Epoch 6, Loss: 12.7440
Epoch 7, Loss: 11.2444
Epoch 8, Loss: 9.7732
Epoch 9, Loss: 8.5896
Epoch 10, Loss: 8.2812
Epoch 11, Loss: 7.5828
Epoch 12, Loss: 7.6742
Epoch 13, Loss: 7.3472
Epoch 14, Loss: 7.3881
Epoch 15, Loss: 7.0809
Epoch 16, Loss: 6.9536
Epoch 17, Loss: 6.8283
Epoch 18, Loss: 6.8377
Epoch 19, Loss: 6.6067
Epoch 20, Loss: 6.2341
Epoch 21, Loss: 6.6178
Epoch 22, Loss: 6.3403
Epoch 23, Loss: 6.3924
Epoch 24, Loss: 6.2782
Epoch 25, Loss: 5.9574
Epoch 26, Loss: 5.8577
Epoch 27, Loss: 5.7742
Epoch 28, Loss: 5.8512
Epoch 29, Loss: 5.5484
Epoch 30, Loss: 5.4018
Epoch 31, Loss: 5.5674
Epoch 32, Loss: 4.9594
Epoch 33, Loss: 5.3010
Epoch 34, Loss: 4.9495
Epoch 35, Loss: 4.6261
Epoch 36, Loss: 4.9522
Epoch 37, Loss: 4.5048
Epoch 38, Loss: 4.3799
Epoch 39, Loss: 4.2602
Epoch 40, Loss: 3.9129
Epoch 41, Loss: 3.9278
Epoch 42, Loss: 3.7508
Epoch 43, Loss: 3.4532
Epoch 44, Los

## Avaliação

In [245]:
# Path to the annotation file
CSV_PATH = r"../../OSS_dataset/Train/OSATS.csv"

# Columns to predict
osats_cols = [
    'OSATS_RESPECT', 'OSATS_MOTION', 'OSATS_INSTRUMENT', 'OSATS_SUTURE',
    'OSATS_FLOW', 'OSATS_KNOWLEDGE', 'OSATS_PERFORMANCE', 'OSATS_FINAL_QUALITY'
]

# Device for the loss tensors. Assigned here, before the per-column loop that
# uses it, so the cell does not depend on a `device` left over from another cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load predictions and normalise the video ids (string, trimmed, lower-case)
df_pred = pd.read_csv("task2_predicoes_sutures_focused.csv")
df_pred.rename(columns={"VIDEO": "video"}, inplace=True)
df_pred["video"] = df_pred["video"].astype(str).str.strip().str.lower()

# Load the real annotations and keep only the rows of investigator "B"
df_osats = pd.read_csv(CSV_PATH, sep=";")
df_osats["VIDEO"] = df_osats["VIDEO"].astype(str).str.strip().str.lower()
df_osats_b = df_osats[df_osats["INVESTIGATOR"] == "B"].copy()

# Keep only videos that have a prediction
videos_preditos = set(df_pred["video"])
df_osats_b = df_osats_b[df_osats_b["VIDEO"].isin(videos_preditos)]

# Aggregate the real values per video: mean of each OSATS column, rounded to int
df_truth = df_osats_b.groupby("VIDEO")[osats_cols].mean().round().astype(int).reset_index()
df_truth.rename(columns={"VIDEO": "video"}, inplace=True)

# Join predictions and ground truth; shared column names get _pred / _true suffixes
df_merged = pd.merge(df_pred, df_truth, on="video", suffixes=("_pred", "_true"))
print(f"📦 Total de amostras para avaliação: {len(df_merged)}\n")

# Per-column metrics
# (`f1_score` and `accuracy_score` are not imported in this cell; they must
# already be in scope.)
f1_scores = []
accuracies = []
col_losses = []

criterion_eval = nn.SmoothL1Loss()

for col in osats_cols:
    y_true = df_merged[f"{col}_true"]
    y_pred = df_merged[f"{col}_pred"]
    y_true_col = torch.tensor(y_true.values, dtype=torch.float32).to(device)
    y_pred_col = torch.tensor(y_pred.values, dtype=torch.float32).to(device)
    col_loss = criterion_eval(y_pred_col, y_true_col).item()
    col_losses.append(col_loss)

    # Show the first rows (at most 5) as a quick sanity check
    print(f"\n🔎 {col}: Exemplo (previsto vs real):")
    for i in range(min(5, len(df_merged))):
        print(f"  🎥 {df_merged['video'].iloc[i]} → Pred: {y_pred.iloc[i]}, Real: {y_true.iloc[i]}")

    f1 = f1_score(y_true, y_pred, average='macro')
    acc = accuracy_score(y_true, y_pred)
    f1_scores.append(f1)
    accuracies.append(acc)
    print(f"📊 {col}: F1-score = {f1:.4f}, Accuracy = {acc:.4f}")

# Overall SmoothL1Loss over all OSATS columns at once
criterion = torch.nn.SmoothL1Loss()
y_true_tensor = torch.tensor(df_merged[[f"{col}_true" for col in osats_cols]].values, dtype=torch.float32).to(device)
y_pred_tensor = torch.tensor(df_merged[[f"{col}_pred" for col in osats_cols]].values, dtype=torch.float32).to(device)
loss = criterion(y_pred_tensor, y_true_tensor).item()

# Task 1: GRS
def grs_class(score):
    """Map a GRS total to a class index in 0..3.

    Upper bounds are inclusive: score <= 15 -> 0, <= 23 -> 1, <= 31 -> 2;
    anything that passes none of these comparisons -> 3.
    """
    for label, upper_bound in enumerate((15, 23, 31)):
        if score <= upper_bound:
            return label
    return 3

# `df_pred` (normalised video ids) and `df_osats_b` (investigator "B" rows,
# restricted to predicted videos) were already prepared earlier in this cell,
# so they are reused here instead of re-reading the same files.

# Aggregate the real values per video, this time including the global rating score
df_truth = df_osats_b.groupby("VIDEO")[osats_cols + ["GLOBA_RATING_SCORE"]].mean().reset_index()
df_truth.rename(columns={"VIDEO": "video"}, inplace=True)

# Merge predictions with ground truth
df_merged = pd.merge(df_pred, df_truth, on="video", suffixes=("_pred", "_true"))

# True GRS: the per-video mean is rounded to the nearest integer (as is done for
# the OSATS truth) instead of being truncated by a bare astype(int).
df_merged["GRS_true"] = df_merged["GLOBA_RATING_SCORE"].round().astype(int)
# Predicted GRS: sum of the predicted OSATS items
df_merged["GRS_pred"] = df_merged[[f"{col}_pred" for col in osats_cols]].sum(axis=1).astype(int)

# Classes
df_merged["GRS_class_true"] = df_merged["GRS_true"].apply(grs_class)
df_merged["GRS_class_pred"] = df_merged["GRS_pred"].apply(grs_class)

# Evaluate
f1_grs = f1_score(df_merged["GRS_class_true"], df_merged["GRS_class_pred"], average='macro')
acc_grs = accuracy_score(df_merged["GRS_class_true"], df_merged["GRS_class_pred"])

print("\n📊 MÉTRICAS GRS (Task 1 com apenas SUTURES):")
print(f"🎯 F1-score macro: {f1_grs:.4f}")
print(f"📊 Accuracy: {acc_grs:.4f}")

# Save the submission CSV
# NOTE(review): the other evaluation cells of this notebook write the same
# file name, so each run overwrites the previous model's Task 1 predictions.
df_grs_submission = df_merged[["video", "GRS_class_pred"]].rename(columns={"GRS_class_pred": "GRS"})
df_grs_submission.to_csv("task1_predicoes_sutures.csv", index=False)
print("✅ CSV gerado: task1_predicoes_sutures.csv")

# Global results
print("\n📈 MÉTRICAS GLOBAIS (modelo com apenas SUTURES):")
print(f"🎯 F1-score médio (macro): {np.mean(f1_scores):.4f}")
print(f"📊 Accuracy média: {np.mean(accuracies):.4f}")
print(f"💥 SmoothL1Loss final: {loss:.4f}")
print(f"📈 GRS (Task 1) - F1-score: {f1_grs:.4f}, Accuracy: {acc_grs:.4f}")


📦 Total de amostras para avaliação: 95


🔎 OSATS_RESPECT: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 5
  🎥 o38n → Pred: 2, Real: 3
  🎥 k55r → Pred: 3, Real: 2
  🎥 g11v → Pred: 3, Real: 3
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_RESPECT: F1-score = 0.2771, Accuracy = 0.4211

🔎 OSATS_MOTION: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 4
  🎥 o38n → Pred: 2, Real: 2
  🎥 k55r → Pred: 3, Real: 3
  🎥 g11v → Pred: 3, Real: 3
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_MOTION: F1-score = 0.4754, Accuracy = 0.6211

🔎 OSATS_INSTRUMENT: Exemplo (previsto vs real):
  🎥 j88t → Pred: 4, Real: 4
  🎥 o38n → Pred: 2, Real: 1
  🎥 k55r → Pred: 4, Real: 3
  🎥 g11v → Pred: 4, Real: 4
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_INSTRUMENT: F1-score = 0.3219, Accuracy = 0.5263

🔎 OSATS_SUTURE: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 4
  🎥 o38n → Pred: 2, Real: 2
  🎥 k55r → Pred: 3, Real: 3
  🎥 g11v → Pred: 3, Real: 4
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_SUTURE: F1-score = 0.2687, Accuracy = 0.421

## Tabela

In [246]:
# Per-item part of the metrics table: one row per OSATS column
linhas_osats = pd.DataFrame({
    "Métrica": osats_cols,
    "F1-score": f1_scores,
    "Accuracy": accuracies,
    "Loss": col_losses
})

# Loss between predicted and true GRS (Task 1), computed with the current `criterion`
grs_pred_t = torch.tensor(df_merged["GRS_pred"].values, dtype=torch.float32).to(device)
grs_true_t = torch.tensor(df_merged["GRS_true"].values, dtype=torch.float32).to(device)
loss_grs = criterion(grs_pred_t, grs_true_t).item()

media_col_loss = np.mean(col_losses)

# Summary rows, in display order: GRS (Task 1), the OSATS averages,
# and the overall loss held in `loss`
linhas_resumo = pd.DataFrame([
    {
        "Métrica": "GRS (Task 1)",
        "F1-score": f1_grs,
        "Accuracy": acc_grs,
        "Loss": loss_grs,
    },
    {
        "Métrica": "Média OSATS",
        "F1-score": np.mean(f1_scores),
        "Accuracy": np.mean(accuracies),
        "Loss": media_col_loss,
    },
    {
        "Métrica": "SmoothL1Loss",
        "F1-score": np.nan,
        "Accuracy": np.nan,
        "Loss": loss,
    },
])

# Final table: per-item rows followed by the summary rows, renumbered from 0
metricas_df = pd.concat([linhas_osats, linhas_resumo], ignore_index=True)

# Show the table
print("\n📋 Tabela de Métricas Completas:")
print(metricas_df.to_string(index=False))


📋 Tabela de Métricas Completas:
            Métrica  F1-score  Accuracy     Loss
      OSATS_RESPECT  0.277143  0.421053 0.468421
       OSATS_MOTION  0.475371  0.621053 0.231579
   OSATS_INSTRUMENT  0.321947  0.526316 0.268421
       OSATS_SUTURE  0.268732  0.421053 0.342105
         OSATS_FLOW  0.387062  0.621053 0.221053
    OSATS_KNOWLEDGE  0.461850  0.505263 0.278947
  OSATS_PERFORMANCE  0.351376  0.536842 0.284211
OSATS_FINAL_QUALITY  0.352591  0.484211 0.415789
       GRS (Task 1)  0.521284  0.536842 3.378947
        Média OSATS  0.362009  0.517105 0.313816
       SmoothL1Loss       NaN       NaN 0.313816


## Ensemble dos 3 Modelos anteriores (Média)

In [247]:
# Load the predictions of the 3 models
df1 = pd.read_csv("task2_predicoes_sutures.csv")
df2 = pd.read_csv("task2_predicoes_sutures2.csv")
df3 = pd.read_csv("task2_predicoes_sutures_focused.csv")

# Normalise the video ids (string, trimmed, lower-case)
for df in [df1, df2, df3]:
    df["VIDEO"] = df["VIDEO"].astype(str).str.strip().str.lower()

# OSATS columns
osats_cols = [col for col in df1.columns if col != "VIDEO"]

# Line the three models up on VIDEO, in df1's row order. Adding the frames
# directly would pair rows by position only and silently mix videos if the
# files were ever written in a different order.
aligned_preds = []
for df in [df1, df2, df3]:
    if df["VIDEO"].duplicated().any():
        raise ValueError("Duplicate VIDEO ids in a prediction file; cannot align the models.")
    by_video = df.set_index("VIDEO")[osats_cols].reindex(df1["VIDEO"])
    if by_video.isna().any().any():
        raise ValueError("A prediction file is missing a video (or holds a NaN score).")
    aligned_preds.append(by_video.to_numpy())
preds1, preds2, preds3 = aligned_preds

# Ensemble (mean of the predictions -> round -> clip to [0, 4])
ensemble_preds = df1.copy()
for idx, col in enumerate(osats_cols):
    mean_pred = (preds1[:, idx] + preds2[:, idx] + preds3[:, idx]) / 3
    ensemble_preds[col] = np.clip(np.round(mean_pred), 0, 4).astype(int)

# Save the ensemble CSV
ensemble_preds.to_csv("task2_predicoes_sutures_ensemble.csv", index=False)
print("✅ CSV do ensemble guardado como 'task2_predicoes_sutures_ensemble.csv'")

✅ CSV do ensemble guardado como 'task2_predicoes_sutures_ensemble.csv'


## Avaliação

In [248]:
# Path to the annotation file
CSV_PATH = r"../../OSS_dataset/Train/OSATS.csv"

# Columns to predict
osats_cols = [
    'OSATS_RESPECT', 'OSATS_MOTION', 'OSATS_INSTRUMENT', 'OSATS_SUTURE',
    'OSATS_FLOW', 'OSATS_KNOWLEDGE', 'OSATS_PERFORMANCE', 'OSATS_FINAL_QUALITY'
]

# Device for the loss tensors. Assigned here, before the per-column loop that
# uses it, so the cell does not depend on a `device` left over from another cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load predictions and normalise the video ids (string, trimmed, lower-case)
df_pred = pd.read_csv("task2_predicoes_sutures_ensemble.csv")
df_pred.rename(columns={"VIDEO": "video"}, inplace=True)
df_pred["video"] = df_pred["video"].astype(str).str.strip().str.lower()

# Load the real annotations and keep only the rows of investigator "B"
df_osats = pd.read_csv(CSV_PATH, sep=";")
df_osats["VIDEO"] = df_osats["VIDEO"].astype(str).str.strip().str.lower()
df_osats_b = df_osats[df_osats["INVESTIGATOR"] == "B"].copy()

# Keep only videos that have a prediction
videos_preditos = set(df_pred["video"])
df_osats_b = df_osats_b[df_osats_b["VIDEO"].isin(videos_preditos)]

# Aggregate the real values per video: mean of each OSATS column, rounded to int
df_truth = df_osats_b.groupby("VIDEO")[osats_cols].mean().round().astype(int).reset_index()
df_truth.rename(columns={"VIDEO": "video"}, inplace=True)

# Join predictions and ground truth; shared column names get _pred / _true suffixes
df_merged = pd.merge(df_pred, df_truth, on="video", suffixes=("_pred", "_true"))
print(f"📦 Total de amostras para avaliação: {len(df_merged)}\n")

# Per-column metrics
# (`f1_score` and `accuracy_score` are not imported in this cell; they must
# already be in scope.)
f1_scores = []
accuracies = []
col_losses = []

criterion_eval = nn.SmoothL1Loss()

for col in osats_cols:
    y_true = df_merged[f"{col}_true"]
    y_pred = df_merged[f"{col}_pred"]
    y_true_col = torch.tensor(y_true.values, dtype=torch.float32).to(device)
    y_pred_col = torch.tensor(y_pred.values, dtype=torch.float32).to(device)
    col_loss = criterion_eval(y_pred_col, y_true_col).item()
    col_losses.append(col_loss)

    # Show the first rows (at most 5) as a quick sanity check
    print(f"\n🔎 {col}: Exemplo (previsto vs real):")
    for i in range(min(5, len(df_merged))):
        print(f"  🎥 {df_merged['video'].iloc[i]} → Pred: {y_pred.iloc[i]}, Real: {y_true.iloc[i]}")

    f1 = f1_score(y_true, y_pred, average='macro')
    acc = accuracy_score(y_true, y_pred)
    f1_scores.append(f1)
    accuracies.append(acc)
    print(f"📊 {col}: F1-score = {f1:.4f}, Accuracy = {acc:.4f}")

# Overall SmoothL1Loss over all OSATS columns at once
criterion = torch.nn.SmoothL1Loss()
y_true_tensor = torch.tensor(df_merged[[f"{col}_true" for col in osats_cols]].values, dtype=torch.float32).to(device)
y_pred_tensor = torch.tensor(df_merged[[f"{col}_pred" for col in osats_cols]].values, dtype=torch.float32).to(device)
loss = criterion(y_pred_tensor, y_true_tensor).item()

# Task 1: GRS
def grs_class(score):
    """Map a GRS total to a class index in 0..3.

    Upper bounds are inclusive: score <= 15 -> 0, <= 23 -> 1, <= 31 -> 2;
    anything that passes none of these comparisons -> 3.
    """
    for label, upper_bound in enumerate((15, 23, 31)):
        if score <= upper_bound:
            return label
    return 3

# `df_pred` (normalised video ids) and `df_osats_b` (investigator "B" rows,
# restricted to predicted videos) were already prepared earlier in this cell,
# so they are reused here instead of re-reading the same files.

# Aggregate the real values per video, this time including the global rating score
df_truth = df_osats_b.groupby("VIDEO")[osats_cols + ["GLOBA_RATING_SCORE"]].mean().reset_index()
df_truth.rename(columns={"VIDEO": "video"}, inplace=True)

# Merge predictions with ground truth
df_merged = pd.merge(df_pred, df_truth, on="video", suffixes=("_pred", "_true"))

# True GRS: the per-video mean is rounded to the nearest integer (as is done for
# the OSATS truth) instead of being truncated by a bare astype(int).
df_merged["GRS_true"] = df_merged["GLOBA_RATING_SCORE"].round().astype(int)
# Predicted GRS: sum of the predicted OSATS items
df_merged["GRS_pred"] = df_merged[[f"{col}_pred" for col in osats_cols]].sum(axis=1).astype(int)

# Classes
df_merged["GRS_class_true"] = df_merged["GRS_true"].apply(grs_class)
df_merged["GRS_class_pred"] = df_merged["GRS_pred"].apply(grs_class)

# Evaluate
f1_grs = f1_score(df_merged["GRS_class_true"], df_merged["GRS_class_pred"], average='macro')
acc_grs = accuracy_score(df_merged["GRS_class_true"], df_merged["GRS_class_pred"])

print("\n📊 MÉTRICAS GRS (Task 1 com apenas SUTURES):")
print(f"🎯 F1-score macro: {f1_grs:.4f}")
print(f"📊 Accuracy: {acc_grs:.4f}")

# Save the submission CSV
# NOTE(review): the other evaluation cells of this notebook write the same
# file name, so each run overwrites the previous model's Task 1 predictions.
df_grs_submission = df_merged[["video", "GRS_class_pred"]].rename(columns={"GRS_class_pred": "GRS"})
df_grs_submission.to_csv("task1_predicoes_sutures.csv", index=False)
print("✅ CSV gerado: task1_predicoes_sutures.csv")

# Global results
print("\n📈 MÉTRICAS GLOBAIS (modelo com apenas SUTURES):")
print(f"🎯 F1-score médio (macro): {np.mean(f1_scores):.4f}")
print(f"📊 Accuracy média: {np.mean(accuracies):.4f}")
print(f"💥 SmoothL1Loss final: {loss:.4f}")
print(f"📈 GRS (Task 1) - F1-score: {f1_grs:.4f}, Accuracy: {acc_grs:.4f}")


📦 Total de amostras para avaliação: 95


🔎 OSATS_RESPECT: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 5
  🎥 o38n → Pred: 2, Real: 3
  🎥 k55r → Pred: 3, Real: 2
  🎥 g11v → Pred: 3, Real: 3
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_RESPECT: F1-score = 0.2168, Accuracy = 0.3684

🔎 OSATS_MOTION: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 4
  🎥 o38n → Pred: 2, Real: 2
  🎥 k55r → Pred: 3, Real: 3
  🎥 g11v → Pred: 3, Real: 3
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_MOTION: F1-score = 0.4754, Accuracy = 0.6211

🔎 OSATS_INSTRUMENT: Exemplo (previsto vs real):
  🎥 j88t → Pred: 4, Real: 4
  🎥 o38n → Pred: 2, Real: 1
  🎥 k55r → Pred: 4, Real: 3
  🎥 g11v → Pred: 4, Real: 4
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_INSTRUMENT: F1-score = 0.3219, Accuracy = 0.5263

🔎 OSATS_SUTURE: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 4
  🎥 o38n → Pred: 2, Real: 2
  🎥 k55r → Pred: 3, Real: 3
  🎥 g11v → Pred: 3, Real: 4
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_SUTURE: F1-score = 0.2687, Accuracy = 0.421

## Tabela

In [249]:
# Per-item part of the metrics table: one row per OSATS column
linhas_osats = pd.DataFrame({
    "Métrica": osats_cols,
    "F1-score": f1_scores,
    "Accuracy": accuracies,
    "Loss": col_losses
})

# Loss between predicted and true GRS (Task 1), computed with the current `criterion`
grs_pred_t = torch.tensor(df_merged["GRS_pred"].values, dtype=torch.float32).to(device)
grs_true_t = torch.tensor(df_merged["GRS_true"].values, dtype=torch.float32).to(device)
loss_grs = criterion(grs_pred_t, grs_true_t).item()

media_col_loss = np.mean(col_losses)

# Summary rows, in display order: GRS (Task 1), the OSATS averages,
# and the overall loss held in `loss`
linhas_resumo = pd.DataFrame([
    {
        "Métrica": "GRS (Task 1)",
        "F1-score": f1_grs,
        "Accuracy": acc_grs,
        "Loss": loss_grs,
    },
    {
        "Métrica": "Média OSATS",
        "F1-score": np.mean(f1_scores),
        "Accuracy": np.mean(accuracies),
        "Loss": media_col_loss,
    },
    {
        "Métrica": "SmoothL1Loss",
        "F1-score": np.nan,
        "Accuracy": np.nan,
        "Loss": loss,
    },
])

# Final table: per-item rows followed by the summary rows, renumbered from 0
metricas_df = pd.concat([linhas_osats, linhas_resumo], ignore_index=True)

# Show the table
print("\n📋 Tabela de Métricas Completas:")
print(metricas_df.to_string(index=False))


📋 Tabela de Métricas Completas:
            Métrica  F1-score  Accuracy     Loss
      OSATS_RESPECT  0.216815  0.368421 0.494737
       OSATS_MOTION  0.475371  0.621053 0.231579
   OSATS_INSTRUMENT  0.321947  0.526316 0.268421
       OSATS_SUTURE  0.268732  0.421053 0.342105
         OSATS_FLOW  0.387062  0.621053 0.221053
    OSATS_KNOWLEDGE  0.461850  0.505263 0.278947
  OSATS_PERFORMANCE  0.351376  0.536842 0.284211
OSATS_FINAL_QUALITY  0.351672  0.484211 0.415789
       GRS (Task 1)  0.448868  0.494737 3.415790
        Média OSATS  0.354353  0.510526 0.317105
       SmoothL1Loss       NaN       NaN 0.317105


## Ensemble dos 3 Modelos anteriores (Votação ponderada)

In [250]:
# Load the predictions of the three models
df1 = pd.read_csv("task2_predicoes_sutures.csv")         # model 1
df2 = pd.read_csv("task2_predicoes_sutures2.csv")        # model 2
df3 = pd.read_csv("task2_predicoes_sutures_focused.csv") # model 3

# Normalise the video ids (string, trimmed, lower-case)
for df in [df1, df2, df3]:
    df["VIDEO"] = df["VIDEO"].astype(str).str.strip().str.lower()

# OSATS columns
osats_cols = [col for col in df1.columns if col != "VIDEO"]

# Per-variable weights as (model 1, model 2, model 3); the model judged best
# for a variable gets a vote of 2
weights = {
    "OSATS_RESPECT": (2, 1, 1),   # model 1
    "OSATS_MOTION": (1, 1, 2),    # model 3
    "OSATS_INSTRUMENT": (2, 1, 1),# model 1
    "OSATS_SUTURE": (1, 1, 2),    # model 3
    "OSATS_FLOW": (1, 1, 2),      # model 3
    "OSATS_KNOWLEDGE": (1, 1, 2), # model 3
    "OSATS_PERFORMANCE": (2, 1, 1),# model 1
    "OSATS_FINAL_QUALITY": (2, 1, 1) # model 1
}

# Line the three models up on VIDEO, in df1's row order. Combining the frames
# directly would pair rows by position only and silently mix videos if the
# files were ever written in a different order.
aligned_preds = []
for df in [df1, df2, df3]:
    if df["VIDEO"].duplicated().any():
        raise ValueError("Duplicate VIDEO ids in a prediction file; cannot align the models.")
    by_video = df.set_index("VIDEO")[osats_cols].reindex(df1["VIDEO"])
    if by_video.isna().any().any():
        raise ValueError("A prediction file is missing a video (or holds a NaN score).")
    aligned_preds.append(by_video.to_numpy())
preds1, preds2, preds3 = aligned_preds

# Apply the weighted ensemble (weighted mean -> round -> clip to [0, 4])
ensemble_preds = df1.copy()
for idx, col in enumerate(osats_cols):
    w1, w2, w3 = weights[col]
    weighted_mean = (
        preds1[:, idx] * w1 + preds2[:, idx] * w2 + preds3[:, idx] * w3
    ) / (w1 + w2 + w3)
    ensemble_preds[col] = np.clip(np.round(weighted_mean), 0, 4).astype(int)

# Save CSV
ensemble_preds.to_csv("task2_predicoes_sutures_ensemble_ponderado.csv", index=False)
print("✅ Ensemble ponderado guardado como 'task2_predicoes_sutures_ensemble_ponderado.csv'")

✅ Ensemble ponderado guardado como 'task2_predicoes_sutures_ensemble_ponderado.csv'


## Avaliação

In [251]:
# Path to the annotation file
CSV_PATH = r"../../OSS_dataset/Train/OSATS.csv"

# Columns to predict
osats_cols = [
    'OSATS_RESPECT', 'OSATS_MOTION', 'OSATS_INSTRUMENT', 'OSATS_SUTURE',
    'OSATS_FLOW', 'OSATS_KNOWLEDGE', 'OSATS_PERFORMANCE', 'OSATS_FINAL_QUALITY'
]

# Device for the loss tensors. Assigned here, before the per-column loop that
# uses it, so the cell does not depend on a `device` left over from another cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load predictions and normalise the video ids (string, trimmed, lower-case)
df_pred = pd.read_csv("task2_predicoes_sutures_ensemble_ponderado.csv")
df_pred.rename(columns={"VIDEO": "video"}, inplace=True)
df_pred["video"] = df_pred["video"].astype(str).str.strip().str.lower()

# Load the real annotations and keep only the rows of investigator "B"
df_osats = pd.read_csv(CSV_PATH, sep=";")
df_osats["VIDEO"] = df_osats["VIDEO"].astype(str).str.strip().str.lower()
df_osats_b = df_osats[df_osats["INVESTIGATOR"] == "B"].copy()

# Keep only videos that have a prediction
videos_preditos = set(df_pred["video"])
df_osats_b = df_osats_b[df_osats_b["VIDEO"].isin(videos_preditos)]

# Aggregate the real values per video: mean of each OSATS column, rounded to int
df_truth = df_osats_b.groupby("VIDEO")[osats_cols].mean().round().astype(int).reset_index()
df_truth.rename(columns={"VIDEO": "video"}, inplace=True)

# Join predictions and ground truth; shared column names get _pred / _true suffixes
df_merged = pd.merge(df_pred, df_truth, on="video", suffixes=("_pred", "_true"))
print(f"📦 Total de amostras para avaliação: {len(df_merged)}\n")

# Per-column metrics
# (`f1_score` and `accuracy_score` are not imported in this cell; they must
# already be in scope.)
f1_scores = []
accuracies = []
col_losses = []

criterion_eval = nn.SmoothL1Loss()

for col in osats_cols:
    y_true = df_merged[f"{col}_true"]
    y_pred = df_merged[f"{col}_pred"]
    y_true_col = torch.tensor(y_true.values, dtype=torch.float32).to(device)
    y_pred_col = torch.tensor(y_pred.values, dtype=torch.float32).to(device)
    col_loss = criterion_eval(y_pred_col, y_true_col).item()
    col_losses.append(col_loss)

    # Show the first rows (at most 5) as a quick sanity check
    print(f"\n🔎 {col}: Exemplo (previsto vs real):")
    for i in range(min(5, len(df_merged))):
        print(f"  🎥 {df_merged['video'].iloc[i]} → Pred: {y_pred.iloc[i]}, Real: {y_true.iloc[i]}")

    f1 = f1_score(y_true, y_pred, average='macro')
    acc = accuracy_score(y_true, y_pred)
    f1_scores.append(f1)
    accuracies.append(acc)
    print(f"📊 {col}: F1-score = {f1:.4f}, Accuracy = {acc:.4f}")

# Overall SmoothL1Loss over all OSATS columns at once
criterion = torch.nn.SmoothL1Loss()
y_true_tensor = torch.tensor(df_merged[[f"{col}_true" for col in osats_cols]].values, dtype=torch.float32).to(device)
y_pred_tensor = torch.tensor(df_merged[[f"{col}_pred" for col in osats_cols]].values, dtype=torch.float32).to(device)
loss = criterion(y_pred_tensor, y_true_tensor).item()

# Task 1: GRS
def grs_class(score):
    """Map a GRS total to a class index in 0..3.

    Upper bounds are inclusive: score <= 15 -> 0, <= 23 -> 1, <= 31 -> 2;
    anything that passes none of these comparisons -> 3.
    """
    for label, upper_bound in enumerate((15, 23, 31)):
        if score <= upper_bound:
            return label
    return 3

# `df_pred` (normalised video ids) and `df_osats_b` (investigator "B" rows,
# restricted to predicted videos) were already prepared earlier in this cell,
# so they are reused here instead of re-reading the same files.

# Aggregate the real values per video, this time including the global rating score
df_truth = df_osats_b.groupby("VIDEO")[osats_cols + ["GLOBA_RATING_SCORE"]].mean().reset_index()
df_truth.rename(columns={"VIDEO": "video"}, inplace=True)

# Merge predictions with ground truth
df_merged = pd.merge(df_pred, df_truth, on="video", suffixes=("_pred", "_true"))

# True GRS: the per-video mean is rounded to the nearest integer (as is done for
# the OSATS truth) instead of being truncated by a bare astype(int).
df_merged["GRS_true"] = df_merged["GLOBA_RATING_SCORE"].round().astype(int)
# Predicted GRS: sum of the predicted OSATS items
df_merged["GRS_pred"] = df_merged[[f"{col}_pred" for col in osats_cols]].sum(axis=1).astype(int)

# Classes
df_merged["GRS_class_true"] = df_merged["GRS_true"].apply(grs_class)
df_merged["GRS_class_pred"] = df_merged["GRS_pred"].apply(grs_class)

# Evaluate
f1_grs = f1_score(df_merged["GRS_class_true"], df_merged["GRS_class_pred"], average='macro')
acc_grs = accuracy_score(df_merged["GRS_class_true"], df_merged["GRS_class_pred"])

print("\n📊 MÉTRICAS GRS (Task 1 com apenas SUTURES):")
print(f"🎯 F1-score macro: {f1_grs:.4f}")
print(f"📊 Accuracy: {acc_grs:.4f}")

# Save the submission CSV
# NOTE(review): the other evaluation cells of this notebook write the same
# file name, so each run overwrites the previous model's Task 1 predictions.
df_grs_submission = df_merged[["video", "GRS_class_pred"]].rename(columns={"GRS_class_pred": "GRS"})
df_grs_submission.to_csv("task1_predicoes_sutures.csv", index=False)
print("✅ CSV gerado: task1_predicoes_sutures.csv")

# Global results
print("\n📈 MÉTRICAS GLOBAIS (modelo com apenas SUTURES):")
print(f"🎯 F1-score médio (macro): {np.mean(f1_scores):.4f}")
print(f"📊 Accuracy média: {np.mean(accuracies):.4f}")
print(f"💥 SmoothL1Loss final: {loss:.4f}")
print(f"📈 GRS (Task 1) - F1-score: {f1_grs:.4f}, Accuracy: {acc_grs:.4f}")


📦 Total de amostras para avaliação: 95


🔎 OSATS_RESPECT: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 5
  🎥 o38n → Pred: 2, Real: 3
  🎥 k55r → Pred: 3, Real: 2
  🎥 g11v → Pred: 3, Real: 3
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_RESPECT: F1-score = 0.2168, Accuracy = 0.3684

🔎 OSATS_MOTION: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 4
  🎥 o38n → Pred: 2, Real: 2
  🎥 k55r → Pred: 3, Real: 3
  🎥 g11v → Pred: 3, Real: 3
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_MOTION: F1-score = 0.4754, Accuracy = 0.6211

🔎 OSATS_INSTRUMENT: Exemplo (previsto vs real):
  🎥 j88t → Pred: 4, Real: 4
  🎥 o38n → Pred: 2, Real: 1
  🎥 k55r → Pred: 4, Real: 3
  🎥 g11v → Pred: 4, Real: 4
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_INSTRUMENT: F1-score = 0.3219, Accuracy = 0.5263

🔎 OSATS_SUTURE: Exemplo (previsto vs real):
  🎥 j88t → Pred: 3, Real: 4
  🎥 o38n → Pred: 2, Real: 2
  🎥 k55r → Pred: 3, Real: 3
  🎥 g11v → Pred: 3, Real: 4
  🎥 z49x → Pred: 3, Real: 3
📊 OSATS_SUTURE: F1-score = 0.2687, Accuracy = 0.421

## Tabela

In [252]:
# Per-item part of the metrics table: one row per OSATS column
linhas_osats = pd.DataFrame({
    "Métrica": osats_cols,
    "F1-score": f1_scores,
    "Accuracy": accuracies,
    "Loss": col_losses
})

# Loss between predicted and true GRS (Task 1), computed with the current `criterion`
grs_pred_t = torch.tensor(df_merged["GRS_pred"].values, dtype=torch.float32).to(device)
grs_true_t = torch.tensor(df_merged["GRS_true"].values, dtype=torch.float32).to(device)
loss_grs = criterion(grs_pred_t, grs_true_t).item()

media_col_loss = np.mean(col_losses)

# Summary rows, in display order: GRS (Task 1), the OSATS averages,
# and the overall loss held in `loss`
linhas_resumo = pd.DataFrame([
    {
        "Métrica": "GRS (Task 1)",
        "F1-score": f1_grs,
        "Accuracy": acc_grs,
        "Loss": loss_grs,
    },
    {
        "Métrica": "Média OSATS",
        "F1-score": np.mean(f1_scores),
        "Accuracy": np.mean(accuracies),
        "Loss": media_col_loss,
    },
    {
        "Métrica": "SmoothL1Loss",
        "F1-score": np.nan,
        "Accuracy": np.nan,
        "Loss": loss,
    },
])

# Final table: per-item rows followed by the summary rows, renumbered from 0
metricas_df = pd.concat([linhas_osats, linhas_resumo], ignore_index=True)

# Show the table
print("\n📋 Tabela de Métricas Completas:")
print(metricas_df.to_string(index=False))


📋 Tabela de Métricas Completas:
            Métrica  F1-score  Accuracy     Loss
      OSATS_RESPECT  0.216815  0.368421 0.494737
       OSATS_MOTION  0.475371  0.621053 0.231579
   OSATS_INSTRUMENT  0.321947  0.526316 0.268421
       OSATS_SUTURE  0.268732  0.421053 0.342105
         OSATS_FLOW  0.387062  0.621053 0.221053
    OSATS_KNOWLEDGE  0.461850  0.505263 0.278947
  OSATS_PERFORMANCE  0.351376  0.536842 0.284211
OSATS_FINAL_QUALITY  0.416053  0.494737 0.400000
       GRS (Task 1)  0.512986  0.536842 3.378947
        Média OSATS  0.362401  0.511842 0.315132
       SmoothL1Loss       NaN       NaN 0.315132
