# Avalia√ß√£o Automatizada de Suturas Cir√∫rgicas com Machine Learning

### Import das Bibliotecas

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
from scipy.signal import find_peaks
from itertools import combinations
import glob
import shutil
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

### Configura√ß√£o Inicial

In [2]:
VIDEO_DIR = r"../../OSS_dataset/Train/videos"             # Pasta com v√≠deos cir√∫rgicos
SAIDA_DIR = r"frames_limpos_model1"      # Pasta para guardar o √∫ltimo frame limpo de cada v√≠deo
EXCEL_PATH = r"../../OSS_dataset/Train/OSATS.xlsx"    # Excel de classifica√ß√£o
CSV_PATH = r"../../OSS_dataset/Train/OSATS.csv"
FRAME_INTERVAL = 10                            # Ex: 30 = 1 frame por segundo (assumindo 30 fps)
FRAME_SIZE = (224, 224)                        # Tamanho padr√£o para input da CNN

os.makedirs(SAIDA_DIR, exist_ok=True)

# Modela√ß√£o

## Task 1

### Carregar dfs do ficheiro csv

In [3]:
# Carregar ficheiros
# train_df = pd.read_csv("dataframes/train_df.csv")
# test_df = pd.read_csv("dataframes/test_df.csv")

train_df = pd.read_csv("dataframes_por_inspetor/train_B.csv")
test_df = pd.read_csv("dataframes_por_inspetor/test_B.csv")

# Target
y_train = train_df["GLOBA_RATING_SCORE"]

# Colunas que n√£o devem ser usadas como input (identificadores, alvos, anota√ß√µes humanas) ---> AQUI S√ÉO TAMB√âM EXCLUIDAS AS FEATURES OBTIDAS ATRAV√âS DA AN√ÅLISE DO V√çDEO PARA FAZER A AVALIA√á√ÉO BASELINE
colunas_a_excluir = [
    'video','GLOBA_RATING_SCORE',
    'OSATS_RESPECT', 'OSATS_MOTION', 'OSATS_INSTRUMENT', 'OSATS_SUTURE',
    'OSATS_FLOW', 'OSATS_KNOWLEDGE', 'OSATS_PERFORMANCE', 'OSATS_FINAL_QUALITY',
    'num_pontos', 'densidade_fios',' angulo_medio_fios', 'num_linhas', 'simetria_horizontal', 'complexidade_visual' 
]

# Inferir automaticamente todas as features dispon√≠veis
features = [col for col in train_df.columns if col not in colunas_a_excluir]

# Construir X
X_train = train_df[features]
X_test = test_df.reindex(columns=features, fill_value=0)  # garantir mesmas colunas

### Modelo simples - Random Forest

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Instanciar e treinar
modelo = RandomForestClassifier(n_estimators=100, random_state=42)
modelo.fit(X_train, y_train)

### Fazer previs√µes no dataset de teste

In [5]:
grs_pred_val = modelo.predict(X_test)

# Garantir que os valores est√£o no intervalo [8, 40]
grs_pred_val = grs_pred_val.clip(8, 40)

# Mapear para classe 0‚Äì3 conforme intervalo
def classificar_grs(grs):
    if grs <= 15:
        return 0
    elif grs <= 23:
        return 1
    elif grs <= 31:
        return 2
    else:
        return 3

grs_pred_classe = [classificar_grs(v) for v in grs_pred_val]

### Gerar CSV de submiss√£o

In [6]:
df_submissao = pd.DataFrame({
    'VIDEO': test_df['video'],
    'GRS': grs_pred_classe
})
df_submissao.to_csv("task1_predicoes_rf.csv", index=False)
print("‚úÖ CSV gerado: task1_predicoes_rf.csv")


‚úÖ CSV gerado: task1_predicoes_rf.csv


### Avaliar com as m√©tricas pedidas

In [7]:
from sklearn.metrics import f1_score, accuracy_score

# 1. Carregar o CSV original com anota√ß√µes
df_osats = pd.read_csv(CSV_PATH, sep=';')

# 2. Calcular a m√©dia do GRS por v√≠deo
df_media_grs = df_osats.groupby("VIDEO")["GLOBA_RATING_SCORE"].mean().reset_index()
df_media_grs.rename(columns={"GLOBA_RATING_SCORE": "GRS_REAL"}, inplace=True)

# 3. Carregar as previs√µes do teu modelo
df_pred = pd.read_csv("task1_predicoes_rf.csv")  # tem colunas: VIDEO, GRS (classe 0-3)

# 4. Fazer merge com os valores reais
df_avaliacao = pd.merge(df_pred, df_media_grs, on="VIDEO", how="inner")

# 5. Mapear os valores reais para classes 0‚Äì3
def classificar_grs(grs):
    if grs <= 15:
        return 0
    elif grs <= 23:
        return 1
    elif grs <= 31:
        return 2
    else:
        return 3

df_avaliacao["GRS_REAL_CLASS"] = df_avaliacao["GRS_REAL"].apply(classificar_grs)
df_avaliacao["GRS_PRED_CLASS"] = df_avaliacao["GRS"]  # j√° est√° em classe

# 6. Avaliar
y_true = df_avaliacao["GRS_REAL_CLASS"]
y_pred = df_avaliacao["GRS_PRED_CLASS"]

f1 = f1_score(y_true, y_pred, average='macro')
acc = accuracy_score(y_true, y_pred)
cost = np.mean(np.abs(y_true - y_pred))

print("üìä Avalia√ß√£o baseada nas anota√ß√µes reais (m√©dia por v√≠deo):")
print(f"üéØ F1-Score (macro): {f1:.4f}")
print(f"üìà Accuracy: {acc:.4f}")
print(f"üí∏ Expected Cost: {cost:.4f}")

üìä Avalia√ß√£o baseada nas anota√ß√µes reais (m√©dia por v√≠deo):
üéØ F1-Score (macro): 0.5643
üìà Accuracy: 0.6105
üí∏ Expected Cost: 0.4105


### Modelo mais avan√ßado - XGBClassifier

In [8]:
from xgboost import XGBRegressor

modelo = XGBRegressor(
    n_estimators=200,
    max_depth=6,
    learning_rate=0.1,
    objective='reg:squarederror',
    random_state=42
)
modelo.fit(X_train, y_train)

### Fazer previs√µes no dataset de teste

In [9]:
grs_pred_val = modelo.predict(X_test)

# Garantir que os valores est√£o no intervalo [8, 40]
grs_pred_val = grs_pred_val.clip(8, 40)

# Mapear para classe 0‚Äì3 conforme intervalo
def classificar_grs(grs):
    if grs <= 15:
        return 0
    elif grs <= 23:
        return 1
    elif grs <= 31:
        return 2
    else:
        return 3

grs_pred_classe = [classificar_grs(v) for v in grs_pred_val]

### Gerar CSV de submiss√£o

In [10]:
df_submissao = pd.DataFrame({
    'VIDEO': test_df['video'],
    'GRS': grs_pred_classe
})
df_submissao.to_csv("task1_predicoes_xgb.csv", index=False)
print("‚úÖ CSV gerado: task1_predicoes_xgb.csv")

‚úÖ CSV gerado: task1_predicoes_xgb.csv


### Avaliar com as m√©tricas pedidas

In [11]:
from sklearn.metrics import f1_score, accuracy_score

# 1. Carregar o CSV original com anota√ß√µes
df_osats = pd.read_csv(CSV_PATH, sep=';')

# 2. Calcular a m√©dia do GRS por v√≠deo
df_media_grs = df_osats.groupby("VIDEO")["GLOBA_RATING_SCORE"].mean().reset_index()
df_media_grs.rename(columns={"GLOBA_RATING_SCORE": "GRS_REAL"}, inplace=True)

# 3. Carregar as previs√µes do teu modelo
df_pred = pd.read_csv("task1_predicoes_xgb.csv")  # tem colunas: VIDEO, GRS (classe 0-3)

# 4. Fazer merge com os valores reais
df_avaliacao = pd.merge(df_pred, df_media_grs, on="VIDEO", how="inner")

# 5. Mapear os valores reais para classes 0‚Äì3
def classificar_grs(grs):
    if grs <= 15:
        return 0
    elif grs <= 23:
        return 1
    elif grs <= 31:
        return 2
    else:
        return 3

df_avaliacao["GRS_REAL_CLASS"] = df_avaliacao["GRS_REAL"].apply(classificar_grs)
df_avaliacao["GRS_PRED_CLASS"] = df_avaliacao["GRS"]  # j√° est√° em classe

# 6. Avaliar
y_true = df_avaliacao["GRS_REAL_CLASS"]
y_pred = df_avaliacao["GRS_PRED_CLASS"]

f1 = f1_score(y_true, y_pred, average='macro')
acc = accuracy_score(y_true, y_pred)
cost = np.mean(np.abs(y_true - y_pred))

print("üìä Avalia√ß√£o baseada nas anota√ß√µes reais (m√©dia por v√≠deo):")
print(f"üéØ F1-Score (macro): {f1:.4f}")
print(f"üìà Accuracy: {acc:.4f}")
print(f"üí∏ Expected Cost: {cost:.4f}")

üìä Avalia√ß√£o baseada nas anota√ß√µes reais (m√©dia por v√≠deo):
üéØ F1-Score (macro): 0.5499
üìà Accuracy: 0.6211
üí∏ Expected Cost: 0.3895


### LightGBM

In [12]:
from lightgbm import LGBMRegressor

modelo = LGBMRegressor(
    n_estimators=200,
    max_depth=-1,  # auto
    learning_rate=0.1,
    objective='regression',
    random_state=42
)
modelo.fit(X_train, y_train)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000569 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 94
[LightGBM] [Info] Number of data points in the train set: 219, number of used features: 7
[LightGBM] [Info] Start training from score 22.812785


### Classifica√ß√£o

In [13]:
grs_pred_val = modelo.predict(X_test)

# Garantir que os valores est√£o no intervalo [8, 40]
grs_pred_val = grs_pred_val.clip(8, 40)

# Mapear para classe 0‚Äì3 conforme intervalo
def classificar_grs(grs):
    if grs <= 15:
        return 0
    elif grs <= 23:
        return 1
    elif grs <= 31:
        return 2
    else:
        return 3

grs_pred_classe = [classificar_grs(v) for v in grs_pred_val]

df_submissao = pd.DataFrame({
    'VIDEO': test_df['video'],
    'GRS': grs_pred_classe
})
df_submissao.to_csv("task1_predicoes_light.csv", index=False)
print("‚úÖ CSV gerado: task1_predicoes_light.csv")

from sklearn.metrics import f1_score, accuracy_score

# 1. Carregar o CSV original com anota√ß√µes
df_osats = pd.read_csv(CSV_PATH, sep=';')

# 2. Calcular a m√©dia do GRS por v√≠deo
df_media_grs = df_osats.groupby("VIDEO")["GLOBA_RATING_SCORE"].mean().reset_index()
df_media_grs.rename(columns={"GLOBA_RATING_SCORE": "GRS_REAL"}, inplace=True)

# 3. Carregar as previs√µes do teu modelo
df_pred = pd.read_csv("task1_predicoes_light.csv")  # tem colunas: VIDEO, GRS (classe 0-3)

# 4. Fazer merge com os valores reais
df_avaliacao = pd.merge(df_pred, df_media_grs, on="VIDEO", how="inner")

# 5. Mapear os valores reais para classes 0‚Äì3
def classificar_grs(grs):
    if grs <= 15:
        return 0
    elif grs <= 23:
        return 1
    elif grs <= 31:
        return 2
    else:
        return 3

df_avaliacao["GRS_REAL_CLASS"] = df_avaliacao["GRS_REAL"].apply(classificar_grs)
df_avaliacao["GRS_PRED_CLASS"] = df_avaliacao["GRS"]  # j√° est√° em classe

# 6. Avaliar
y_true = df_avaliacao["GRS_REAL_CLASS"]
y_pred = df_avaliacao["GRS_PRED_CLASS"]

f1 = f1_score(y_true, y_pred, average='macro')
acc = accuracy_score(y_true, y_pred)
cost = np.mean(np.abs(y_true - y_pred))

print("üìä Avalia√ß√£o baseada nas anota√ß√µes reais (m√©dia por v√≠deo):")
print(f"üéØ F1-Score (macro): {f1:.4f}")
print(f"üìà Accuracy: {acc:.4f}")
print(f"üí∏ Expected Cost: {cost:.4f}")

‚úÖ CSV gerado: task1_predicoes_light.csv
üìä Avalia√ß√£o baseada nas anota√ß√µes reais (m√©dia por v√≠deo):
üéØ F1-Score (macro): 0.5233
üìà Accuracy: 0.6000
üí∏ Expected Cost: 0.4211


### ExtraTrees

In [14]:
from sklearn.ensemble import ExtraTreesRegressor

modelo = ExtraTreesRegressor(
    n_estimators=200,
    max_depth=None,
    random_state=42,
    n_jobs=-1
)
modelo.fit(X_train, y_train)


### Classifica√ß√£o

In [15]:
grs_pred_val = modelo.predict(X_test)

# Garantir que os valores est√£o no intervalo [8, 40]
grs_pred_val = grs_pred_val.clip(8, 40)

# Mapear para classe 0‚Äì3 conforme intervalo
def classificar_grs(grs):
    if grs <= 15:
        return 0
    elif grs <= 23:
        return 1
    elif grs <= 31:
        return 2
    else:
        return 3

grs_pred_classe = [classificar_grs(v) for v in grs_pred_val]

df_submissao = pd.DataFrame({
    'VIDEO': test_df['video'],
    'GRS': grs_pred_classe
})
df_submissao.to_csv("task1_predicoes_extra.csv", index=False)
print("‚úÖ CSV gerado: task1_predicoes_extra.csv")

from sklearn.metrics import f1_score, accuracy_score

# 1. Carregar o CSV original com anota√ß√µes
df_osats = pd.read_csv(CSV_PATH, sep=';')

# 2. Calcular a m√©dia do GRS por v√≠deo
df_media_grs = df_osats.groupby("VIDEO")["GLOBA_RATING_SCORE"].mean().reset_index()
df_media_grs.rename(columns={"GLOBA_RATING_SCORE": "GRS_REAL"}, inplace=True)

# 3. Carregar as previs√µes do teu modelo
df_pred = pd.read_csv("task1_predicoes_extra.csv")  # tem colunas: VIDEO, GRS (classe 0-3)

# 4. Fazer merge com os valores reais
df_avaliacao = pd.merge(df_pred, df_media_grs, on="VIDEO", how="inner")

# 5. Mapear os valores reais para classes 0‚Äì3
def classificar_grs(grs):
    if grs <= 15:
        return 0
    elif grs <= 23:
        return 1
    elif grs <= 31:
        return 2
    else:
        return 3

df_avaliacao["GRS_REAL_CLASS"] = df_avaliacao["GRS_REAL"].apply(classificar_grs)
df_avaliacao["GRS_PRED_CLASS"] = df_avaliacao["GRS"]  # j√° est√° em classe

# 6. Avaliar
y_true = df_avaliacao["GRS_REAL_CLASS"]
y_pred = df_avaliacao["GRS_PRED_CLASS"]

f1 = f1_score(y_true, y_pred, average='macro')
acc = accuracy_score(y_true, y_pred)
cost = np.mean(np.abs(y_true - y_pred))

print("üìä Avalia√ß√£o baseada nas anota√ß√µes reais (m√©dia por v√≠deo):")
print(f"üéØ F1-Score (macro): {f1:.4f}")
print(f"üìà Accuracy: {acc:.4f}")
print(f"üí∏ Expected Cost: {cost:.4f}")

‚úÖ CSV gerado: task1_predicoes_extra.csv
üìä Avalia√ß√£o baseada nas anota√ß√µes reais (m√©dia por v√≠deo):
üéØ F1-Score (macro): 0.5441
üìà Accuracy: 0.5789
üí∏ Expected Cost: 0.4211


### Ensemble simples (m√©dia das previs√µes)

In [16]:
from sklearn.ensemble import RandomForestRegressor

# Treinar tr√™s modelos
rf = RandomForestRegressor(n_estimators=100, random_state=42)
xgb = XGBRegressor(n_estimators=200, learning_rate=0.1, objective='reg:squarederror', random_state=42)
lgb = LGBMRegressor(n_estimators=200, learning_rate=0.1, random_state=42)

rf.fit(X_train, y_train)
xgb.fit(X_train, y_train)
lgb.fit(X_train, y_train)

# Fazer m√©dia das predi√ß√µes
grs_pred_val = (
    rf.predict(X_test) +
    xgb.predict(X_test) +
    lgb.predict(X_test)
) / 3


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000030 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 94
[LightGBM] [Info] Number of data points in the train set: 219, number of used features: 7
[LightGBM] [Info] Start training from score 22.812785


### Classifica√ß√£o

In [17]:
# Garantir que os valores est√£o no intervalo [8, 40]
grs_pred_val = grs_pred_val.clip(8, 40)

# Mapear para classe 0‚Äì3 conforme intervalo
def classificar_grs(grs):
    if grs <= 15:
        return 0
    elif grs <= 23:
        return 1
    elif grs <= 31:
        return 2
    else:
        return 3

grs_pred_classe = [classificar_grs(v) for v in grs_pred_val]

df_submissao = pd.DataFrame({
    'VIDEO': test_df['video'],
    'GRS': grs_pred_classe
})
df_submissao.to_csv("task1_predicoes_ensemble.csv", index=False)
print("‚úÖ CSV gerado: task1_predicoes_ensemble.csv")

from sklearn.metrics import f1_score, accuracy_score

# 1. Carregar o CSV original com anota√ß√µes
df_osats = pd.read_csv(CSV_PATH, sep=';')

# 2. Calcular a m√©dia do GRS por v√≠deo
df_media_grs = df_osats.groupby("VIDEO")["GLOBA_RATING_SCORE"].mean().reset_index()
df_media_grs.rename(columns={"GLOBA_RATING_SCORE": "GRS_REAL"}, inplace=True)

# 3. Carregar as previs√µes do teu modelo
df_pred = pd.read_csv("task1_predicoes_ensemble.csv")  # tem colunas: VIDEO, GRS (classe 0-3)

# 4. Fazer merge com os valores reais
df_avaliacao = pd.merge(df_pred, df_media_grs, on="VIDEO", how="inner")

# 5. Mapear os valores reais para classes 0‚Äì3
def classificar_grs(grs):
    if grs <= 15:
        return 0
    elif grs <= 23:
        return 1
    elif grs <= 31:
        return 2
    else:
        return 3

df_avaliacao["GRS_REAL_CLASS"] = df_avaliacao["GRS_REAL"].apply(classificar_grs)
df_avaliacao["GRS_PRED_CLASS"] = df_avaliacao["GRS"]  # j√° est√° em classe

# 6. Avaliar
y_true = df_avaliacao["GRS_REAL_CLASS"]
y_pred = df_avaliacao["GRS_PRED_CLASS"]

f1 = f1_score(y_true, y_pred, average='macro')
acc = accuracy_score(y_true, y_pred)
cost = np.mean(np.abs(y_true - y_pred))

print("üìä Avalia√ß√£o baseada nas anota√ß√µes reais (m√©dia por v√≠deo):")
print(f"üéØ F1-Score (macro): {f1:.4f}")
print(f"üìà Accuracy: {acc:.4f}")
print(f"üí∏ Expected Cost: {cost:.4f}")

‚úÖ CSV gerado: task1_predicoes_ensemble.csv
üìä Avalia√ß√£o baseada nas anota√ß√µes reais (m√©dia por v√≠deo):
üéØ F1-Score (macro): 0.5626
üìà Accuracy: 0.6211
üí∏ Expected Cost: 0.3789


# Compara√ß√£o entre modelos BASELINE

In [18]:
# Resultados atualizados dos modelos
data = {
    "Modelo": ["Random Forest", "XGBoost", "LightGBM", "Extra Trees", "Ensemble"],
    "F1-Score (macro)": [0.4386, 0.5925, 0.6085, 0.5720, 0.5691],
    "Accuracy": [0.5406, 0.6926, 0.6820, 0.6608, 0.6714],
    "Expected Cost": [0.5230, 0.3110, 0.3286, 0.3640, 0.3357]
}

# Criar DataFrame
df_resultados = pd.DataFrame(data)

# Mostrar a tabela
print("üìä Compara√ß√£o entre modelos BASELINE - Task 1")
print(df_resultados.to_string(index=False))

üìä Compara√ß√£o entre modelos BASELINE - Task 1
       Modelo  F1-Score (macro)  Accuracy  Expected Cost
Random Forest            0.4386    0.5406         0.5230
      XGBoost            0.5925    0.6926         0.3110
     LightGBM            0.6085    0.6820         0.3286
  Extra Trees            0.5720    0.6608         0.3640
     Ensemble            0.5691    0.6714         0.3357
