In [1]:
# Libraries

import numpy as np
import joblib
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.svm import SVC
from sklearn.model_selection import GroupKFold, GridSearchCV
from sklearn.metrics import f1_score, make_scorer

In [None]:
# Exploratory check of the raw training frame: preview the first rows and list
# the column names. Later cells reload the file themselves, so this cell can be
# deleted once the schema has been confirmed.
data_df = pd.read_pickle("Xtrain1.pkl")

for preview in (data_df.head(), data_df.columns):
    print(preview)

--- Primeiras Linhas do DataFrame ---
     Patient_Id                                  Skeleton_Features
158           8  [0.01672805172463768, -0.5662699523188407, 0.0...
500          12  [0.03123780159177354, -0.5718956396, 0.0479631...
396           6  [0.023674554394231464, -0.5588386275217391, 0....
155           8  [0.009414516641025642, -0.5693782186153847, 0....
321           3  [0.015847331331651126, -0.5660038876666666, 0....

--- Nomes de Todas as Colunas ---
Index(['Patient_Id', 'Skeleton_Features'], dtype='object')


In [22]:
# Load the training data: a pickled DataFrame holding a patient id and a
# feature vector per row, plus a separate .npy file with the labels.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load "Xtrain1.pkl" from a trusted source.
data_df = pd.read_pickle("Xtrain1.pkl")

# Patient ids are kept on their own so cross-validation can split by patient.
groups = data_df['Patient_Id'].values
# Each row stores its features as one array-like; stack them into a single
# (n_samples, n_features) matrix.
X_train = np.stack(data_df['Skeleton_Features'].values)
y_train = np.load("Ytrain1.npy")

# Features/groups and labels come from two different files, so fail loudly if
# they do not describe the same number of samples instead of fitting a model
# on misaligned data.
if not (len(X_train) == len(y_train) == len(groups)):
    raise ValueError(
        f"Sample count mismatch: X_train {X_train.shape}, "
        f"y_train {y_train.shape}, groups {groups.shape}"
    )

print(f"Shapes dos Dados: X_train {X_train.shape}, y_train {y_train.shape}, groups {groups.shape}")
print(f"Pacientes únicos: {len(np.unique(groups))}")

Shapes dos Dados: X_train (700, 132), y_train (700,), groups (700,)
Pacientes únicos: 14


In [None]:
# -------------------------
# Cross-validation setup
# -------------------------
# Folds are built from the patient ids passed as `groups` at fit time, so
# splitting is done per patient rather than per sample.
cv_strategy = GroupKFold(n_splits=7)

# Model selection metric: macro-averaged F1.
f1_macro_scorer = make_scorer(f1_score, average='macro')

# -------------------------
# Pipeline
# -------------------------
# Scaling followed by a support vector classifier. Both steps act as
# placeholders: the grid below substitutes the scaler and classifier per
# candidate.
pipeline = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("classifier", SVC(random_state=42)),
])

# Search space: 2 scalers x 4 values of C x 3 values of gamma = 24 candidates,
# all with an RBF-kernel SVC.
rbf_svc_grid = {
    "scaler": [StandardScaler(), RobustScaler()],
    "classifier": [SVC(kernel='rbf', random_state=42)],
    "classifier__C": [0.1, 1.0, 10.0, 100.0],
    "classifier__gamma": ['scale', 0.01, 0.1],
}
param_grid_svc = [rbf_svc_grid]

# -------------------------
# Grid search
# -------------------------
search = GridSearchCV(
    pipeline,
    param_grid_svc,
    scoring=f1_macro_scorer,
    cv=cv_strategy,
    refit=True,   # refit the best candidate on the whole training set
    n_jobs=-1,    # run fits in parallel on all available cores
    verbose=2,
)

# Patient ids are handed to the splitter through `groups`.
search.fit(X_train, y_train, groups=groups)


Fitting 7 folds for each of 24 candidates, totalling 168 fits


0,1,2
,estimator,Pipeline(step...m_state=42))])
,param_grid,"[{'classifier': [SVC(random_state=42)], 'classifier__C': [0.1, 1.0, ...], 'classifier__gamma': ['scale', 0.01, ...], 'scaler': [StandardScaler(), RobustScaler()]}]"
,scoring,make_scorer(f...average=macro)
,n_jobs,-1
,refit,True
,cv,GroupKFold(n_...shuffle=False)
,verbose,2
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,0.01
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [None]:
# -------------------------
# Best model & evaluation
# -------------------------
# The refitted winner of the grid search and its mean cross-validated score.
best_model = search.best_estimator_
cv_f1 = search.best_score_

# Macro F1 on the very data the model was fitted on; comparing it with the
# cross-validated score gives a rough indication of overfitting.
train_predictions = best_model.predict(X_train)
train_f1 = f1_score(y_train, train_predictions, average='macro')

report_lines = [
    "\n\n=== Best Model Results ===",
    f"Best params {search.best_params_}",
    f"F1 Score, Cross validation: {cv_f1:.4f}",
    f"F1 Score, Training: {train_f1:.4f}",
]
print("\n".join(report_lines))



=== Best Model Results ===
Best params {'classifier': SVC(random_state=42), 'classifier__C': 1.0, 'classifier__gamma': 0.01, 'scaler': StandardScaler()}
F1 Score, Cross validation: 0.9026
F1 Score, Training: 0.9917


In [35]:
# Persist the best model (the complete Pipeline, scaler included) so that
# mymodel.py can load it.
joblib.dump(value=best_model, filename="Classification_model.pkl")

['Classification_model.pkl']