#### Training on features extracted with ViT
* Create a new Dataset with the extracted features
* Create a new model
* Train the model

In [1]:
from drecg.data.utils import FeaturesDataset
# Load one FeaturesDataset per saved feature file (plain train, augmented
# train, validation and test). The files are read in the order listed.
train_dataset, train_dataset_augmented, validation_dataset, test_dataset = map(
    FeaturesDataset,
    (
        'features_ext_vit/train_features.pt',
        'features_ext_vit/train_features_augmented.pt',
        'features_ext_vit/validation_features.pt',
        'features_ext_vit/test_features.pt',
    ),
)


In [2]:
import torch
import numpy as np
def _difference_features_and_labels(dataset):
    """Return ``(features_a - features_b, labels)`` of ``dataset``, each via ``.numpy()``.

    The model input for every sample is the element-wise difference between
    the two feature sets stored on the dataset.
    """
    pair_difference = dataset.features_a - dataset.features_b
    return pair_difference.numpy(), dataset.labels.numpy()


# Training matrices are built from the *augmented* training set.
x_train, y_train = _difference_features_and_labels(train_dataset_augmented)

x_val, y_val = _difference_features_and_labels(validation_dataset)

x_test, y_test = _difference_features_and_labels(test_dataset)

In [3]:
# Sanity check: show the shape of each feature matrix (test, train, validation).
tuple(split.shape for split in (x_test, x_train, x_val))

((346, 1024), (11658, 1024), (164, 1024))

In [4]:
import xgboost as xgb
# Wrap every (features, labels) pair in an xgboost DMatrix, in train / val / test order.
dtrain, dval, dtest = (
    xgb.DMatrix(features, label=labels)
    for features, labels in (
        (x_train, y_train),
        (x_val, y_val),
        (x_test, y_test),
    )
)

In [14]:
# Booster hyper-parameters. Both AUC and classification error are requested
# as evaluation metrics.
param = {
    'objective': 'binary:logistic',
    'max_depth': 20,
    'eta': 0.11,
    'gamma': 1.0,
    'min_child_weight': 2,
    'subsample': 0.8,
    'eval_metric': ['auc', 'error'],
}

# Number of boosting rounds to run.
num_round = 48

# (DMatrix, display name) pairs passed to xgb.train as the evaluation sets.
evallist = [(dtrain, 'train'), (dval, 'eval')]

bst = xgb.train(
    params=param,
    dtrain=dtrain,
    num_boost_round=num_round,
    evals=evallist,
)

[0]	train-auc:0.91402	train-error:0.12464	eval-auc:0.78049	eval-error:0.25000
[1]	train-auc:0.95607	train-error:0.08355	eval-auc:0.81715	eval-error:0.26829
[2]	train-auc:0.97025	train-error:0.07017	eval-auc:0.84890	eval-error:0.21951
[3]	train-auc:0.97510	train-error:0.05764	eval-auc:0.85173	eval-error:0.20732
[4]	train-auc:0.97931	train-error:0.05395	eval-auc:0.85990	eval-error:0.18293
[5]	train-auc:0.98231	train-error:0.04735	eval-auc:0.86704	eval-error:0.17683
[6]	train-auc:0.98507	train-error:0.04649	eval-auc:0.87203	eval-error:0.17683
[7]	train-auc:0.98668	train-error:0.04023	eval-auc:0.88407	eval-error:0.16463
[8]	train-auc:0.98857	train-error:0.03920	eval-auc:0.88340	eval-error:0.16463
[9]	train-auc:0.98966	train-error:0.03688	eval-auc:0.88548	eval-error:0.17073
[10]	train-auc:0.99131	train-error:0.03440	eval-auc:0.89188	eval-error:0.15854
[11]	train-auc:0.99265	train-error:0.03122	eval-auc:0.88995	eval-error:0.14634
[12]	train-auc:0.99451	train-error:0.02899	eval-auc:0.89099	ev

In [15]:
# Evaluate the trained booster on the held-out test DMatrix.
# NOTE(review): the saved output below reports `eval-error` twice, which does
# not match the ['auc', 'error'] metrics configured for training in this
# notebook — presumably a stale output; re-run from a fresh kernel to confirm.
bst.eval(data=dtest)

'[0]\teval-error:0.22832369942196531\teval-error:0.22832369942196531'

In [4]:
import mlflow
from pathlib import Path
# Point MLflow at the local tracking server.
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# Single source of truth for the experiment name used below.
EXPERIMENT_NAME = "Feature Extraction VIT + xGBoost"

# `mlflow.create_experiment` raises if an experiment with this name already
# exists, which made this cell fail on every re-run. Look the experiment up
# first and only create it (with its tags) when it is missing.
_existing_experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if _existing_experiment is None:
    experiment_id = mlflow.create_experiment(
        EXPERIMENT_NAME,
        tags={"version": "v1", "type": "vit_features_xgboost"},
    )
else:
    experiment_id = _existing_experiment.experiment_id

# Make it the active experiment for subsequent runs.
experiment = mlflow.set_experiment(EXPERIMENT_NAME)