In [1]:
import torch
import torch.nn.functional as F
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
import json
from tqdm import tqdm
import gc
import seaborn as sns
import pickle

import numpy as np
import pandas as pd

from imblearn.under_sampling import RandomUnderSampler 
from sklearn.metrics import classification_report

from sklearn.model_selection import GroupKFold, LeaveOneGroupOut
from sklearn.model_selection import cross_val_predict

from xgboost import XGBRFRegressor, XGBRFClassifier
import xgboost

sns.set_context("notebook", font_scale=1.25)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the preprocessed bundle from disk and unpack the three entries used below.
# NOTE(review): pickle.load executes arbitrary code; only load files you produced yourself.
with open('preprocessed.pickle', 'rb') as handle:
    mydict = pickle.load(handle)

truth, segments, m = (mydict[key] for key in ('truth', 'segments', 'm'))

In [3]:
# Work on a copy so the object loaded from the pickle (`m`) is left untouched.
mm = m.copy()

### classifier

In [4]:
# Largest value found in the `run` column.
mm.run.max()

9

In [5]:
# Distinct values of the `segment` column.
mm.segment.unique()

array(['preamble', 'instruction', 'input', 'question', 'postilla',
       'generated'], dtype=object)

In [6]:
# For each pid take the largest `tokzero` among 'postilla' rows, then report the
# smallest of those per-pid maxima.
# NOTE(review): this was originally described as the "minimum number of generated
# tokens", yet the filter selects the 'postilla' segment, not 'generated' - confirm.
postilla_rows = mm[mm.segment == 'postilla']
postilla_rows.groupby('pid').tokzero.max().min()

64

In [12]:
# Beginning of generation: one row per (run, pid), with the per-token `maxp` and
# `true_ent` values of the 'postilla' segment spread into columns keyed by `tokzero`.
postilla_cols = ['run', 'pid', 'tokzero', 'shape', 'true_ent', 'maxp']
ds = mm.loc[mm.segment == 'postilla', postilla_cols]

wide = ds.pivot(index=['run', 'pid'], columns='tokzero', values=['maxp', 'true_ent'])
ds = wide.reset_index()

# Flatten the two-level column index: ('maxp', 3) -> 'maxp3', ('run', '') -> 'run'.
ds.columns = [''.join(map(str, col)).strip() for col in ds.columns.values]

# Attach the ground-truth columns for each (run, pid).
ds = ds.merge(truth, on=['run', 'pid'])


In [13]:
# Class balance of the `correct` column (used as the target below).
ds.correct.value_counts()

correct
True     4977
False    1903
Name: count, dtype: int64

In [14]:
# Target, group ids and feature matrix taken from the wide frame.
y = ds.correct
pids = ds.pid
X = ds.drop(columns=['run', 'pid', 'correct', 'hallucinated'])

# Balance the two classes by randomly discarding majority-class rows.
rus = RandomUnderSampler(random_state=42)
X_res, y_res = rus.fit_resample(X, y)

# Pick the group ids of the retained rows by position. `sample_indices_` holds the
# row positions the sampler kept, so this stays aligned with X_res / y_res even if
# the sampler returns them with a fresh index instead of the original labels.
pids_res = pids.iloc[rus.sample_indices_]

X_res.columns = ['postilla_'+col for col in X.columns]

### beginning of generation

In [16]:
# Group ids (pid) of the rows kept after under-sampling.
pids_res

0          9
1         10
2         12
3         14
7         23
        ... 
2707    1677
1194    1342
1037     896
2272     530
134      355
Name: pid, Length: 3806, dtype: int64

In [17]:
from sklearn.model_selection import GroupKFold

group_kfold = GroupKFold(n_splits=5)

# Only the first of the five group-wise folds is needed: it serves as a single
# train/test split in which no pid is shared between the two sides.
fold_iter = enumerate(group_kfold.split(X_res, y_res, pids_res))
i, (train_index, test_index) = next(fold_iter)
print(f"Fold {i}:")

Fold 0:


In [18]:
# Materialise the selected fold as train / test objects (positional indexing).
# X_train is copied explicitly: a later cell adds a 'label' column to it, and doing
# that on a bare .iloc slice raises pandas' SettingWithCopyWarning.
X_train = X_res.iloc[train_index].copy()
y_train = y_res.iloc[train_index]
pids_train = pids_res.iloc[train_index]

X_test = X_res.iloc[test_index]
y_test = y_res.iloc[test_index]
pids_test = pids_res.iloc[test_index]

In [19]:
# Sanity check - must display True: no pid may sit on both sides of the split.
set(pids_train.values).isdisjoint(pids_test.values)

True

In [20]:
from autogluon.tabular import TabularDataset, TabularPredictor

In [21]:
# AutoGluon reads the target from a column of the training frame, so the label is
# attached to X_train in place (the XGBoost cell further down drops it again).
# NOTE(review): no group information is passed to the fit, so AutoGluon's internal
# validation folds presumably mix rows sharing a pid - confirm before comparing its
# validation scores with the group-wise test split.
X_train['label'] = y_train
predictor = TabularPredictor(label='label', path='tablellama-high', log_to_file=True)
predictor = predictor.fit(X_train, presets='high_quality')



  X_train['label'] = y_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train['label'] = y_train
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.11.8
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 24.3.0: Thu Jan  2 20:24:16 PST 2025; root:xnu-11215.81.4~3/RELEASE_ARM64_T6000
CPU Count:          10
Memory Avail:       11.57 GB / 64.00 GB (18.1%)
Disk Space Avail:   71.55 GB / 926.35 GB (7.7%)
Presets specified: ['high_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Note: `save_bag_folds=False`! This will

[36m(_dystack pid=74806)[0m 	0.8329	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	0.01s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.04s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting model: KNeighborsDist_BAG_L1 ... Training model for up to 597.23s of the 896.55s of remaining time.
[36m(_dystack pid=74806)[0m 	0.8721	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	0.01s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.02s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting model: LightGBMXT_BAG_L1 ... Training model for up to 597.19s of the 896.51s of remaining time.
[36m(_dystack pid=74806)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.34%)
[36m(_dystack pid=74806)[0m 	0.8736	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	2.99s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.07s	 = Validation runtim

[36m(_ray_fit pid=74981)[0m [1000]	valid_set's binary_error: 0.136095


[36m(_dystack pid=74806)[0m 	0.8662	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	1.78s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.12s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting model: NeuralNetTorch_r22_BAG_L1 ... Training model for up to 291.16s of the 590.48s of remaining time.
[36m(_dystack pid=74806)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.14%)
[36m(_dystack pid=74806)[0m 	0.8784	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	18.53s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.19s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting model: XGBoost_r33_BAG_L1 ... Training model for up to 270.87s of the 570.19s of remaining time.
[36m(_dystack pid=74806)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=3.24%)
[36m(_dystack pid

[36m(_dystack pid=74806)[0m 	0.8739	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	13.69s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.02s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting model: NeuralNetFastAI_r103_BAG_L1 ... Training model for up to 68.54s of the 367.86s of remaining time.
[36m(_dystack pid=74806)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.23%)
[36m(_ray_fit pid=75150)[0m No improvement since epoch 11: early stopping
[36m(_dystack pid=74806)[0m 	0.8732	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	6.8s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.07s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting model: NeuralNetTorch_r14_BAG_L1 ... Training model for up to 59.97s of the 359.29s of remaining time.
[36m(_dystack pid=74806)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFol

[36m(_ray_fit pid=75222)[0m [1000]	valid_set's binary_error: 0.0532544[32m [repeated 4x across cluster][0m


[36m(_dystack pid=74806)[0m 	0.9571	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	3.76s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.15s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting model: LightGBM_BAG_L2 ... Training model for up to 293.16s of the 293.06s of remaining time.
[36m(_dystack pid=74806)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.38%)


[36m(_ray_fit pid=75232)[0m [1000]	valid_set's binary_error: 0.056213


[36m(_dystack pid=74806)[0m 	0.9523	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	5.08s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.13s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting model: RandomForestGini_BAG_L2 ... Training model for up to 286.59s of the 286.49s of remaining time.
[36m(_dystack pid=74806)[0m 	0.9375	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	0.7s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.1s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting model: RandomForestEntr_BAG_L2 ... Training model for up to 285.76s of the 285.66s of remaining time.
[36m(_dystack pid=74806)[0m 	0.9372	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	0.59s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.1s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting model: CatBoost_BAG_L2 ... Training model for up to 285.04s of the 284.95s of remaining time.
[36m(_dystack pid=74806

[36m(_ray_fit pid=75312)[0m [1000]	valid_set's binary_error: 0.0739645


[36m(_dystack pid=74806)[0m 	0.9471	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	7.63s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.14s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting model: NeuralNetFastAI_r191_BAG_L2 ... Training model for up to 167.44s of the 167.34s of remaining time.
[36m(_dystack pid=74806)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.23%)
[36m(_dystack pid=74806)[0m 	0.9608	 = Validation score   (accuracy)
[36m(_dystack pid=74806)[0m 	8.18s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.08s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting model: CatBoost_r9_BAG_L2 ... Training model for up to 157.96s of the 157.86s of remaining time.
[36m(_dystack pid=74806)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=2.07%)
[36m(_dystack pi

[36m(_dystack pid=74806)[0m 	1.96s	 = Training   runtime
[36m(_dystack pid=74806)[0m Fitting 1 L1 models, fit_strategy="sequential" ...
[36m(_dystack pid=74806)[0m Fitting model: NeuralNetTorch_r22_BAG_L1_FULL ...
[36m(_dystack pid=74806)[0m 	13.02s	 = Training   runtime
[36m(_dystack pid=74806)[0m Fitting 1 L1 models, fit_strategy="sequential" ...
[36m(_dystack pid=74806)[0m Fitting model: XGBoost_r33_BAG_L1_FULL ...
[36m(_dystack pid=74806)[0m 	0.52s	 = Training   runtime
[36m(_dystack pid=74806)[0m Fitting model: ExtraTrees_r42_BAG_L1_FULL | Skipping fit via cloning parent ...
[36m(_dystack pid=74806)[0m 	0.8s	 = Training   runtime
[36m(_dystack pid=74806)[0m 	0.14s	 = Validation runtime
[36m(_dystack pid=74806)[0m Fitting 1 L1 models, fit_strategy="sequential" ...
[36m(_dystack pid=74806)[0m Fitting model: CatBoost_r137_BAG_L1_FULL ...
[36m(_dystack pid=74806)[0m 	0.76s	 = Training   runtime
[36m(_dystack pid=74806)[0m Fitting 1 L1 models, fit_strategy="

[36m(_dystack pid=74806)[0m 	11.63s	 = Training   runtime
[36m(_dystack pid=74806)[0m Fitting 1 L2 models, fit_strategy="sequential" ...
[36m(_dystack pid=74806)[0m Fitting model: LightGBMLarge_BAG_L2_FULL ...
[36m(_dystack pid=74806)[0m 	5.07s	 = Training   runtime
[36m(_dystack pid=74806)[0m Fitting 1 L2 models, fit_strategy="sequential" ...
[36m(_dystack pid=74806)[0m Fitting model: CatBoost_r177_BAG_L2_FULL ...
[36m(_dystack pid=74806)[0m 	2.41s	 = Training   runtime
[36m(_dystack pid=74806)[0m Fitting 1 L2 models, fit_strategy="sequential" ...
[36m(_dystack pid=74806)[0m Fitting model: NeuralNetTorch_r79_BAG_L2_FULL ...
[36m(_dystack pid=74806)[0m 	23.47s	 = Training   runtime
[36m(_dystack pid=74806)[0m Fitting 1 L2 models, fit_strategy="sequential" ...
[36m(_dystack pid=74806)[0m Fitting model: LightGBM_r131_BAG_L2_FULL ...
[36m(_dystack pid=74806)[0m 	6.3s	 = Training   runtime
[36m(_dystack pid=74806)[0m Fitting 1 L2 models, fit_strategy="sequential

	0	 = Optimal   num_stack_levels (Stacked Overfitting Occurred: True)
	1099s	 = DyStack   runtime |	2501s	 = Remaining runtime
Starting main fit with num_stack_levels=0.
	For future fit calls on this dataset, you can skip DyStack to save time: `predictor.fit(..., dynamic_stacking=False, num_stack_levels=0)`
Beginning AutoGluon training ... Time limit = 2501s
AutoGluon will save models to "/Users/bono/Library/CloudStorage/OneDrive-PolitecnicodiMilano/work/prin/llm-uncertainty/tablellama-high"
Train Data Rows:    3044
Train Data Columns: 130
Label Column:       label
Problem Type:       binary
Preprocessing data ...
Selected class <--> label mapping:  class 1 = True, class 0 = False
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    13814.80 MB
	Train Data (Original)  Memory Usage: 3.02 MB (0.0% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manua

Fitting model: XGBoost_BAG_L1 ... Training model for up to 2464.15s of the 2464.15s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.57%)
	0.8742	 = Validation score   (accuracy)
	5.78s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ... Training model for up to 2456.71s of the 2456.71s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.13%)
	0.8778	 = Validation score   (accuracy)
	5.99s	 = Training   runtime
	0.12s	 = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ... Training model for up to 2449.67s of the 2449.67s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.81%)
	0.8735	 = Validation score   (accuracy)
	13.84s	 = Training   runtime
	0.18s	 = 

	0.8745	 = Validation score   (accuracy)
	5.37s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting model: ExtraTrees_r172_BAG_L1 ... Training model for up to 2083.77s of the 2083.77s of remaining time.
	0.8624	 = Validation score   (accuracy)
	0.58s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: CatBoost_r69_BAG_L1 ... Training model for up to 2083.07s of the 2083.07s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.30%)
	0.8729	 = Validation score   (accuracy)
	14.06s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetFastAI_r103_BAG_L1 ... Training model for up to 2068.06s of the 2068.06s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.23%)
	0.8735	 = Validation score   (accuracy)
	5.5s	 = Training   runtime
	0.05s	 = Validation runtime
Fitt

	0.867	 = Validation score   (accuracy)
	1.33s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: NeuralNetTorch_r143_BAG_L1 ... Training model for up to 1822.45s of the 1822.44s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.13%)
	0.8807	 = Validation score   (accuracy)
	16.66s	 = Training   runtime
	0.14s	 = Validation runtime
Fitting model: CatBoost_r128_BAG_L1 ... Training model for up to 1804.67s of the 1804.66s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.46%)
	0.8742	 = Validation score   (accuracy)
	114.32s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: NeuralNetFastAI_r111_BAG_L1 ... Training model for up to 1688.99s of the 1688.98s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 worker

	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.38%)
	0.8696	 = Validation score   (accuracy)
	5.12s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: NeuralNetFastAI_r172_BAG_L1 ... Training model for up to 1520.78s of the 1520.77s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.24%)
	0.8748	 = Validation score   (accuracy)
	3.38s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: CatBoost_r180_BAG_L1 ... Training model for up to 1516.38s of the 1516.38s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.89%)
	0.8748	 = Validation score   (accuracy)
	58.47s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: NeuralNetTorch_r76_BAG_L1 ... Training model for up to 1456.55s

Refitting models via `predictor.refit_full` using all of the data (combined train and validation)...
	Models trained in this way will have the suffix "_FULL" and have NaN validation score.
	This process is not bound by time_limit, but should take less time than the original `predictor.fit` call.
	To learn more, refer to the `.refit_full` method docstring which explains how "_FULL" models differ from normal models.
Fitting model: KNeighborsUnif_BAG_L1_FULL | Skipping fit via cloning parent ...
	0.01s	 = Training   runtime
	0.05s	 = Validation runtime
Fitting model: KNeighborsDist_BAG_L1_FULL | Skipping fit via cloning parent ...
	0.01s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting 1 L1 models, fit_strategy="sequential" ...
Fitting model: LightGBMXT_BAG_L1_FULL ...
	1.66s	 = Training   runtime
Fitting 1 L1 models, fit_strategy="sequential" ...
Fitting model: LightGBM_BAG_L1_FULL ...
	1.83s	 = Training   runtime
Fitting model: RandomForestGini_BAG_L1_FULL | Skipping fit via c

Fitting model: RandomForest_r127_BAG_L1_FULL | Skipping fit via cloning parent ...
	2.84s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting 1 L1 models, fit_strategy="sequential" ...
Fitting model: NeuralNetFastAI_r134_BAG_L1_FULL ...
No improvement since epoch 0: early stopping
	1.03s	 = Training   runtime
Fitting model: RandomForest_r34_BAG_L1_FULL | Skipping fit via cloning parent ...
	1.61s	 = Training   runtime
	0.09s	 = Validation runtime
Fitting 1 L1 models, fit_strategy="sequential" ...
Fitting model: LightGBM_r94_BAG_L1_FULL ...
	1.29s	 = Training   runtime
Fitting 1 L1 models, fit_strategy="sequential" ...
Fitting model: NeuralNetTorch_r143_BAG_L1_FULL ...
	28.06s	 = Training   runtime
Fitting 1 L1 models, fit_strategy="sequential" ...
Fitting model: CatBoost_r128_BAG_L1_FULL ...
	9.44s	 = Training   runtime
Fitting 1 L1 models, fit_strategy="sequential" ...
Fitting model: NeuralNetFastAI_r111_BAG_L1_FULL ...
	Stopping at the best epoch learned earlier - 18.
	0.6s	 =

TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/Users/bono/Library/CloudStorage/OneDrive-PolitecnicodiMilano/work/prin/llm-uncertainty/tablellama-high")


In [22]:
# X_train['label'] = y_train
# predictions = predictor.predict(X_test)

In [23]:
# Score the AutoGluon predictor on the held-out fold.
pred_proba = predictor.predict_proba(X_test)

# Hard label = position of the most probable column, read as a boolean
# (position 1 -> True; presumably the columns are ordered False, True - confirm).
top_column = pred_proba.values.argmax(axis=1)
pred = top_column.astype(bool)

report = classification_report(y_test.values, pred)
print(report)

              precision    recall  f1-score   support

       False       0.58      0.40      0.47       371
        True       0.56      0.73      0.63       391

    accuracy                           0.57       762
   macro avg       0.57      0.56      0.55       762
weighted avg       0.57      0.57      0.55       762



In [24]:
%%time

from sklearn.model_selection import GroupKFold, LeaveOneGroupOut
from sklearn.model_selection import cross_val_predict

from xgboost import XGBRFRegressor, XGBRFClassifier
import xgboost

#partition cv by prompt
xgb_model = XGBRFClassifier(n_estimators=100, n_jobs=6)

# pred = cross_val_predict(xgb_model, X, y, cv=GroupKFold(n_splits=10), groups=pids)
# pred_proba = cross_val_predict(xgb_model, X, y, cv=LeaveOneGroupOut(), groups=pids, method='predict_proba')

# pred_proba = cross_val_predict(xgb_model, X_res, y_res, cv=GroupKFold(n_splits=10), groups=pids_res, method='predict_proba')

xgb_model.fit(X_train.drop(columns=['label']), y_train)
pred_proba = xgb_model.predict_proba(X_test)

CPU times: user 1.34 s, sys: 204 ms, total: 1.55 s
Wall time: 543 ms


In [25]:
# Turn the XGBoost class probabilities into hard boolean labels
# (index of the larger probability, read as a boolean) and score the held-out fold.
top_class = pred_proba.argmax(axis=1)
pred = top_class.astype(bool)

report = classification_report(y_test.values, pred)
print(report)

              precision    recall  f1-score   support

       False       0.65      0.50      0.56       371
        True       0.61      0.74      0.67       391

    accuracy                           0.62       762
   macro avg       0.63      0.62      0.62       762
weighted avg       0.63      0.62      0.62       762



In [26]:
%%time

from sklearn.model_selection import GroupKFold, LeaveOneGroupOut
from sklearn.model_selection import cross_val_predict

from xgboost import XGBRFRegressor, XGBRFClassifier
import xgboost

#partition cv by prompt
xgb_model = XGBRFClassifier(n_estimators=100, n_jobs=6)

# pred = cross_val_predict(xgb_model, X, y, cv=GroupKFold(n_splits=10), groups=pids)
# pred_proba = cross_val_predict(xgb_model, X, y, cv=LeaveOneGroupOut(), groups=pids, method='predict_proba')

pred_proba = cross_val_predict(xgb_model, X_res, y_res, cv=GroupKFold(n_splits=10), groups=pids_res, method='predict_proba')


CPU times: user 14.1 s, sys: 1.57 s, total: 15.6 s
Wall time: 5.13 s


In [None]:
# `pred_proba` holds out-of-fold probabilities for every row of X_res (one
# prediction per resampled row, in row order), so it has to be scored against
# y_res; y_test only covers the single fold used earlier and has a different length.
pred = pred_proba.argmax(axis=1).astype(bool)
print(classification_report(y_res.values, pred))