In [1]:
import glob
import pandas as pd
from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.common.features.feature_metadata import FeatureMetadata

In [2]:
# Number of TSV files concatenated into the training frame. The file at index
# `training_files` is read further down in the notebook as the hold-out set.
training_files = 5

# glob.glob() returns paths in arbitrary, filesystem-dependent order. Sort them
# so that the files used for training (and therefore the hold-out file) are the
# same on every machine and on every Restart & Run All.
data_files = sorted(glob.glob('bid_data/*.tsv'))
if not data_files:
    # Fail with a clear message instead of pandas' "No objects to concatenate".
    raise FileNotFoundError("No TSV files found matching 'bid_data/*.tsv'")

# Read each training file as a tab-separated table.
dfs = [pd.read_csv(file, sep='\t') for file in data_files[:training_files]]

# Stack the per-file frames into a single frame with a fresh 0..n-1 index.
train_data = TabularDataset(pd.concat(dfs, ignore_index=True))
# Store bid_pos with a categorical dtype rather than as a plain number.
train_data['bid_pos'] = train_data['bid_pos'].astype('category')

In [3]:
# Preview the first five rows of the combined training frame.
train_data.head(n=5)

Unnamed: 0,bid_pos,go_alone,def_alone,def_pos_rel,turn_card_level,bid_turn_suit,bid_next_suit,bid_green_suit,top_trump_strg,top_2_trump_strg,top_3_trump_strg,num_trump,num_next,num_voids,num_singletons,num_off_aces,num_tricks,points
0,0,0,0,0,6,1,0,0,7,7,7,2,0,1,1,0,2,-2
1,1,1,0,0,6,1,0,0,0,0,0,0,1,1,1,2,3,1
2,2,1,0,0,6,1,0,0,0,0,0,0,2,1,1,1,0,-2
3,1,1,1,1,6,1,0,0,0,0,0,0,2,1,1,1,3,4
4,2,1,1,1,6,1,0,0,10,13,13,3,1,1,2,0,4,4


In [4]:
# Name of the column the predictor is trained to predict.
label = 'points'

# Summary statistics (count, mean, std, quartiles, min/max) of the target column.
train_data.loc[:, label].describe()

count    759360.000000
mean          0.143026
std           2.625323
min          -4.000000
25%          -2.000000
50%          -1.000000
75%           4.000000
max           4.000000
Name: points, dtype: float64

In [5]:
# Extra feature columns to withhold from the model (none at the moment).
exclude = []

# Candidate target columns. Whichever one is not the current label is dropped
# from the training frame — presumably because it is another game outcome
# rather than an input (confirm).
outputs = ['num_tricks', 'points']
outputs.remove(label)

# Declare every column as raw type 'int', then override bid_pos as 'category'.
type_map = dict.fromkeys(train_data.columns, 'int')
type_map['bid_pos'] = 'category'
metadata = FeatureMetadata(type_map_raw=type_map)

# Treat the label as a multiclass target and fit on the remaining columns.
predictor = TabularPredictor(problem_type='multiclass', label=label)
predictor.fit(
    train_data.drop(columns=(exclude + outputs)),
    feature_metadata=metadata,
    presets='best_quality',
)

No path specified. Models will be saved in: "AutogluonModels/ag-20240906_002609"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.11.9
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #202405300957~1722440358~24.04~aa0a43f~dev-Ubuntu SMP PREEMPT_DY
CPU Count:          8
Memory Avail:       9.86 GB / 15.34 GB (64.3%)
Disk Space Avail:   121.32 GB / 452.95 GB (26.8%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdou

<autogluon.tabular.predictor.predictor.TabularPredictor at 0x75ccf99613d0>

In [6]:
# Hold-out set: the file immediately after the ones concatenated for training.
test_data = TabularDataset(data_files[training_files])

# Strip the label and every withheld column so only model inputs remain.
non_feature_columns = [label, *exclude, *outputs]
y_pred = predictor.predict(test_data.drop(columns=non_feature_columns))

# Show the first few predicted labels.
y_pred.head()

Loaded data from: bid_data/bid_model_2-20240903_113350.tsv | Columns = 18 / 18 | Rows = 151660 -> 151660


0   -2
1   -2
2   -1
3   -2
4    4
Name: points, dtype: int64

In [7]:
# Score the fitted predictor on the hold-out frame; the result is the metrics dict.
predictor.evaluate(data=test_data, silent=True)

{'accuracy': 0.6805485955426612,
 'balanced_accuracy': 0.5311954949523658,
 'mcc': 0.5841253853075998}

In [8]:
# Per-model comparison table, scored on the hold-out frame.
predictor.leaderboard(data=test_data)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,0.680549,0.678884,accuracy,84.911109,205.193269,2310.579602,0.027997,0.067468,19.876624,2,True,9
1,NeuralNetFastAI_BAG_L1,0.680384,0.678589,accuracy,9.443434,13.527082,1137.381558,9.443434,13.527082,1137.381558,1,True,3
2,LightGBM_BAG_L1,0.680008,0.678334,accuracy,34.100608,75.680111,308.919551,34.100608,75.680111,308.919551,1,True,5
3,LightGBMXT_BAG_L1,0.677832,0.676728,accuracy,37.491557,94.055691,310.700342,37.491557,94.055691,310.700342,1,True,4
4,RandomForestEntr_BAG_L1,0.673243,0.671672,accuracy,2.975063,22.176573,100.366319,2.975063,22.176573,100.366319,1,True,7
5,RandomForestGini_BAG_L1,0.672755,0.670908,accuracy,3.269614,21.450882,96.194804,3.269614,21.450882,96.194804,1,True,6
6,CatBoost_BAG_L1,0.668291,0.650635,accuracy,0.5779,0.412035,437.506723,0.5779,0.412035,437.506723,1,True,8
7,KNeighborsDist_BAG_L1,0.599736,0.599924,accuracy,12.109985,52.144929,3.606371,12.109985,52.144929,3.606371,1,True,2
8,KNeighborsUnif_BAG_L1,0.599611,0.599955,accuracy,11.795844,43.717213,3.625467,11.795844,43.717213,3.625467,1,True,1
