In [46]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import re
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import log_loss
import clip
from tqdm import tqdm
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [47]:
# Report whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [48]:
# NOTE(review): this only constructs the sdp_kernel context manager (see the cell
# output); it is never entered with `with`, so it presumably changes no backend
# setting — confirm whether this cell is needed.
torch.backends.cuda.sdp_kernel()

<contextlib._GeneratorContextManager at 0x25308ff9350>

In [49]:
# Turn on PyTorch's flash scaled-dot-product-attention backend.
torch.backends.cuda.enable_flash_sdp(True)

In [50]:
# Dataset root directories, one per classification task.
# hearthstone
DATA_PATH_HEARTHSTONE_RACE = '../dataset/Hearthstone-Minion-race/' # label: race
DATA_PATH_HEARTHSTONE_CARDCLASS = '../dataset/Hearthstone-All-cardClass/' # label: cardClass
DATA_PATH_HEARTHSTONE_ALLSET = '../dataset/Hearthstone-All-set/' # label: set
DATA_PATH_HEARTHSTONE_SPELLSCHOOL = '../dataset/Hearthstone-Spell-spellSchool/' # label: spellSchool

In [51]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the pretrained CLIP ViT-B/32 model together with its image preprocessing transform.
clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)

# Helper Functions

In [52]:
def preprocess_df(df, id):
    """Clean the text columns of a card DataFrame and build ``combined_text``.

    The frame is modified in place and also returned:

    * ``text``: missing values become ``'unknown'``, then every entry is
      normalised with ``preprocess_sentence``.
    * ``artist`` / ``mechanics``: missing values become ``'unknown'``;
      leading/trailing ``[``, ``]`` and ``'`` characters are stripped from
      ``mechanics``.
    * ``combined_text`` (new column): lower-cased, space-separated
      concatenation of ``name``, optionally ``id``, ``artist``, ``text`` and
      ``mechanics``.

    Args:
        df: DataFrame with ``name``, ``artist``, ``text`` and ``mechanics``
            columns (plus ``id`` when ``id`` is truthy).
        id: When truthy, the ``id`` column is included in ``combined_text``.

    Returns:
        The same DataFrame object, with the columns described above updated.
    """
    df['text'] = df['text'].fillna('unknown').apply(preprocess_sentence)
    df['artist'] = df['artist'].fillna('unknown')
    df['mechanics'] = df['mechanics'].fillna('unknown').str.strip("[]''")

    if id:
        columns = ['name', 'id', 'artist', 'text', 'mechanics']
    else:
        columns = ['name', 'artist', 'text', 'mechanics']

    # A single missing part (e.g. a card without a name) would turn the whole
    # concatenation into NaN, which cannot be tokenised later. Fill the gaps on
    # temporary copies so `name` / `id` themselves are left untouched.
    parts = [df[col].fillna('unknown').astype(str).str.lower() for col in columns]
    combined = parts[0]
    for part in parts[1:]:
        combined = combined + ' ' + part
    df['combined_text'] = combined
    return df

In [53]:
def preprocess_sentence(sentence):
    """Normalise a piece of card text.

    Removes ``<...>`` markup tags, drops every character that is neither a
    word character nor whitespace, lower-cases the result and collapses all
    whitespace runs (including newlines) into single spaces.

    Args:
        sentence: Raw text string.

    Returns:
        The cleaned, lower-cased string with single-space separators.
    """
    without_tags = re.sub(r'<[^>]+>', '', sentence)
    without_punctuation = re.sub(r'[^\w\s]', '', without_tags)
    lowered = without_punctuation.replace('\n', ' ').lower()
    tokens = lowered.split()
    return ' '.join(tokens)

In [54]:
def combine_image_text(texts, images, clip_model, clip_preprocess):
    """Encode each (text, image) pair with CLIP and concatenate the embeddings.

    Uses the module-level ``device`` for all tensors.

    Args:
        texts: Sequence of strings, one per sample.
        images: Sequence of image file paths, indexed in parallel with ``texts``.
        clip_model: Loaded CLIP model exposing ``encode_text`` / ``encode_image``.
        clip_preprocess: Image transform returned alongside the CLIP model.

    Returns:
        A list with one tensor per sample: the text features followed by the
        image features, concatenated along dimension 1.
    """
    image_text = []
    for idx in tqdm(range(len(texts))):
        # truncate=True keeps over-long card text from aborting the whole run:
        # by default clip.tokenize raises when the text exceeds the context length.
        text_tokens = clip.tokenize(texts[idx], truncate=True).to(device)

        # Open the image in a context manager so the file handle is released
        # as soon as the pixels have been converted to a tensor.
        with Image.open(images[idx]) as pil_image:
            image_tensor = clip_preprocess(pil_image).unsqueeze(0).to(device)

        # Feature extraction only: no gradients are needed through CLIP.
        with torch.no_grad():
            text_features = clip_model.encode_text(text_tokens)
            image_features = clip_model.encode_image(image_tensor)

        image_text.append(torch.cat((text_features, image_features), 1))
    return image_text

In [55]:
def process_df(df, data_path, clip_model, clip_preprocess, id):
    """Turn a raw card DataFrame into a list of combined CLIP feature tensors.

    Cleans the frame with ``preprocess_df``, prefixes every entry of the
    ``Image Path`` column with ``data_path`` and hands the texts and image
    paths to ``combine_image_text``.

    Args:
        df: Raw card DataFrame (modified in place by ``preprocess_df``).
        data_path: Prefix prepended to each ``Image Path`` entry.
        clip_model: Loaded CLIP model.
        clip_preprocess: Image transform matching ``clip_model``.
        id: Forwarded to ``preprocess_df``.

    Returns:
        Whatever ``combine_image_text`` returns for the prepared inputs.
    """
    prepared = preprocess_df(df, id)
    captions = list(prepared['combined_text'])
    image_paths = [data_path + relative_path for relative_path in prepared['Image Path']]
    return combine_image_text(captions, image_paths, clip_model, clip_preprocess)

# Model Training and Evaluation

In [56]:
class CustomDataset(Dataset):
    """Dataset pairing precomputed feature tensors with integer class labels."""

    def __init__(self, features, labels):
        # features: indexable collection of tensors; labels: indexable collection of ints
        self.features, self.labels = features, labels

    def __len__(self):
        # The number of samples is taken from the feature collection.
        return len(self.features)

    def __getitem__(self, idx):
        """Return ``(feature, label)`` for sample ``idx``.

        The feature is a detached float32 copy of the stored tensor; the label
        is wrapped in a ``torch.long`` tensor.
        """
        sample = self.features[idx]
        target = self.labels[idx]
        return (
            sample.detach().clone().to(torch.float32),
            torch.tensor(target, dtype=torch.long),
        )

In [57]:
class Model(nn.Module):
    """Three-layer fully connected classifier with ReLU and dropout (p=0.4)."""

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the layers.

        Args:
            input_size: Width of the flattened input vector.
            hidden_size: Width of both hidden layers.
            num_classes: Number of output logits.
        """
        super().__init__()
        self.fc_1 = nn.Linear(input_size, hidden_size)
        self.fc_2 = nn.Linear(hidden_size, hidden_size)
        self.fc_3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.4)

    def forward(self, x):
        """Flatten ``x`` per sample and return the raw class logits."""
        hidden = x.view(x.shape[0], -1)
        # Both hidden layers share the same Linear -> ReLU -> Dropout pattern.
        for layer in (self.fc_1, self.fc_2):
            hidden = self.dropout(self.relu(layer(hidden)))
        return self.fc_3(hidden)

In [58]:
def train(model, train_loader, dev_loader):
    """Run the training loop and print per-epoch train / validation loss.

    Relies on the notebook-level globals ``num_epochs``, ``device``,
    ``criterion`` and ``optimizer``.

    Args:
        model: Network to optimise (updated in place).
        train_loader: DataLoader yielding ``(feature, label)`` training batches.
        dev_loader: DataLoader yielding ``(feature, label)`` validation batches.
    """
    for epoch in range(num_epochs):
        # ---- optimisation pass over the training split ----
        running_train = 0.0
        running_val = 0.0
        model.train()
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            batch_loss = criterion(model(batch_x), batch_y)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch figure is a per-sample mean.
            running_train += batch_loss.item() * batch_x.size(0)

        # ---- evaluation pass over the dev split (no gradients) ----
        model.eval()
        with torch.no_grad():
            for batch_x, batch_y in dev_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                batch_loss = criterion(model(batch_x), batch_y)
                running_val += batch_loss.item() * batch_x.size(0)

        train_loss = running_train / len(train_loader.dataset)
        val_loss = running_val / len(dev_loader.dataset)

        print(f"Epoch [{epoch+1}/{num_epochs}]")
        print(f"  Train Loss: {train_loss:.4f} - Validation Loss: {val_loss:.4f}")

In [59]:
def evaluate_accuracy(model, test_loader):
    """Print the fraction of samples in ``test_loader`` that ``model`` classifies correctly.

    Uses the notebook-level ``device``. The predicted class of each sample is
    the index of its largest logit. Nothing is returned.
    """
    n_correct = 0.0
    n_seen = 0
    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            predicted = torch.max(model(batch_x), 1).indices
            n_correct += (predicted == batch_y).sum().item()
            n_seen += batch_y.size(0)
    accuracy = n_correct / n_seen
    print(f"Accuracy: {accuracy:.3f}")

In [96]:
from sklearn.metrics import log_loss
import numpy as np

def evaluate_log_loss(model, test_loader):
    """Compute the multi-class log loss of ``model`` over ``test_loader``.

    Uses the notebook-level ``device`` and scikit-learn's ``log_loss``.

    Args:
        model: Classifier returning one logit per class for each sample.
        test_loader: DataLoader yielding ``(feature, label)`` batches, where
            ``label`` holds integer class ids matching the logit columns.

    Returns:
        The log loss as returned by ``sklearn.metrics.log_loss``.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    batch_probabilities = []
    batch_labels = []

    model.eval()
    with torch.no_grad():
        for feature, label in test_loader:
            output = model(feature.to(device))
            probabilities = torch.nn.functional.softmax(output, dim=1).cpu().numpy()
            # Clip probabilities to avoid extreme values
            batch_probabilities.append(np.clip(probabilities, 1e-7, 1 - 1e-7))
            # Labels are only consumed on the host, so they never need to visit the device.
            batch_labels.append(label.cpu().numpy())

    if not batch_probabilities:
        raise ValueError("evaluate_log_loss received an empty test_loader")

    all_predicted_probabilities = np.concatenate(batch_probabilities, axis=0)
    all_labels = np.concatenate(batch_labels, axis=0)

    # Name every class the model can output. Without `labels`, log_loss infers the
    # class set from the labels it sees and fails (or mis-aligns columns) when the
    # evaluation split is missing one of the classes.
    class_ids = np.arange(all_predicted_probabilities.shape[1])
    return log_loss(all_labels, all_predicted_probabilities, labels=class_ids)

# Usage
# NOTE(review): `model` and `test_loader` are first created further down the notebook,
# so this cell raises NameError on a fresh top-to-bottom run; the value shown below
# presumably came from kernel state left over from later cells.
logloss = evaluate_log_loss(model, test_loader)
print(f"Log Loss: {logloss:.3f}")


Log Loss: 1.789




# Race

In [62]:
# Load the train / test / dev CSV splits for the Race task.
df_train = pd.read_csv(DATA_PATH_HEARTHSTONE_RACE + "/train.csv")
df_test = pd.read_csv(DATA_PATH_HEARTHSTONE_RACE + "/test.csv")
df_dev = pd.read_csv(DATA_PATH_HEARTHSTONE_RACE + "/dev.csv")

In [63]:
# Fresh encoder that maps the `race` label values to integer class ids.
label_encoder = LabelEncoder()

In [64]:
# Extract CLIP features for every split and encode the `race` labels.
# The encoder is fitted on the training labels only and then re-used, so all
# three splits share one label -> id mapping. Re-fitting on test/dev would
# renumber the classes whenever a split is missing one of them; `transform`
# instead raises ValueError if a split contains a label unseen in training.
train_data = process_df(df_train, DATA_PATH_HEARTHSTONE_RACE, clip_model, clip_preprocess, True)
train_labels = label_encoder.fit_transform(df_train['race'])
test_data = process_df(df_test, DATA_PATH_HEARTHSTONE_RACE, clip_model, clip_preprocess, True)
test_labels = label_encoder.transform(df_test['race'])
dev_data = process_df(df_dev, DATA_PATH_HEARTHSTONE_RACE, clip_model, clip_preprocess, True)
dev_labels = label_encoder.transform(df_dev['race'])

100%|██████████████████████████████████████████████████████████████████████████████| 5398/5398 [05:22<00:00, 16.76it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1012/1012 [01:03<00:00, 16.04it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 337/337 [00:20<00:00, 16.34it/s]


In [65]:
# Hyperparameters, model, optimizer and loss for the Race task.
input_size = train_data[0].size(1)  # feature width, read from dim 1 of the first sample
hidden_size = 128
num_epochs = 20  # read as a global by train()
num_classes = len(label_encoder.classes_)  # classes known to the encoder after its most recent fit
lr = 0.0002
batch_size = 16

model = Model(input_size, hidden_size, num_classes).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)  # read as a global by train()
criterion = nn.CrossEntropyLoss()  # read as a global by train()

In [66]:
# Wrap each split in a Dataset / DataLoader; only the training loader shuffles.
train_dataset = CustomDataset(train_data, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = CustomDataset(test_data, test_labels)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

dev_dataset = CustomDataset(dev_data, dev_labels)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size)

In [67]:
# Train on the training split (validating on dev every epoch), then report
# accuracy and log loss on the test split.
train(model, train_loader, dev_loader)
evaluate_accuracy(model, test_loader)
evaluate_log_loss(model, test_loader)

Epoch [1/20]
  Train Loss: 1.7900 - Validation Loss: 1.4893
Epoch [2/20]
  Train Loss: 1.3783 - Validation Loss: 1.2656
Epoch [3/20]
  Train Loss: 1.1545 - Validation Loss: 1.0386
Epoch [4/20]
  Train Loss: 0.9819 - Validation Loss: 0.9156
Epoch [5/20]
  Train Loss: 0.8940 - Validation Loss: 0.8355
Epoch [6/20]
  Train Loss: 0.8062 - Validation Loss: 0.7836
Epoch [7/20]
  Train Loss: 0.7459 - Validation Loss: 0.7205
Epoch [8/20]
  Train Loss: 0.6734 - Validation Loss: 0.6944
Epoch [9/20]
  Train Loss: 0.6399 - Validation Loss: 0.6601
Epoch [10/20]
  Train Loss: 0.5845 - Validation Loss: 0.6347
Epoch [11/20]
  Train Loss: 0.5449 - Validation Loss: 0.6260
Epoch [12/20]
  Train Loss: 0.5123 - Validation Loss: 0.6247
Epoch [13/20]
  Train Loss: 0.4867 - Validation Loss: 0.5991
Epoch [14/20]
  Train Loss: 0.4607 - Validation Loss: 0.5862
Epoch [15/20]
  Train Loss: 0.4383 - Validation Loss: 0.5767
Epoch [16/20]
  Train Loss: 0.4038 - Validation Loss: 0.5746
Epoch [17/20]
  Train Loss: 0.401

In [None]:
# NOTE(review): `evaluate_class_metrics` is not defined in the visible part of this
# notebook and this cell has no execution count; unless it is defined elsewhere,
# running it raises NameError.
evaluate_class_metrics(model, test_loader)

# Card Class

In [68]:
# Load the train / test / dev CSV splits for the Card Class task
# (rebinds the df_train / df_test / df_dev names used by the previous task).
df_train = pd.read_csv(DATA_PATH_HEARTHSTONE_CARDCLASS + "/train.csv")
df_test = pd.read_csv(DATA_PATH_HEARTHSTONE_CARDCLASS + "/test.csv")
df_dev = pd.read_csv(DATA_PATH_HEARTHSTONE_CARDCLASS + "/dev.csv")

In [69]:
# Fresh encoder that maps the `cardClass` label values to integer class ids.
label_encoder = LabelEncoder()

In [70]:
# Extract CLIP features for every split and encode the `cardClass` labels.
# Fit the encoder on the training labels only and re-use it for test/dev so all
# splits share one label -> id mapping; re-fitting per split can renumber the
# classes when a split is missing one. `transform` raises ValueError on labels
# that were not seen in training.
train_data = process_df(df_train, DATA_PATH_HEARTHSTONE_CARDCLASS, clip_model, clip_preprocess, True)
train_labels = label_encoder.fit_transform(df_train['cardClass'])
test_data = process_df(df_test, DATA_PATH_HEARTHSTONE_CARDCLASS, clip_model, clip_preprocess, True)
test_labels = label_encoder.transform(df_test['cardClass'])
dev_data = process_df(df_dev, DATA_PATH_HEARTHSTONE_CARDCLASS, clip_model, clip_preprocess, True)
dev_labels = label_encoder.transform(df_dev['cardClass'])

100%|██████████████████████████████████████████████████████████████████████████████| 8561/8561 [08:51<00:00, 16.10it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1603/1603 [01:34<00:00, 16.96it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 536/536 [00:32<00:00, 16.51it/s]


In [71]:
# Show the label -> integer id mapping held by the encoder.
# `classes_` reflects whichever split the encoder was fitted on most recently.
class_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
class_mapping

{'DEATHKNIGHT': 0,
 'DEMONHUNTER': 1,
 'DRUID': 2,
 'HUNTER': 3,
 'MAGE': 4,
 'NEUTRAL': 5,
 'NONE_cardClass': 6,
 'PALADIN': 7,
 'PRIEST': 8,
 'ROGUE': 9,
 'SHAMAN': 10,
 'WARLOCK': 11,
 'WARRIOR': 12}

In [72]:
# Hyperparameters, model, optimizer and loss for the Card Class task.
input_size = train_data[0].size(1)  # feature width, read from dim 1 of the first sample
hidden_size = 128
num_epochs = 20  # read as a global by train()
num_classes = len(label_encoder.classes_)  # classes known to the encoder after its most recent fit
lr = 0.0002
batch_size = 16

model = Model(input_size, hidden_size, num_classes).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)  # read as a global by train()
criterion = nn.CrossEntropyLoss()  # read as a global by train()

In [73]:
# Wrap each split in a Dataset / DataLoader; only the training loader shuffles.
train_dataset = CustomDataset(train_data, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = CustomDataset(test_data, test_labels)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

dev_dataset = CustomDataset(dev_data, dev_labels)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size)

In [74]:
# Train on the training split (validating on dev every epoch), then report
# accuracy and log loss on the test split.
train(model, train_loader, dev_loader)
evaluate_accuracy(model, test_loader)
evaluate_log_loss(model, test_loader)

Epoch [1/20]
  Train Loss: 1.9485 - Validation Loss: 1.7714
Epoch [2/20]
  Train Loss: 1.7056 - Validation Loss: 1.5524
Epoch [3/20]
  Train Loss: 1.5532 - Validation Loss: 1.4228
Epoch [4/20]
  Train Loss: 1.4379 - Validation Loss: 1.3060
Epoch [5/20]
  Train Loss: 1.3421 - Validation Loss: 1.2318
Epoch [6/20]
  Train Loss: 1.2709 - Validation Loss: 1.1834
Epoch [7/20]
  Train Loss: 1.2020 - Validation Loss: 1.1418
Epoch [8/20]
  Train Loss: 1.1479 - Validation Loss: 1.1007
Epoch [9/20]
  Train Loss: 1.1073 - Validation Loss: 1.0693
Epoch [10/20]
  Train Loss: 1.0538 - Validation Loss: 1.0493
Epoch [11/20]
  Train Loss: 1.0113 - Validation Loss: 1.0362
Epoch [12/20]
  Train Loss: 0.9733 - Validation Loss: 1.0018
Epoch [13/20]
  Train Loss: 0.9300 - Validation Loss: 0.9919
Epoch [14/20]
  Train Loss: 0.8939 - Validation Loss: 0.9684
Epoch [15/20]
  Train Loss: 0.8541 - Validation Loss: 0.9799
Epoch [16/20]
  Train Loss: 0.8358 - Validation Loss: 0.9913
Epoch [17/20]
  Train Loss: 0.807

# Spell School

In [75]:
# Load the train / test / dev CSV splits for the Spell School task
# (rebinds the df_train / df_test / df_dev names used by the previous task).
df_train = pd.read_csv(DATA_PATH_HEARTHSTONE_SPELLSCHOOL + "/train.csv")
df_test = pd.read_csv(DATA_PATH_HEARTHSTONE_SPELLSCHOOL + "/test.csv")
df_dev = pd.read_csv(DATA_PATH_HEARTHSTONE_SPELLSCHOOL + "/dev.csv")

In [76]:
# Fresh encoder that maps the `spellSchool` label values to integer class ids.
label_encoder = LabelEncoder()

In [77]:
# Extract CLIP features for every split and encode the `spellSchool` labels.
# Fit the encoder on the training labels only and re-use it for test/dev so all
# splits share one label -> id mapping; re-fitting per split can renumber the
# classes when a split is missing one. `transform` raises ValueError on labels
# that were not seen in training.
train_data = process_df(df_train, DATA_PATH_HEARTHSTONE_SPELLSCHOOL, clip_model, clip_preprocess, True)
train_labels = label_encoder.fit_transform(df_train['spellSchool'])
test_data = process_df(df_test, DATA_PATH_HEARTHSTONE_SPELLSCHOOL, clip_model, clip_preprocess, True)
test_labels = label_encoder.transform(df_test['spellSchool'])
dev_data = process_df(df_dev, DATA_PATH_HEARTHSTONE_SPELLSCHOOL, clip_model, clip_preprocess, True)
dev_labels = label_encoder.transform(df_dev['spellSchool'])

100%|██████████████████████████████████████████████████████████████████████████████| 2715/2715 [02:43<00:00, 16.65it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 508/508 [00:30<00:00, 16.70it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 170/170 [00:10<00:00, 16.39it/s]


In [78]:
# Show the label -> integer id mapping held by the encoder.
# `classes_` reflects whichever split the encoder was fitted on most recently.
class_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
class_mapping

{'ARCANE': 0,
 'FEL': 1,
 'FIRE': 2,
 'FROST': 3,
 'HOLY': 4,
 'NATURE': 5,
 'NONE_spellSchool': 6,
 'SHADOW': 7}

In [79]:
# Hyperparameters, model, optimizer and loss for the Spell School task.
input_size = train_data[0].size(1)  # feature width, read from dim 1 of the first sample
hidden_size = 128
num_epochs = 20  # read as a global by train()
num_classes = len(label_encoder.classes_)  # classes known to the encoder after its most recent fit
lr = 0.0002
batch_size = 16

model = Model(input_size, hidden_size, num_classes).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)  # read as a global by train()
criterion = nn.CrossEntropyLoss()  # read as a global by train()

In [80]:
# Wrap each split in a Dataset / DataLoader; only the training loader shuffles.
train_dataset = CustomDataset(train_data, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = CustomDataset(test_data, test_labels)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

dev_dataset = CustomDataset(dev_data, dev_labels)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size)

In [81]:
# Train on the training split (validating on dev every epoch), then report
# accuracy and log loss on the test split.
train(model, train_loader, dev_loader)
evaluate_accuracy(model, test_loader)
evaluate_log_loss(model, test_loader)

Epoch [1/20]
  Train Loss: 1.3241 - Validation Loss: 1.1467
Epoch [2/20]
  Train Loss: 1.1155 - Validation Loss: 1.0641
Epoch [3/20]
  Train Loss: 1.0132 - Validation Loss: 0.9729
Epoch [4/20]
  Train Loss: 0.9169 - Validation Loss: 0.8972
Epoch [5/20]
  Train Loss: 0.8298 - Validation Loss: 0.8168
Epoch [6/20]
  Train Loss: 0.7530 - Validation Loss: 0.8205
Epoch [7/20]
  Train Loss: 0.6885 - Validation Loss: 0.7211
Epoch [8/20]
  Train Loss: 0.6185 - Validation Loss: 0.6820
Epoch [9/20]
  Train Loss: 0.5871 - Validation Loss: 0.6805
Epoch [10/20]
  Train Loss: 0.5267 - Validation Loss: 0.6220
Epoch [11/20]
  Train Loss: 0.4939 - Validation Loss: 0.6016
Epoch [12/20]
  Train Loss: 0.4635 - Validation Loss: 0.5671
Epoch [13/20]
  Train Loss: 0.4389 - Validation Loss: 0.5519
Epoch [14/20]
  Train Loss: 0.4091 - Validation Loss: 0.5563
Epoch [15/20]
  Train Loss: 0.3669 - Validation Loss: 0.5244
Epoch [16/20]
  Train Loss: 0.3503 - Validation Loss: 0.5503
Epoch [17/20]
  Train Loss: 0.332

# Set

In [82]:
# Load the train / test / dev CSV splits for the Set task
# (rebinds the df_train / df_test / df_dev names used by the previous task).
df_train = pd.read_csv(DATA_PATH_HEARTHSTONE_ALLSET + "/train.csv")
df_test = pd.read_csv(DATA_PATH_HEARTHSTONE_ALLSET + "/test.csv")
df_dev = pd.read_csv(DATA_PATH_HEARTHSTONE_ALLSET + "/dev.csv")

In [83]:
# Fresh encoder that maps the `set` label values to integer class ids.
label_encoder = LabelEncoder()

In [84]:
# Extract CLIP features for every split and encode the `set` labels.
# The final argument is False here, so the card id is left out of the text input.
# Fit the encoder on the training labels only and re-use it for test/dev so all
# splits share one label -> id mapping; re-fitting per split can renumber the
# classes when a split is missing one. `transform` raises ValueError on labels
# that were not seen in training.
train_data = process_df(df_train, DATA_PATH_HEARTHSTONE_ALLSET, clip_model, clip_preprocess, False)
train_labels = label_encoder.fit_transform(df_train['set'])
test_data = process_df(df_test, DATA_PATH_HEARTHSTONE_ALLSET, clip_model, clip_preprocess, False)
test_labels = label_encoder.transform(df_test['set'])
dev_data = process_df(df_dev, DATA_PATH_HEARTHSTONE_ALLSET, clip_model, clip_preprocess, False)
dev_labels = label_encoder.transform(df_dev['set'])

100%|██████████████████████████████████████████████████████████████████████████████| 8548/8548 [08:22<00:00, 17.00it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1603/1603 [01:33<00:00, 17.21it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 532/532 [00:31<00:00, 16.95it/s]


In [85]:
# Show the label -> integer id mapping held by the encoder.
# `classes_` reflects whichever split the encoder was fitted on most recently.
class_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
class_mapping

{'ALTERAC_VALLEY': 0,
 'BATTLEGROUNDS': 1,
 'BLACK_TEMPLE': 2,
 'BOOMSDAY': 3,
 'BRM': 4,
 'CORE': 5,
 'CREDITS': 6,
 'DALARAN': 7,
 'DARKMOON_FAIRE': 8,
 'DRAGONS': 9,
 'EXPERT1': 10,
 'GANGS': 11,
 'GILNEAS': 12,
 'GVG': 13,
 'ICECROWN': 14,
 'KARA': 15,
 'LEGACY': 16,
 'LETTUCE': 17,
 'LOE': 18,
 'LOOTAPALOOZA': 19,
 'MISSIONS': 20,
 'NAXX': 21,
 'OG': 22,
 'PLACEHOLDER_202204': 23,
 'REVENDRETH': 24,
 'SCHOLOMANCE': 25,
 'STORMWIND': 26,
 'TAVERNS_OF_TIME': 27,
 'TB': 28,
 'TGT': 29,
 'THE_BARRENS': 30,
 'THE_SUNKEN_CITY': 31,
 'TROLL': 32,
 'ULDUM': 33,
 'UNGORO': 34,
 'VANILLA': 35,
 'YEAR_OF_THE_DRAGON': 36}

In [92]:
# Hyperparameters, model, optimizer and loss for the Set task.
input_size = train_data[0].size(1)  # feature width, read from dim 1 of the first sample
hidden_size = 128
num_epochs = 30  # read as a global by train(); the other tasks in this notebook use 20
num_classes = len(label_encoder.classes_)  # classes known to the encoder after its most recent fit
lr = 0.0002
batch_size = 16

model = Model(input_size, hidden_size, num_classes).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)  # read as a global by train()
criterion = nn.CrossEntropyLoss()  # read as a global by train()

In [93]:
# Wrap each split in a Dataset / DataLoader; only the training loader shuffles.
train_dataset = CustomDataset(train_data, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = CustomDataset(test_data, test_labels)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

dev_dataset = CustomDataset(dev_data, dev_labels)
dev_loader = DataLoader(dev_dataset, batch_size=batch_size)

In [94]:
# Train on the training split (validating on dev every epoch), then report
# accuracy and log loss on the test split.
train(model, train_loader, dev_loader)
evaluate_accuracy(model, test_loader)
evaluate_log_loss(model, test_loader)

Epoch [1/30]
  Train Loss: 3.3281 - Validation Loss: 3.1008
Epoch [2/30]
  Train Loss: 2.9976 - Validation Loss: 2.8272
Epoch [3/30]
  Train Loss: 2.7651 - Validation Loss: 2.6517
Epoch [4/30]
  Train Loss: 2.6194 - Validation Loss: 2.5379
Epoch [5/30]
  Train Loss: 2.5090 - Validation Loss: 2.4425
Epoch [6/30]
  Train Loss: 2.3993 - Validation Loss: 2.3644
Epoch [7/30]
  Train Loss: 2.3141 - Validation Loss: 2.2883
Epoch [8/30]
  Train Loss: 2.2363 - Validation Loss: 2.2242
Epoch [9/30]
  Train Loss: 2.1665 - Validation Loss: 2.1666
Epoch [10/30]
  Train Loss: 2.1040 - Validation Loss: 2.1290
Epoch [11/30]
  Train Loss: 2.0514 - Validation Loss: 2.0840
Epoch [12/30]
  Train Loss: 1.9976 - Validation Loss: 2.0509
Epoch [13/30]
  Train Loss: 1.9567 - Validation Loss: 2.0208
Epoch [14/30]
  Train Loss: 1.9105 - Validation Loss: 1.9850
Epoch [15/30]
  Train Loss: 1.8660 - Validation Loss: 1.9631
Epoch [16/30]
  Train Loss: 1.8120 - Validation Loss: 1.9527
Epoch [17/30]
  Train Loss: 1.775



# AutoGluon

In [72]:
from autogluon.tabular import TabularDataset, TabularPredictor

In [77]:
# Load the Race training CSV as an AutoGluon tabular dataset.
# NOTE(review): this rebinds the name `train`, which earlier in the notebook is the
# training-loop function; any later call to train(model, ...) will fail after this cell.
train = TabularDataset(f'{DATA_PATH_HEARTHSTONE_RACE}/train.csv')
label = 'race'  # target column to predict

In [78]:
# Fit an AutoGluon tabular predictor for `label` using default settings (no presets).
predictor = TabularPredictor(label=label).fit(train)

No path specified. Models will be saved in: "AutogluonModels\ag-20240429_150521"
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality'   : Maximize accuracy. Default time_limit=3600.
	presets='high_quality'   : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality'   : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prototyping.
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels\ag-20240429_150521"
AutoGluon Version:  1.1.0
Python Version:     3.11.9
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.19045
CPU Count:          12
Memory Avail:       22.86 GB / 31.95 GB (71.5%)
Disk Space Avail:   226

In [79]:
# Predict on the test frame with the target column removed.
# NOTE(review): `df_test` is a notebook global that every task section reassigns, and
# preprocess_df adds/overwrites columns on it in place — confirm it holds the Race
# test split in the expected form when this cell runs.
y_pred = predictor.predict(df_test.drop(columns=[label]))

In [80]:
# Score the fitted predictor on the test frame; the metrics dict is the cell output.
predictor.evaluate(df_test, silent=True)

{'accuracy': 0.7766798418972332,
 'balanced_accuracy': 0.6358084823002375,
 'mcc': 0.6556862418092518}

In [82]:
# Per-model comparison table, with test scores computed on `df_test`.
predictor.leaderboard(df_test)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,0.77668,0.825926,accuracy,1.009249,0.264622,411.284641,0.009997,0.000998,0.156773,2,True,14
1,LightGBMXT,0.764822,0.764815,accuracy,0.166,0.032028,9.456308,0.166,0.032028,9.456308,1,True,4
2,XGBoost,0.755929,0.757407,accuracy,0.365675,0.036,11.841154,0.365675,0.036,11.841154,1,True,11
3,LightGBM,0.753953,0.764815,accuracy,0.269999,0.069487,11.533742,0.269999,0.069487,11.533742,1,True,5
4,ExtraTreesGini,0.73913,0.72963,accuracy,0.292216,0.064921,1.113638,0.292216,0.064921,1.113638,1,True,9
5,LightGBMLarge,0.735178,0.742593,accuracy,0.202001,0.036648,18.857769,0.202001,0.036648,18.857769,1,True,13
6,ExtraTreesEntr,0.733202,0.722222,accuracy,0.260048,0.077563,1.143402,0.260048,0.077563,1.143402,1,True,10
7,RandomForestGini,0.728261,0.731481,accuracy,0.256249,0.065462,1.244874,0.256249,0.065462,1.244874,1,True,6
8,CatBoost,0.72332,0.788889,accuracy,0.069002,0.038999,363.975809,0.069002,0.038999,363.975809,1,True,8
9,NeuralNetFastAI,0.721344,0.785185,accuracy,0.036001,0.021,6.059366,0.036001,0.021,6.059366,1,True,3
