In [1]:
# ==== System info ====
!nvidia-smi -L || echo "CPU-only runtime"
!python --version

# ==== Install AutoGluon and Kaggle ====
!pip install -U pip
!pip install -U "autogluon>=1.0" kaggle

# ==== Ensure folders exist ====
import os
for d in ["data","artifacts"]:
    os.makedirs(d, exist_ok=True)
print("Setup complete. Remember: Edit ▸ Notebook settings ▸ uncheck 'Omit code cell output when saving'.")


/bin/bash: line 1: nvidia-smi: command not found
CPU-only runtime
Python 3.12.12
Collecting pip
  Downloading pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.2-py3-none-any.whl (1.8 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 MB 20.9 MB/s eta 0:00:00
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.2
Collecting autogluon>=1.0
  Downloading autogluon-1.4.0-py3-none-any.whl.metadata (11 kB)
Collecting autogluon.core==1.4.0 (from autogluon.core[all]==1.4.0->autogluon>=1.0)
  Downloading autogluon.core-1.4.0-py3-none-any.whl.metadata (12 kB)
Collecting autogluon.features==1.4.0 (from autogluon>=1.0)
  Downloading autogluon.features-1.4.0-py3-none-any.whl.metadata (11 kB)
Collecting autogluon.tabular==1.4.0 (from autogluon.tabular[all]==1.4.0->autogluon>=1.0)
  D

In [2]:
# ---- Multimodal tabular demo: add text feature ----
import pandas as pd
from autogluon.tabular import TabularDataset, TabularPredictor

# Train/test splits are read as CSV directly from the AutoGluon S3 bucket (needs network access).
train = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv')
test  = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv')
# Target column: passed to TabularPredictor below and left out of the synthetic text feature.
label = 'class'

# Create a simple text feature by concatenating a few columns
def add_text_feature(df, label_col=None):
    """Return a copy of ``df`` with an extra free-text column ``desc_text``.

    The text is built per row by joining, with single spaces, the string
    form of the first three text-typed feature columns (``object`` or pandas
    ``StringDtype``). If fewer than two such columns exist, every feature
    column is joined instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    label_col : str, optional
        Column to keep out of the text so the target does not leak into a
        feature. Defaults to the notebook-level ``label`` variable.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with ``desc_text`` added (or replaced).
    """
    if label_col is None:
        label_col = label  # fall back to the notebook-level target name
    out = df.copy()
    # Skip a pre-existing desc_text so re-applying the function gives the same result.
    feature_cols = [c for c in out.columns if c not in (label_col, 'desc_text')]
    # Accept both classic object columns and pandas' dedicated string dtype;
    # comparing the dtype to 'object' alone misses the latter.
    text_cols = [
        c for c in feature_cols
        if pd.api.types.is_object_dtype(out[c].dtype)
        or isinstance(out[c].dtype, pd.StringDtype)
    ][:3]  # take a few cat columns
    source_cols = text_cols if len(text_cols) >= 2 else feature_cols
    out['desc_text'] = out[source_cols].astype(str).agg(' '.join, axis=1)
    return out

# Add the synthetic text column to both splits with the same helper.
train_mm = add_text_feature(train)
test_mm  = add_text_feature(test)

# Fit with a 900-second budget; models are saved under 'ag_multimodal/'.
predictor = TabularPredictor(label=label, path='ag_multimodal/').fit(
    train_data=train_mm,
    time_limit=900,
    presets='medium_quality_faster_train'
)

# Score every trained model and compute feature importance on the held-out test split.
lb = predictor.leaderboard(test_mm, silent=True)
fi = predictor.feature_importance(test_mm)

lb_path = 'artifacts/multimodal_leaderboard.csv'
fi_path = 'artifacts/multimodal_feature_importance.csv'
lb.to_csv(lb_path, index=False)
# Index is kept here — presumably it holds the feature names; confirm against the saved CSV.
fi.to_csv(fi_path)

print("Saved:", lb_path, fi_path)
# display() renders each frame as its own table; a bare tuple would print as one text repr.
display(lb.head(10), fi.head(10))


Preset alias specified: 'medium_quality_faster_train' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.12.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Oct  2 10:42:05 UTC 2025
CPU Count:          2
Memory Avail:       11.40 GB / 12.67 GB (90.0%)
Disk Space Avail:   62.11 GB / 107.72 GB (57.7%)
Presets specified: ['medium_quality_faster_train']
Using hyperparameters preset: hyperparameters='default'
Beginning AutoGluon training ... Time limit = 900s
AutoGluon will save models to "/content/ag_multimodal"
Train Data Rows:    39073
Train Data Columns: 15
Label Column:       class
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [' <=50K', ' >50K']
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary',

Saved: artifacts/multimodal_leaderboard.csv artifacts/multimodal_feature_importance.csv


(                 model  score_test  score_val eval_metric  pred_time_test  \
 0              XGBoost    0.875422     0.8876    accuracy        0.150611   
 1  WeightedEnsemble_L2    0.875422     0.8876    accuracy        0.154048   
 2             CatBoost    0.871737     0.8832    accuracy        0.046511   
 3             LightGBM    0.871532     0.8836    accuracy        0.219175   
 4        LightGBMLarge    0.870406     0.8860    accuracy        0.552181   
 5           LightGBMXT    0.869997     0.8828    accuracy        0.659699   
 6      NeuralNetFastAI    0.858532     0.8616    accuracy        0.236763   
 7     RandomForestEntr    0.856997     0.8536    accuracy        1.700191   
 8       NeuralNetTorch    0.856792     0.8584    accuracy        0.100514   
 9     RandomForestGini    0.854028     0.8520    accuracy        2.440612   
 
    pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  \
 0       0.046986    6.959222                 0.150611    