In [None]:
# ==== System info ====
!nvidia-smi -L || echo "CPU-only runtime"
!python --version

# ==== Install AutoGluon and Kaggle ====
!pip install -U pip
!pip install -U "autogluon>=1.0" kaggle

# ==== Ensure folders exist ====
import os
# Create the working directories up front; exist_ok=True keeps this cell
# safe to re-run when the folders are already present.
for folder in ("data", "artifacts"):
    os.makedirs(folder, exist_ok=True)
print("Setup complete. Remember: Edit ▸ Notebook settings ▸ uncheck 'Omit code cell output when saving'.")


In [None]:
# ---- Automatic feature engineering demo ----
from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.features.generators import AutoMLPipelineFeatureGenerator
import pandas as pd

# Load the example train/test CSVs from AutoGluon's public bucket.
# `label` is the name of the target column used for training below.
train = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv')
test  = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv')
label = 'class'

# Fit a standalone feature generator purely to inspect the engineered features.
# The keyword names must match AutoMLPipelineFeatureGenerator's signature;
# unrecognised keywords are forwarded to the base generator and raise TypeError.
fg = AutoMLPipelineFeatureGenerator(
    enable_categorical_features=True,
    enable_text_special_features=True,
)
X_train = train.drop(columns=[label])
# fit_transform fits the generator and returns the transformed training frame
# in a single pass, instead of transforming the same data a second time.
X_train_transformed = fg.fit_transform(X=X_train)

# Save engineered feature names, one per line (explicit encoding so the file
# is written the same way regardless of the platform default).
feat_list_path = 'artifacts/feature_engineering_columns.txt'
with open(feat_list_path, 'w', encoding='utf-8') as f:
    f.write("\n".join(map(str, X_train_transformed.columns)))

print(f"Engineered {X_train_transformed.shape[1]} features. Saved list to {feat_list_path}")

# Train a predictor on the raw training frame. The frame transformed above is
# NOT passed in: TabularPredictor runs its own feature-generation pipeline
# internally, so the standalone generator serves for inspection only.
predictor = TabularPredictor(label=label, path='ag_feature_eng/').fit(
    train_data=train,
    time_limit=600,  # seconds of training budget
    presets='medium_quality'  # canonical name of the 'medium_quality_faster_train' alias
)

# Score every trained model on the held-out test set and persist the table.
lb = predictor.leaderboard(test, silent=True)
lb_path = 'artifacts/feature_eng_leaderboard.csv'
lb.to_csv(lb_path, index=False)
print("Saved:", lb_path)
# Last expression of the cell: rendered as the cell's rich output.
lb.head(10)
