In [None]:
# ==== System info ====
!nvidia-smi -L || echo "CPU-only runtime"
!python --version

# ==== Install AutoGluon and Kaggle ====
!pip install -U pip
!pip install -U "autogluon>=1.0" kaggle

# ==== Ensure folders exist ====
import os
for d in ["data","artifacts"]:
    os.makedirs(d, exist_ok=True)
print("Setup complete. Remember: Edit ▸ Notebook settings ▸ uncheck 'Omit code cell output when saving'.")


In [None]:
# ==== Kaggle API setup ====
from google.colab import files
from pathlib import Path
import os

print("Upload kaggle.json (Kaggle ▸ Account ▸ Create New API Token)")
uploaded = files.upload()  # choose kaggle.json from your computer

Path("/root/.kaggle").mkdir(parents=True, exist_ok=True)
with open("/root/.kaggle/kaggle.json","wb") as f:
    f.write(uploaded['kaggle.json'])
os.chmod("/root/.kaggle/kaggle.json", 0o600)

!kaggle competitions list -s fraud | head -n 5


In [None]:
# ---- Download & unzip ----
!kaggle competitions download -c ieee-fraud-detection -p data
!unzip -qo data/ieee-fraud-detection.zip -d data/ieee
print("Files in data/ieee:", !ls -1 data/ieee | head -n 10)


In [None]:
# ---- Load & merge ----
# Read the four competition CSVs, then attach the identity columns to each
# transaction table. The join is a left join on TransactionID, so every
# transaction row is kept even when it has no identity record.
import pandas as pd

train_tr = pd.read_csv('data/ieee/train_transaction.csv')
train_id = pd.read_csv('data/ieee/train_identity.csv')
test_tr = pd.read_csv('data/ieee/test_transaction.csv')
test_id = pd.read_csv('data/ieee/test_identity.csv')

train = pd.merge(train_tr, train_id, how='left', on='TransactionID')
test = pd.merge(test_tr, test_id, how='left', on='TransactionID')

# Quick sanity output: merged frame sizes and the relative class frequencies
# of the isFraud column in the training frame.
print("Shapes:", train.shape, test.shape)
print("Target distribution:", train['isFraud'].value_counts(normalize=True))


In [None]:
# ---- Optional speed tweak: downcast floats ----
def _downcast_float64_columns(frame):
    """Re-assign every float64 column of ``frame`` as float32, on the same object."""
    wide_float_cols = frame.select_dtypes(include=['float64']).columns
    for col in wide_float_cols:
        frame[col] = frame[col].astype('float32')


# Apply the same conversion to both merged frames.
for df in (train, test):
    _downcast_float64_columns(df)
print('Downcast done')


In [None]:
# ---- Train AutoGluon ----
# Fit a TabularPredictor on the merged training frame and store the models
# under ag_ieee/.
from autogluon.tabular import TabularPredictor

label = 'isFraud'
predictor = TabularPredictor(
    label=label,
    problem_type='binary',
    # The IEEE-CIS Fraud Detection competition is scored by ROC AUC. Without an
    # explicit eval_metric AutoGluon tunes and ranks binary models by accuracy,
    # so state the metric to make model selection match the submission score.
    eval_metric='roc_auc',
    path='ag_ieee/',
).fit(
    train_data=train,
    time_limit=1800,                  # adjust for longer/better training (e.g., 3600)
    presets='medium_quality_faster_train',  # swap to 'best_quality' if you have more time
)
print("Training complete.")


In [None]:
# ---- Leaderboard & Feature Importance → artifacts/ ----
# Destination files for the two reports.
lb_path = 'artifacts/ieee_fraud_leaderboard.csv'
fi_path = 'artifacts/ieee_fraud_feature_importance.csv'

# Model leaderboard (silent=True suppresses the printed table).
lb = predictor.leaderboard(silent=True)
# NOTE(review): importance is computed on `train`, the same frame passed to
# fit(); scores may be optimistic — consider a held-out frame. TODO confirm.
fi = predictor.feature_importance(train)

# The leaderboard is written without its index; the feature-importance frame
# keeps its index in the CSV.
lb.to_csv(path_or_buf=lb_path, index=False)
fi.to_csv(path_or_buf=fi_path)

print("Saved:", lb_path, fi_path)
(lb.head(10), fi.head(10))


In [None]:
# ---- Predictions for Kaggle submission ----
sub_path = 'artifacts/ieee_fraud_submission.csv'

# predict_proba output is indexed by column label; the column labelled 1 is
# used as the fraud (positive class) probability.
probas = predictor.predict_proba(test)
submission_columns = {
    'TransactionID': test['TransactionID'],
    'isFraud': probas[1],
}
sub = pd.DataFrame(submission_columns)

# Write the two-column submission file without the DataFrame index.
sub.to_csv(sub_path, index=False)
print("Saved:", sub_path)
sub.head()
