In [1]:
import pandas as pd
import numpy as np
from autogluon.tabular import TabularPredictor
 
# -----------------------------
# Load Data
# -----------------------------
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
 
# keep label in training data
train_data = train.drop(columns=['id'])
test_data = test.drop(columns=['id'])
 
# -----------------------------
# Feature Engineering
# -----------------------------
def add_features(df):
    df = df.copy()
   
    # duration in minutes
    df["duration_min"] = df["song_duration_ms"] / 60000
   
    # log transforms for skewed continuous features
    for col in ["loudness", "song_duration_ms", "tempo"]:
        df[f"log_{col}"] = np.log1p(df[col].abs() + 1e-6)
   
    # ratios / interactions
    df["energy_per_dance"] = df["energy"] / (df["danceability"] + 1e-6)
    df["speech_per_liveness"] = df["speechiness"] / (df["liveness"] + 1e-6)
    df["acoustic_x_instrumental"] = df["acousticness"] * df["instrumentalness"]
    df["dance_energy"] = df["danceability"] * df["energy"]
   
    return df
 
train_data = add_features(train_data)
test_data = add_features(test_data)
 
# -----------------------------
# Hyperparameters for max power
# -----------------------------
hyperparameters = {
    'GBM': [
        {},
        {'extra_trees': True},
        {'ag_args': {'name_suffix': 'XGB'}, 'use_xgb': True},  
    ],
    'CAT': {},          
    'NN_TORCH': {},    
    'FASTAI': {},      
    'RF': [
        {}, {'criterion': 'entropy'}
    ],
    'XT': {},          
    'KNN': {},          
}
 
# -----------------------------
# AutoML Training
# -----------------------------
predictor = TabularPredictor(label='song_popularity', problem_type='binary', eval_metric='roc_auc').fit(train_data, time_limit=21600, presets='best', num_bag_folds=10, num_stack_levels=3, ag_args_fit={'num_gpus': 1}, hyperparameters=hyperparameters)
 
# -----------------------------
# Predict on Test
# -----------------------------
test_predictions_proba = predictor.predict_proba(test_data)[1]
 
# -----------------------------
# Submission
# -----------------------------
submission = pd.DataFrame({
    'id': test['id'],
    'song_popularity': test_predictions_proba
})
submission.to_csv('submission_max.csv', index=False)
 
print("Submission file 'submission_max.csv' created successfully.")
print(submission.head())
 

  from .autonotebook import tqdm as notebook_tqdm
No path specified. Models will be saved in: "AutogluonModels/ag-20251005_163507"
Preset alias specified: 'best' maps to 'best_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.11.13
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #32-Ubuntu SMP PREEMPT_DYNAMIC Fri Aug 29 14:21:26 UTC 2025
CPU Count:          20
Memory Avail:       7.61 GB / 14.84 GB (51.3%)
Disk Space Avail:   76.67 GB / 137.42 GB (55.8%)
Presets specified: ['best']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=3, num_bag_folds=10, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal

Submission file 'submission_max.csv' created successfully.
   id  song_popularity
0   0         0.345203
1   1         0.304189
2   2         0.316706
3   3         0.397028
4   4         0.416990
