In [0]:
!pip install flaml

### Prepared by Ian Kidwell

In [0]:
# Install FLAML (with its AutoML extra) on the Databricks cluster first:
# %pip install "flaml[automl]"

import pandas as pd
from flaml import AutoML
from sklearn.metrics import mean_squared_error
import numpy as np
import os

# --- Load the train/test splits created in Step 2 ---
# Read '../data/train.csv' and '../data/test.csv' instead of the full dataset,
# so only the training rows are passed to AutoML and the test rows are used
# solely for the final evaluation below.
train_path = "../data/train.csv"
test_path = "../data/test.csv"

print(f"Loading training data from: {os.path.abspath(train_path)}")
df_train = pd.read_csv(train_path)
df_test = pd.read_csv(test_path)

# Name of the column to predict; every other column is used as a feature.
# NOTE(review): the name suggests a log-transformed sale price, so the RMSE
# printed at the end would be on that log scale — confirm against Step 2.
target = "SalePrice_log"

# Prepare Train Data
X_train = df_train.drop(columns=[target])
y_train = df_train[target]

# Prepare Test Data (for final evaluation)
X_test = df_test.drop(columns=[target])
y_test = df_test[target]

# AutoML setup
automl = AutoML()
automl_settings = {
    "time_budget": 300,  # 5 minutes max
    "metric": 'rmse',
    "task": 'regression',
    "log_file_name": "flaml.log",
    # Fix the search seed so reruns are comparable. The search is still bounded
    # by wall-clock time, so results may not be bit-for-bit identical.
    "seed": 42,
}

# Train on the training split only
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

# Best model & prediction on the held-out test split
print("Best ML learner:", automl.best_estimator)
y_pred = automl.predict(X_test)

# Evaluate: RMSE is the square root of the mean squared error.
# `mean_squared_error` and `np` come from the import block at the top of this
# cell, so they are not re-imported here.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Test RMSE:", rmse)
