In [2]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from google.colab import files  # unique to Colab

# 1. Load Data
# Read the Ames housing CSV from its absolute path in the Colab session.
df = pd.read_csv('/content/AmesHousing.csv')

# --- Clean Column Names ---
# Remove every space from each header so the space-free names used below resolve.
df = df.rename(columns=lambda name: name.replace(' ', ''))

# 2. Select Features
# Regression target: the raw sale price.
y_price = df['SalePrice']
# Classification target: 1 when the price is strictly above the median, else 0.
y_class = y_price.gt(y_price.median()).astype(int)

# The five predictor columns fed to both models.
X = df.loc[
    :,
    ['OverallQual', 'YearBuilt', 'GrLivArea', 'FullBath', 'BedroomAbvGr'],
]

# Handle missing values
# Each gap is replaced by the mean of its own column.
# NOTE(review): these column means are not among the objects saved later in
# this cell -- confirm how missing inputs are handled at prediction time.
X = X.fillna(value=X.mean())

# 3. Scale the Data
# Fit the scaler on the feature frame, then apply it to the same frame.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# 4. Train Models
# Both forests use 100 trees and a fixed seed so reruns are reproducible.
print("Training Price Model...")
reg_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_scaled, y_price)

print("Training Classification Model...")
clf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_scaled, y_class)

# 5. Save Models
# Output filename -> fitted object, listed in the order they are written
# and later downloaded.
artifacts = {
    'price_model.pkl': reg_model,
    'class_model.pkl': clf_model,
    'scaler.pkl': scaler,
}
for artifact_name, fitted_object in artifacts.items():
    joblib.dump(fitted_object, artifact_name)

print("✅ Training Complete. Downloading files now...")

# 6. Trigger Download (Browser will ask to save 3 files)
for artifact_name in artifacts:
    files.download(artifact_name)

Training Price Model...
Training Classification Model...
✅ Training Complete. Downloading files now...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>