# 🏠 TrueVal AI Model Training
This notebook trains a Random Forest regression model to predict house prices using property data from `airtable_data.csv`.

In [None]:
# ✅ Step 1: Import libraries
import os
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import joblib

In [None]:
# ✅ Step 2: Define paths and (optionally) clear old data
DATA_FILE = "airtable_data.csv"
MODEL_FILE = "ai_estimator.pkl"

# Safety switch: wiping DATA_FILE is destructive, and doing it right before
# the data is loaded leaves nothing to train on. Keep this False for a normal
# training run; set it to True only to deliberately reset the dataset.
RESET_TRAINING_DATA = False


# Option: Clear CSV content but keep headers
def clear_csv_file(path):
    """Truncate the CSV at ``path`` so that it holds only the header row.

    Args:
        path: Location of the CSV file to reset. The file is created if it
            does not exist; any existing rows are discarded.
    """
    with open(path, "w", encoding="utf-8") as f:
        f.write("price,bedrooms,postcode,heating_type,epc_rating,sqft\n")
    print(f"✅ Cleared contents of {path}")


if RESET_TRAINING_DATA:
    clear_csv_file(DATA_FILE)

In [None]:
# ✅ Step 3: Load and clean data
df = pd.read_csv(DATA_FILE)

# Rows lacking the target or either key predictor cannot be used for training.
df = df.dropna(subset=['price', 'bedrooms', 'postcode'])
if df.empty:
    # Fail here with an actionable message instead of letting the train/test
    # split raise a less obvious error about having zero samples.
    raise ValueError(
        f"No usable rows in {DATA_FILE}: the file has no data rows, or every "
        "row is missing price, bedrooms or postcode."
    )

# Keep the first run of word characters of each postcode as a coarse area
# code. expand=False makes extract() return a Series rather than a
# one-column DataFrame, which is what a single-column assignment expects.
df['postcode_prefix'] = df['postcode'].str.extract(r'(\w+)', expand=False)

# One-hot encode the categorical columns; drop_first removes one level per
# column so the indicator columns are not perfectly collinear.
df = pd.get_dummies(df, columns=['postcode_prefix', 'heating_type', 'epc_rating'], drop_first=True)

# NOTE(review): any column outside the dropna subset above (e.g. 'sqft', per
# the CSV header) can still carry NaN into the features — confirm the
# installed scikit-learn accepts missing values, or impute/drop them here.
features = df.drop(columns=['price', 'postcode'])
labels = df['price']

In [None]:
# ✅ Step 4: Train the model
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training chain.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

In [None]:
# ✅ Step 5: Evaluate and save model
# Score the model on the held-out rows: mean absolute error, printed in pounds.
preds = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=preds)
print(f"📊 MAE: £{mae:,.2f}")

# Persist the fitted estimator to MODEL_FILE with joblib.
joblib.dump(model, filename=MODEL_FILE)
print(f"✅ Model saved as {MODEL_FILE}")