In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load the raw house-price table from the CSV file in the working directory.
csv_path = "house_price.csv"
df = pd.read_csv(csv_path)

In [3]:
# Convert the 'date' column to pandas datetime values.
# NOTE(review): 'date' is dropped a few cells later without being read in
# between, so this conversion does not currently feed into the model.
df['date'] = df['date'].pipe(pd.to_datetime)

In [4]:
# Derive 'zip' as the last whitespace-separated token of 'statezip'.
# The vectorised .str accessor replaces the per-row lambda: a missing or blank
# 'statezip' now becomes NaN instead of raising AttributeError / IndexError.
df['zip'] = df['statezip'].str.split().str[-1]

In [5]:
# Remove the columns that are not passed to the model as features.
# The result is reassigned instead of mutating in place, and errors='ignore'
# skips labels that are already absent, so re-running this cell on an
# already-trimmed frame no longer raises KeyError.
df = df.drop(columns=['street', 'statezip', 'country', 'date'], errors='ignore')

In [6]:
from sklearn.preprocessing import LabelEncoder

In [7]:
# Integer-encode the two location columns. One encoder per column is kept so
# the fitted mappings remain available after this cell (they are saved later).
le_city = LabelEncoder()
le_zip = LabelEncoder()
for column_name, encoder in (('city', le_city), ('zip', le_zip)):
    df[column_name] = encoder.fit_transform(df[column_name])

In [8]:
# Bucket 'price' into three quantile-based bins, one label per bin
# (lowest prices -> 'Low', highest -> 'High').
price_labels = ['Low', 'Medium', 'High']
df['price_category'] = pd.qcut(df['price'], q=len(price_labels), labels=price_labels)

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

In [10]:
# Target: the price bucket. Features: every other column except the raw
# 'price', which the bucket was computed from and would leak the answer.
target_column = 'price_category'
y = df[target_column]
X = df.drop(columns=['price', target_column])

In [11]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [12]:
# Random forest with the library's default hyperparameters; only the seed is
# pinned so that training is reproducible.
forest_options = {'random_state': 42}
clf = RandomForestClassifier(**forest_options)

In [13]:
# Train the classifier on the training split. Left as the cell's final
# expression so the notebook displays the fitted estimator.
clf.fit(X=X_train, y=y_train)

In [14]:
# Predict a price bucket for every row of the held-out test split.
clf_pred = clf.predict(X=X_test)


In [15]:
# Per-class precision / recall / F1 of the predictions against the true
# test labels, printed under a heading.
report_text = classification_report(y_test, clf_pred)
print("Classifier Report:\n", report_text)


Classifier Report:
               precision    recall  f1-score   support

        High       0.74      0.79      0.76       308
         Low       0.81      0.74      0.77       314
      Medium       0.61      0.62      0.61       298

    accuracy                           0.72       920
   macro avg       0.72      0.72      0.72       920
weighted avg       0.72      0.72      0.72       920



In [16]:
# Persist the trained model together with both fitted label encoders, so the
# same city / zip integer mappings can be reloaded alongside the classifier.
joblib.dump(value=clf, filename="house_price_classifier.pkl")
joblib.dump(value=le_city, filename="label_encoder_city.pkl")
joblib.dump(value=le_zip, filename="label_encoder_zip.pkl")

['label_encoder_zip.pkl']