In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report



In [3]:
# Load dataset
data = pd.read_csv("../Data Set/Housing.csv")  # or use pd.read_clipboard() if pasting



In [4]:
data.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [5]:
data['price'].value_counts()

price
4200000    17
3500000    17
4900000    12
5250000     9
5600000     9
           ..
2135000     1
1960000     1
1855000     1
1820000     1
1767150     1
Name: count, Length: 219, dtype: int64

In [6]:
def price_Category(price):
    if price <= 2000000:
        return 'Low'
    elif price >= 2000000 and price < 4000000:
        return 'Medium'
    else:
        return 'High'

In [7]:
# Convert categorical to numerical
label_cols = ['mainroad', 'guestroom', 'basement', 'hotwaterheating',
              'airconditioning', 'prefarea', 'furnishingstatus']

le = LabelEncoder()
for col in label_cols:
    data[col] = le.fit_transform(data[col])

# Convert price to categories (Low, Medium, High)
data['price_category'] = data['price'].apply(price_Category)

# Drop original price for classification
X = data.drop(['price', 'price_category'], axis=1)
y = data['price_category']

# Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random Forest
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)

# SVC
svc = SVC()
svc.fit(X_train, y_train)
svc_preds = svc.predict(X_test)






# Evaluation
def evaluate_model(y_true, y_pred, name):
    print(f"\n{name} Results")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, average='weighted'))
    print("Recall:", recall_score(y_true, y_pred, average='weighted'))
    print("\nClassification Report:\n", classification_report(y_true, y_pred))

evaluate_model(y_test, rf_preds, "Random Forest")
evaluate_model(y_test, svc_preds, "Support Vector Classifier")




Random Forest Results
Accuracy: 0.8348623853211009
Precision: 0.8077763215377894
Recall: 0.8348623853211009

Classification Report:
               precision    recall  f1-score   support

        High       0.90      0.90      0.90        67
         Low       0.00      0.00      0.00         4
      Medium       0.74      0.82      0.78        38

    accuracy                           0.83       109
   macro avg       0.54      0.57      0.56       109
weighted avg       0.81      0.83      0.82       109


Support Vector Classifier Results
Accuracy: 0.7247706422018348
Precision: 0.711981056217195
Recall: 0.7247706422018348

Classification Report:
               precision    recall  f1-score   support

        High       0.83      0.78      0.80        67
         Low       0.00      0.00      0.00         4
      Medium       0.59      0.71      0.64        38

    accuracy                           0.72       109
   macro avg       0.47      0.50      0.48       109
weighted avg  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [8]:
data['price_category'].value_counts()

price_category
High      326
Medium    210
Low         9
Name: count, dtype: int64

In [9]:
sample_input = np.array([3000,2,1	,1	,1	,0	,1	,0	,0	,2	,0	,2]).reshape(1, 12)
predicted_price = rf.predict(sample_input)
print("Predicted price:", predicted_price[0])

Predicted price: Medium


