In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Read the house-sales CSV and discard the 'Unnamed: 0' column, which this
# script treats as a leftover row index rather than a feature.
file_path = '/content/sample_data/house_sales.csv'
data = pd.read_csv(file_path).drop(columns=['Unnamed: 0'])

# Convert the continuous sale price into three quantile-based classes so the
# task becomes classification instead of regression.
price_bins = pd.qcut(
    data['House_Sale_Price'],
    q=3,
    labels=['low', 'medium', 'high'],
)
data = data.assign(Price_Category=price_bins)

# Target: the derived price class.
# Features: every remaining column. The raw price is excluded as well, since
# the class label is computed directly from it.
y = data['Price_Category']
X = data.drop(['House_Sale_Price', 'Price_Category'], axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both splits so no test-set statistics reach the models.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Linear-kernel SVM: fit on the standardized training features, then score
# its predictions on the held-out standardized test features.
svm_model = SVC(kernel='linear', random_state=42).fit(X_train_scaled, y_train)
y_pred_svm = svm_model.predict(X_test_scaled)
accuracy_svm = accuracy_score(y_test, y_pred_svm)

# Headline accuracy first, then the per-class precision/recall/F1 table.
print(f'SVM Accuracy: {accuracy_svm:.2f}')
print(
    "SVM Classification Report:",
    classification_report(y_test, y_pred_svm),
    sep="\n",
)

# Random forest with 100 trees, trained and evaluated on the same
# standardized features as the SVM so the two accuracies are comparable.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)
y_pred_rf = rf_model.predict(X_test_scaled)
accuracy_rf = accuracy_score(y_test, y_pred_rf)

# Headline accuracy first, then the per-class precision/recall/F1 table.
print(f'Random Forest Accuracy: {accuracy_rf:.2f}')
print(
    "Random Forest Classification Report:",
    classification_report(y_test, y_pred_rf),
    sep="\n",
)

# Report which model scored higher on the held-out set. The "same accuracy"
# message is the default and is only replaced when one score strictly
# exceeds the other.
verdict = "Both models have the same accuracy."
if accuracy_rf < accuracy_svm:
    verdict = "SVM performs better than Random Forest."
elif accuracy_svm < accuracy_rf:
    verdict = "Random Forest performs better than SVM."
print(verdict)


SVM Accuracy: 0.63
SVM Classification Report:
              precision    recall  f1-score   support

        high       0.78      0.69      0.73      1496
         low       0.63      0.71      0.67      1497
      medium       0.50      0.49      0.49      1545

    accuracy                           0.63      4538
   macro avg       0.63      0.63      0.63      4538
weighted avg       0.63      0.63      0.63      4538

Random Forest Accuracy: 0.66
Random Forest Classification Report:
              precision    recall  f1-score   support

        high       0.75      0.77      0.76      1496
         low       0.67      0.70      0.68      1497
      medium       0.56      0.53      0.54      1545

    accuracy                           0.66      4538
   macro avg       0.66      0.66      0.66      4538
weighted avg       0.66      0.66      0.66      4538

Random Forest performs better than SVM.
