In [1]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC


# Data extraction and preparation of the scaled train and test data sets.
# Forward slashes keep the relative path usable on every OS; the backslash
# form only resolves on Windows.
df = pd.read_excel('Excel_files/Processed data combined.xlsx')

# Target label derived from 'room_number': '<2.5' below 2.5, '<4.5' from 2.5 up
# to (but excluding) 4.5, and '4.5+' for everything else. Values that fail both
# comparisons (e.g. NaN) also land in '4.5+'.
df['room_groups'] = df['room_number'].apply(
    lambda x: '<2.5' if x < 2.5 else ('<4.5' if x < 4.5 else '4.5+')
)

# Features are the 'price' and 'sq_meters' columns; labels are the room groups.
values = df[['price', 'sq_meters']].values.tolist()
labels = list(df['room_groups'])

# 75/25 split with a fixed seed so the split, and every score computed from it,
# is reproducible under Restart & Run All.
train_values, test_values, train_labels, test_labels = train_test_split(
    values, labels, test_size=0.25, random_state=42
)

# Fit the scaler on the training split only and reuse its mean/std for the test
# split. Refitting on the test data would scale the two splits with different
# statistics and leak test-set information into preprocessing.
scaler = StandardScaler()
train_values_scaled = scaler.fit_transform(train_values)
test_values_scaled = scaler.transform(test_values)

In [2]:
# k-nearest neighbors: predict the room group from the scaled features.
# Candidate neighborhood sizes to try: every k from 25 through 149.
param_grid_knear = {
    'n_neighbors': range(25, 150),
}

# Base estimator; GridSearchCV overrides n_neighbors for each candidate.
knear_classifier = KNeighborsClassifier()

# 5-fold cross-validated grid search scored by accuracy, fitted on the
# training split (fit() returns the fitted search object itself).
grid_search_knear = GridSearchCV(
    estimator=knear_classifier,
    param_grid=param_grid_knear,
    cv=5,
    scoring='accuracy',
).fit(train_values_scaled, train_labels)

# Winning setting and its mean cross-validated accuracy.
best_params, best_accuracy = (
    grid_search_knear.best_params_,
    grid_search_knear.best_score_,
)

print("Best Parameters:", best_params)
print("Best Accuracy:", best_accuracy)

Best Parameters: {'n_neighbors': 26}
Best Accuracy: 0.8646464646464647


In [3]:
# Room number prediction using random forest algorithm
# Define the parameter grid: forest sizes 100, 200, ..., 1400 trees.
param_grid_rforest = {'n_estimators': range(100, 1500, 100)}

# Initialize RandomForestClassifier with a fixed seed. Without it every fit
# draws different bootstrap samples, so differences between n_estimators
# candidates are mixed with random noise and the selected value changes from
# run to run.
rforest_classifier = RandomForestClassifier(random_state=42)

# Perform Grid Search Cross Validation (5 folds, scored by accuracy)
grid_search_rforest = GridSearchCV(rforest_classifier, param_grid_rforest, cv=5, scoring='accuracy')
grid_search_rforest.fit(train_values_scaled, train_labels)

# Get the best parameters and their mean cross-validated accuracy
best_params_rforest = grid_search_rforest.best_params_
best_accuracy_rforest = grid_search_rforest.best_score_

print("Best Parameters:", best_params_rforest)
print("Best Accuracy:", best_accuracy_rforest)

Best Parameters: {'n_estimators': 300}
Best Accuracy: 0.8363636363636363


In [4]:
# Support vector machine: predict the room group from the scaled features.
# Full cross-product of regularization strength, kernel coefficient and kernel.
param_grid_svm = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'linear', 'poly', 'sigmoid'],
}

# Base estimator; GridSearchCV sets C, gamma and kernel for each candidate.
svm_classifier = SVC()

# 5-fold cross-validated grid search scored by accuracy, fitted on the
# training split (fit() returns the fitted search object itself).
grid_search_svm = GridSearchCV(
    estimator=svm_classifier,
    param_grid=param_grid_svm,
    cv=5,
    scoring='accuracy',
).fit(train_values_scaled, train_labels)

# Winning combination and its mean cross-validated accuracy.
best_params_svm, best_accuracy_svm = (
    grid_search_svm.best_params_,
    grid_search_svm.best_score_,
)

print("Best Parameters:", best_params_svm)
print("Best Accuracy:", best_accuracy_svm)

Best Parameters: {'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}
Best Accuracy: 0.8666666666666666
