In [None]:
pip install -r ./requirements.txt

In [None]:
import time

import matplotlib.pyplot as plt

import pandas as pd

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsRegressor

# Relative path of the merged book-attributes CSV that this notebook loads
# with pandas before modelling.
merged_book_attributes_gold = './data/gold/merged_book_attributes.csv'

In [None]:
# Load the merged book-attributes CSV into a DataFrame.
merged_book_attributes = pd.read_csv(
    filepath_or_buffer=merged_book_attributes_gold,
)

In [None]:
# Columns used as predictors and as the regression target.
feature_columns = [
    'price',
    'full_review_sentiment_score',
    'review_summary_sentiment_score',
    'books_average_rating',
]
target_columns = ['rating']  # kept as a list so the target stays a one-column DataFrame

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    merged_book_attributes[feature_columns],
    merged_book_attributes[target_columns],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Candidate hyper-parameters: every neighbor count from 1 to 16, each tried
# with both weighting schemes.
param_grid = {
    'n_neighbors': range(1, 17),
    'weights': ['uniform', 'distance'],
}

# Grid search over the candidates, scored by R^2 with 5-fold cross-validation.
knn_regressor = KNeighborsRegressor()
best_knn_regressor = GridSearchCV(
    estimator=knn_regressor,
    param_grid=param_grid,
    cv=5,
    scoring='r2',
)

In [None]:
# Time the grid-search fit on the training split.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; unlike time.time() it cannot jump if the system clock is
# adjusted while the fit is running.
start = time.perf_counter()
best_knn_regressor.fit(X_train, y_train)
stop = time.perf_counter()

# Elapsed seconds, rounded to two decimals for reporting.
knn_regressor_training_time = round(stop - start, 2)

In [None]:
# Report the measured fit time, then the winning parameters and their
# cross-validated score from the grid search.
print(f"K-nearest neighbors training time: {knn_regressor_training_time}s")
for label, value in (
    ("Best parameters:", best_knn_regressor.best_params_),
    ("Best r2 score:", best_knn_regressor.best_score_),
):
    print(label, value)

In [None]:
# Evaluate the fitted grid search on the held-out test split and report it.
test_r2_score = best_knn_regressor.score(X_test, y_test)
print(f"r2 score on test data: {test_r2_score}")

In [None]:
# One unfitted regressor per neighbor count k = 1..16, in ascending order of k.
knn_models = [KNeighborsRegressor(n_neighbors=k) for k in range(1, 17)]

In [None]:
# Fit every candidate model on the training split and record its mean squared
# error on the test split. mse_values follows the order of knn_models, so
# position i corresponds to the i-th model in that list.
mse_values = []
for knn_regressor in knn_models:  # the enumerate() index was never used
    knn_regressor.fit(X_train, y_train)
    y_pred = knn_regressor.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_values.append(mse)

In [None]:
# Elbow plot: the recorded MSE values against neighbor counts 1..len(knn_models).
neighbor_counts = list(range(1, len(knn_models) + 1))

fig, ax = plt.subplots()
ax.plot(neighbor_counts, mse_values)
ax.set_xlabel('Number of Neighbors (k)')
ax.set_ylabel('Mean Squared Error (MSE)')
ax.set_title('Elbow plot for k-nearest neighbors regression model')
plt.show()

In [1]:
import matplotlib.pyplot as plt