# Naive Bayes

In [None]:
import pandas as pd

from sklearn.metrics import mean_squared_error, r2_score, classification_report
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler

# Relative path to the merged book-attributes CSV under the gold data
# directory; it is the only input this notebook reads.
merged_book_attributes_gold = './data/gold/merged_book_attributes.csv'

In [None]:
# Load the merged book-attributes table into a DataFrame.
merged_book_attributes = pd.read_csv(filepath_or_buffer=merged_book_attributes_gold)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
feature_columns = [
    'price',
    'full_review_sentiment_score',
    'review_summary_sentiment_score',
    'books_average_rating',
]
X_train, X_test, y_train, y_test = train_test_split(
    merged_book_attributes[feature_columns],
    # Pass the target as a 1-D Series: a single-column DataFrame makes
    # scikit-learn emit a DataConversionWarning when the classifier is fitted.
    merged_book_attributes['rating'],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Convert the float ratings into ordinal classes 1..5 so the model is trained
# as a classifier. include_lowest=True puts a rating of exactly 0 in class 1.
bins = [0, 1, 2, 3, 4, 5]
labels = [1, 2, 3, 4, 5]
merged_book_attributes['rating_category'] = pd.cut(
    merged_book_attributes['rating'],
    bins=bins,
    labels=labels,
    include_lowest=True,
)
X = merged_book_attributes.drop(['rating', 'rating_category'], axis=1)
y = merged_book_attributes['rating_category']

# Re-create the train/test split on the categorical target. Without this the
# binned labels above are never used and the classifier is still fitted on the
# raw float ratings. Only the model's feature columns are kept, because X
# still holds every other column of the table. Using the same test_size and
# random_state as the earlier split selects the same rows.
# NOTE(review): assumes every rating lies in [0, 5]; values outside the bins
# (or missing) become NaN categories -- confirm against the data.
feature_columns = [
    'price',
    'full_review_sentiment_score',
    'review_summary_sentiment_score',
    'books_average_rating',
]
X_train, X_test, y_train, y_test = train_test_split(
    X[feature_columns],
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Learn the scaling statistics from the training features only, then apply
# that same transformation to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Gaussian Naive Bayes with default settings, fitted on the scaled
# training features.
nb = GaussianNB()
nb.fit(X=X_train_scaled, y=y_train)

In [None]:
# Predict a label for every row of the scaled test features.
y_pred = nb.predict(X=X_test_scaled)

In [None]:
# Score the test-set predictions: compute every metric first, then print them.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)  # goal is 0
r2 = r2_score(y_true=y_test, y_pred=y_pred)  # goal is a positive number; higher is better
report = classification_report(y_true=y_test, y_pred=y_pred)

print(report)
print(f'Mean Squared Error: {mse}')
print(f'R-squared Score: {r2}')