In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

#Read your data from CSV file
data = pd.read_csv("data/Books.csv")

#Extract the year as we want to have that to help predict price
data['releasedYear'] = pd.to_datetime(data['releasedDate']).dt.year

#Hard coded the popularity score (1-10) which also help in predicting the price
author_score_map = {
    "JK Rowling" : 10,
    "Dan Brown" : 8,
    "Stephenie Meyer" : 7
}

data['author_popularity_score'] = data['author'].map(author_score_map)
data = pd.get_dummies(data, columns=['language', 'genre'], drop_first=True)
data['author_popularity_score'] = data['author_popularity_score'].fillna(5)
feature_cols = ['pageCount', 'releasedYear', 'author_popularity_score'] + \
    [col for col in data.columns if col.startswith('language_') or col.startswith('genre_')]

#Choose your data
X = data[feature_cols]
y = data['price']

#Now split this data into training sets and test sets
[X_train, X_test, y_train, y_test] = train_test_split(X,y, test_size=0.2, random_state=42)

#Now plugged in the x and y training sets into our model
model = LinearRegression()
model.fit(X_train, y_train)

#Completely for yourself if you want to know the Coefficients/Intercept
print("Coefficient value: ", model.coef_)
print("Intercept value: ", model.intercept_)

#Tests the prediction with X_test
y_pred = model.predict(X_test)

#Check the metrics to see if we achieved good R2 score
print("R2 Score: ", r2_score(y_test, y_pred))
print("MSE: ", mean_squared_error(y_test, y_pred))



Coefficient value:  [ 0.01150757  0.36225961  0.55775878  1.5863567  -2.65841444]
Intercept value:  -721.8134557713207
R2 Score:  0.9005743080018083
MSE:  2.901987385197221


In [27]:
example = pd.DataFrame([[600, 2025, 8]], columns=['pageCount', 'releasedYear', 'author_popularity_score'])

predict_price = model.predict(example)
print("Suggested price: ", predict_price)

Suggested price:  [23.73013369]
