In [148]:
import pandas as pd
import numpy as np

import seaborn as sns
sns.set(font_scale=1.5)

import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
plt.style.use('fivethirtyeight')

In [149]:
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import NearestNeighbors

In [150]:
# Load the bestseller list, give the columns short snake_case names, and keep
# only the first row for each title (renumbering the rows from 0 afterwards).
amazon_books = (
    pd.read_csv('../data-sources/amazon-bestsellers/amazon-bestsellers.csv')
    .set_axis(['name', 'author', 'rating', 'reviews', 'price', 'year', 'genre'], axis=1)
    .drop_duplicates(subset='name', ignore_index=True)
)
amazon_books.head()

Unnamed: 0,name,author,rating,reviews,price,year,genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction


In [151]:
# Replace the genre strings with integer codes; the fitted encoder is kept in
# `le` so the codes can be mapped back to labels later.
le = LabelEncoder().fit(amazon_books['genre'])
amazon_books['genre'] = le.transform(amazon_books['genre'])

In [152]:
# Dimensions (rows, columns) before the author column is one-hot encoded.
amazon_books.shape

(351, 7)

In [153]:
# One-hot encode the author column (dropping the first level) and display the
# resulting dimensions.
amazon_books = pd.get_dummies(
    data=amazon_books,
    columns=['author'],
    drop_first=True,
)
amazon_books.shape

(351, 253)

In [154]:
# Standardize the numeric feature columns with StandardScaler.
scalable_cols = amazon_books[['rating', 'reviews', 'price', 'year']]
scaler = StandardScaler()
scalable_cols = pd.DataFrame(
    scaler.fit_transform(scalable_cols),
    columns=scalable_cols.columns,
    # fit_transform returns a bare array; carry the original row labels over so
    # that assigning these columns back into amazon_books aligns row-for-row
    # even if the frame does not have a default RangeIndex.
    index=scalable_cols.index,
)
scalable_cols.head()

Unnamed: 0,rating,reviews,price,year
0,0.402782,0.695506,-0.505844,0.757873
1,-0.039019,-0.713687,0.88906,-0.767433
2,0.402782,0.845563,0.191608,1.367996
3,0.402782,1.070787,-0.705116,1.062934
4,0.844583,-0.196639,-0.1073,1.673057


In [155]:
# Overwrite the raw numeric columns with their scaled versions.
# Note: pandas aligns this assignment on the row index, so scalable_cols must
# share amazon_books' index for the values to land on the right rows.
amazon_books[['rating', 'reviews', 'price', 'year']] = scalable_cols

In [163]:
def BookRecommender(amazon_books, num_recommendations, selected_book):
    """Return the titles of the books most similar to ``selected_book``.

    Similarity is the Manhattan distance between feature rows, where the
    features are every column of ``amazon_books`` except ``'name'``.

    Parameters
    ----------
    amazon_books : pandas.DataFrame
        Frame with a ``'name'`` column holding the titles; all remaining
        columns are passed to ``NearestNeighbors`` as features.
    num_recommendations : int
        Maximum number of titles to return. Capped at the number of rows
        other than the query row.
    selected_book : str
        Title to find neighbours for; must be present in
        ``amazon_books['name']``. If it occurs more than once, the first
        occurrence is used as the query.

    Returns
    -------
    list
        Up to ``num_recommendations`` titles ordered nearest first. The query
        row itself is never included.

    Raises
    ------
    ValueError
        If ``num_recommendations`` is negative.
    KeyError
        If ``selected_book`` is not found in the ``'name'`` column.
    """
    if num_recommendations < 0:
        raise ValueError("num_recommendations must be non-negative")

    titles = amazon_books['name'].tolist()
    if selected_book not in titles:
        raise KeyError(selected_book)
    query_pos = titles.index(selected_book)

    # Select features by column name instead of assuming 'name' is column 0.
    features = amazon_books.drop(columns='name')

    # Cannot recommend more books than there are rows besides the query.
    k = min(num_recommendations, len(titles) - 1)
    if k == 0:
        return []

    # Ask for one extra neighbour because the query row is part of the fitted
    # data and will normally be returned as its own nearest neighbour.
    neighbor_finder = NearestNeighbors(n_neighbors=k + 1, metric='manhattan')
    neighbor_finder.fit(features)
    _, indices = neighbor_finder.kneighbors(features.iloc[[query_pos]])

    # kneighbors returns *positions* into the fitted data, so look titles up
    # positionally. Drop the query row explicitly rather than assuming it is
    # first: with zero-distance ties another row may be ranked ahead of it.
    recommended_books = [titles[i] for i in indices.flatten() if i != query_pos]
    return recommended_books[:k]

In [164]:
# Ask for five recommendations for the title stored at row position 10
# (column 0 of the frame holds the book name).
query_title = amazon_books.iloc[10, 0]
BookRecommender(amazon_books, 5, query_title)

['Arguing with Idiots: How to Stop Small Minds and Big Government',
 'Divine Soul Mind Body Healing and Transmission System: The Divine Way to Heal You, Humanity, Mother Earth, and All…',
 'Going Rogue: An American Life',
 'Eat This Not That! Supermarket Survival Guide: The No-Diet Weight Loss Solution',
 'The Five Dysfunctions of a Team: A Leadership Fable']

In [158]:
# Check the row index: positions returned by kneighbors only coincide with
# row labels when this is a default RangeIndex.
amazon_books.index

RangeIndex(start=0, stop=351, step=1)

In [162]:
# Title stored at row position 11 (column 0 holds the book name).
amazon_books.iloc[11, 0]

'A Stolen Life: A Memoir'

In [160]:
# Re-check the frame's dimensions (rows, columns).
amazon_books.shape

(351, 253)

In [161]:
# Inspect the name-indexed feature row for the title at row position 10,
# i.e. a single-row frame containing every column except 'name'.
amazon_books.set_index('name', drop=True).loc[[amazon_books.iloc[10, 0]]]

Unnamed: 0_level_0,rating,reviews,price,year,genre,author_Adam Gasiewski,author_Adam Mansbach,author_Adir Levy,author_Admiral William H. McRaven,author_Adult Coloring Book Designs,...,author_Todd Burpo,author_Tony Hsieh,author_Tucker Carlson,author_Veronica Roth,author_W. Cleon Skousen,author_Walter Isaacson,author_William Davis,author_William P. Young,author_Wizards RPG Team,author_Zhi Gang Sha
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A Patriot's History of the United States: From Columbus's Great Discovery to the War on Terror,-0.039019,-0.860336,-1.10366,-1.072495,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
