The Content-Based Recommender aims to address the limitations of the baseline model by personalizing recommendations based on user preferences. It computes similarity between books using metadata such as title, authors, and genres, and suggests books that are most similar to those the user has liked.


In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime

import warnings
warnings.filterwarnings('ignore')

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

In [5]:
# Directory holding the input CSV files; edit this single value to point elsewhere.
DATA_DIR = '/content'

# The '.zip' inputs are passed to read_csv as-is (pandas infers compression from the suffix).
books = pd.read_csv(f'{DATA_DIR}/books.csv.zip')
ratings = pd.read_csv(f'{DATA_DIR}/ratings.csv.zip')
book_tags = pd.read_csv(f'{DATA_DIR}/book_tags.csv.zip')
tags = pd.read_csv(f'{DATA_DIR}/tags.csv')

In [6]:
def _normalize_authors(raw):
    """Convert a ', '-separated author string into a list of lowercase, space-free tokens.

    Values that are already lists are returned unchanged so this cell can be
    re-run without raising (the column is overwritten in place). Any other
    non-string value (e.g. a missing author) becomes an empty list.
    """
    if isinstance(raw, list):
        return raw
    if not isinstance(raw, str):
        return []
    return [name.replace(" ", "").lower() for name in raw.split(', ')]


books['authors'] = books['authors'].apply(_normalize_authors)

In [7]:
def get_genres(x):
    """Return the normalised tag names attached to the book whose id is ``x``.

    Rows of ``book_tags`` whose ``goodreads_book_id`` equals ``x`` are selected,
    and their ``tag_id`` values are looked up in ``tags.tag_name``. Each name is
    lower-cased and stripped of spaces.
    """
    tag_ids = book_tags.loc[book_tags.goodreads_book_id == x, 'tag_id']
    # NOTE(review): .loc is label-based, so this assumes the index of `tags`
    # coincides with tag_id — confirm against tags.csv.
    names = tags.tag_name.loc[tag_ids].values
    return [name.lower().replace(" ", "") for name in names]

In [8]:
# Attach each book's list of normalised tag names (one get_genres call per book).
book_ids = books['book_id']
books['genres'] = book_ids.apply(get_genres)

In [9]:
# Build one whitespace-separated "soup" string per book: the raw title followed
# by the author tokens and then the genre tokens.
books['soup'] = [
    ' '.join([book_title] + author_tokens + genre_tokens)
    for book_title, author_tokens, genre_tokens in zip(
        books['title'], books['authors'], books['genres']
    )
]

In [10]:
# Preview the first few combined soup strings.
books.soup.head()

0    The Hunger Games (The Hunger Games, #1) suzann...
1    Harry Potter and the Sorcerer's Stone (Harry P...
2    Twilight (Twilight, #1) stepheniemeyer young-a...
3    To Kill a Mockingbird harperlee classics favor...
4    The Great Gatsby f.scottfitzgerald classics fa...
Name: soup, dtype: object

In [11]:
# Bag-of-words over the soup strings using unigrams and bigrams, with English
# stop words removed.
# min_df=1 (rather than 0) keeps every term that occurs in at least one document,
# which is the same vocabulary, while satisfying scikit-learn's parameter
# validation: recent releases reject an integer min_df below 1.
count = CountVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
count_matrix = count.fit_transform(books['soup'])

COSINE SIMILARITY

In [12]:
# Pairwise cosine similarity between every pair of books' count vectors.
# With a single argument, cosine_similarity compares the rows of the matrix
# against themselves.
cosine_sim = cosine_similarity(count_matrix)

In [13]:
# `titles` is the title column itself; `indices` is the reverse lookup from a
# title to that book's index label in `books`.
titles = books['title']
indices = pd.Series(books.index, index=titles)

In [14]:
def get_recommendations(title, n=10):
    """Return the titles of the ``n`` books most similar to ``title``.

    Similarity is read from the precomputed ``cosine_sim`` matrix; the row is
    selected through the ``indices`` title lookup.

    Parameters
    ----------
    title : str
        Exact title of the query book, as it appears in ``indices``.
    n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    list
        Titles ordered from most to least similar, excluding the query book.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # Several books can share a title, in which case the lookup yields a Series;
    # use the first match so a single similarity row is selected.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    scores = np.asarray(cosine_sim[idx])
    # Stable descending sort: ties keep their original (positional) order.
    order = np.argsort(-scores, kind='stable')
    # Drop the query book explicitly instead of assuming it sorts first; another
    # book with an identical soup would tie with it at similarity 1.0.
    order = order[order != idx]
    return list(titles.iloc[order[:n]].values)

In [15]:
# Example query: books similar to a known title, using the default n=10.
get_recommendations("The One Minute Manager")

["Good to Great: Why Some Companies Make the Leap... and Others Don't",
 "First, Break All the Rules: What the World's Greatest Managers Do Differently",
 'Execution: The Discipline of Getting Things Done',
 "What Got You Here Won't Get You There: How Successful People Become Even More Successful",
 'Start with Why: How Great Leaders Inspire Everyone to Take Action',
 'Great by Choice: Uncertainty, Chaos, and Luck--Why Some Thrive Despite Them All',
 'The 21 Irrefutable Laws of Leadership: Follow Them and People Will Follow You',
 'The Speed of Trust: The One Thing that Changes Everything',
 'Fish: A Proven Way to Boost Morale and Improve Results',
 'Leadership and Self-Deception: Getting Out of the Box']

In [16]:
def get_name_from_partial(title):
    """Return every book title that contains ``title`` as a substring.

    Matching is case-insensitive and literal: both sides are lower-cased, and
    the query is not interpreted as a regular expression, so characters such as
    ``(`` or ``+`` in the query are matched as-is.

    Parameters
    ----------
    title : str
        Fragment of a title to search for.

    Returns
    -------
    list
        Matching titles from ``books.title``, in the order they appear there.
    """
    needle = title.lower()
    # na=False treats missing titles as non-matches.
    mask = books.title.str.lower().str.contains(needle, regex=False, na=False)
    return list(books.title[mask].values)

In [21]:
# Look up full titles from a partial, lower-case fragment.
title = "business"
# `l` holds the matching titles; later cells index into it (e.g. l[1]).
l = get_name_from_partial(title)
# Show each match next to its position so one can be picked by index.
list(enumerate(l))

[(0, 'The Power of Habit: Why We Do What We Do in Life and Business'),
 (1,
  "The Lean Startup: How Today's Entrepreneurs Use Continuous Innovation to Create Radically Successful Businesses"),
 (2,
  'Caps for Sale: A Tale of a Peddler, Some Monkeys and Their Monkey Business'),
 (3,
  "The E-Myth Revisited: Why Most Small Businesses Don't Work and What to Do About It"),
 (4, 'The Snowball: Warren Buffett and the Business of Life'),
 (5,
  "The Innovator's Dilemma: The Revolutionary Book that Will Change the Way You Do Business (Collins Business Essentials)"),
 (6, 'The Intelligent Investor (Collins Business Essentials)'),
 (7, 'Purple Cow: Transform Your Business by Being Remarkable'),
 (8, 'Business Model Generation'),
 (9, 'The Long Tail: Why the Future of Business is Selling Less of More'),
 (10,
  "Losing My Virginity: How I've Survived, Had Fun, and Made a Fortune Doing Business My Way"),
 (11,
  'The Hard Thing About Hard Things: Building a Business When There Are No Easy Answer

In [22]:
# Recommend from the match at position 1 of the partial-title search results.
get_recommendations(l[1])

['Rework',
 'The Hard Thing About Hard Things: Building a Business When There Are No Easy Answers',
 'Blue Ocean Strategy: How To Create Uncontested Market Space And Make The Competition Irrelevant',
 'The Art of the Start: The Time-Tested, Battle-Hardened Guide for Anyone Starting Anything',
 "Good to Great: Why Some Companies Make the Leap... and Others Don't",
 'Start with Why: How Great Leaders Inspire Everyone to Take Action',
 'Zero to One: Notes on Startups, or How to Build the Future',
 "The E-Myth Revisited: Why Most Small Businesses Don't Work and What to Do About It",
 'How Google Works',
 'Delivering Happiness: A Path to Profits, Passion, and Purpose']

POPULARITY AND RATINGS

In [26]:
def improved_recommendations(title, n=10, pool_size=30, quantile=0.60):
    """Recommend similar books, re-ranked by a vote-weighted rating.

    The ``pool_size`` books most similar to ``title`` (per ``cosine_sim``) form
    a candidate pool. Each candidate is scored with

        weighted_rating = (R*v + C*m) / (v + m)

    where ``v`` is its ``ratings_count``, ``R`` its ``average_rating``, ``C``
    the pool's mean ``average_rating`` and ``m`` the ``quantile`` of the pool's
    ``ratings_count``. Candidates with fewer than ``m`` ratings are discarded.

    Parameters
    ----------
    title : str
        Exact title of the query book, as it appears in ``indices``.
    n : int, default 10
        Maximum number of rows to return.
    pool_size : int, default 30
        Number of most-similar books considered before re-ranking.
    quantile : float, default 0.60
        Quantile of the pool's ``ratings_count`` used as the vote threshold ``m``.

    Returns
    -------
    pandas.DataFrame
        Columns ``title``, ``ratings_count``, ``average_rating`` and
        ``weighted_rating``, sorted by ``weighted_rating`` descending.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # Several books can share a title, in which case the lookup yields a Series;
    # use the first match so a single similarity row is selected.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    scores = np.asarray(cosine_sim[idx])
    # Stable descending sort: ties keep their original (positional) order.
    order = np.argsort(-scores, kind='stable')
    # Exclude the query book by position rather than assuming it ranks first.
    order = order[order != idx][:pool_size]

    # Copy so the new column is added to an independent frame, not a slice of `books`.
    df = books.iloc[order][['title', 'ratings_count', 'average_rating']].copy()

    v = df['ratings_count']
    m = v.quantile(quantile)
    R = df['average_rating']
    C = R.mean()
    df['weighted_rating'] = (R * v + C * m) / (v + m)

    qualified = df[v >= m]
    return qualified.sort_values('weighted_rating', ascending=False).head(n)

# Example query: the same known title, now re-ranked by weighted rating.
improved_recommendations("The One Minute Manager")


Unnamed: 0,title,ratings_count,average_rating,weighted_rating
2559,The 21 Irrefutable Laws of Leadership: Follow ...,30255,4.12,4.06019
246,The 7 Habits of Highly Effective People: Power...,314700,4.05,4.045478
3234,Start with Why: How Great Leaders Inspire Ever...,32899,4.07,4.035066
931,Good to Great: Why Some Companies Make the Lea...,85277,4.04,4.028535
2326,The Five Dysfunctions of a Team: A Leadership ...,40239,4.01,4.002625
2387,"Delivering Happiness: A Path to Profits, Passi...",37601,4.01,4.002321
2413,The E-Myth Revisited: Why Most Small Businesse...,37671,3.98,3.984657
2219,Built to Last: Successful Habits of Visionary ...,39618,3.98,3.98452
3360,"First, Break All the Rules: What the World's G...",27207,3.92,3.955049
989,Rework,88626,3.93,3.944028


In [27]:
# Weighted-rating recommendations for the match at position 1 of the partial-title search.
improved_recommendations(l[1])

Unnamed: 0,title,ratings_count,average_rating,weighted_rating
2165,"Zero to One: Notes on Startups, or How to Buil...",47807,4.17,4.097532
3234,Start with Why: How Great Leaders Inspire Ever...,32899,4.07,4.024009
931,Good to Great: Why Some Companies Make the Lea...,85277,4.04,4.022673
2387,"Delivering Happiness: A Path to Profits, Passi...",37601,4.01,3.992207
2219,Built to Last: Successful Habits of Visionary ...,39618,3.98,3.974785
2413,The E-Myth Revisited: Why Most Small Businesse...,37671,3.98,3.974627
1925,Made to Stick: Why Some Ideas Survive and Othe...,46736,3.97,3.968913
3360,"First, Break All the Rules: What the World's G...",27207,3.92,3.943209
989,Rework,88626,3.93,3.938527
2685,Blue Ocean Strategy: How To Create Uncontested...,30665,3.86,3.909643
