In [6]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import NearestNeighbors

# Load the data and preprocess.
# 'Year-Of-Publication' is read as strings rather than parsed as numbers.
books_df = pd.read_csv("Books.csv", dtype={'Year-Of-Publication': 'str'}, low_memory=False)

# Drop incomplete rows, then renumber the index so that index labels equal row
# positions. The sparse `vectors` matrix built below is addressed by position,
# while lookups on `new_df` return index labels; without the reset the two
# drift apart as soon as a single row has been dropped.
books_df = books_df.dropna().reset_index(drop=True)

# Collapse author and publisher names into single lowercase tokens so that
# e.g. a multi-word author name becomes one vocabulary entry.
books_df['Book-Author'] = books_df['Book-Author'].str.replace(' ', '', regex=False).str.lower()
books_df['Publisher'] = books_df['Publisher'].str.replace(' ', '', regex=False).str.lower()
books_df['Book-Title'] = books_df['Book-Title'].str.lower()

# One text "document" per book: author token, publisher token, title words.
# Every component is already lowercase, so no further case folding is needed.
books_df['tags'] = books_df['Book-Author'] + ' ' + books_df['Publisher'] + ' ' + books_df['Book-Title']
new_df = books_df[['Book-Title', 'tags']].copy()

# Vectorize tags
cv = CountVectorizer(max_features=5000, stop_words='english')  # Limit max features for efficiency
vectors = cv.fit_transform(new_df['tags'])  # Keep it sparse

# Use Nearest Neighbors
nn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=6)  # Find 5 similar books + the book itself
nn.fit(vectors)

# Recommendation function
def recommend(book_title):
    """Print the titles of the books closest to ``book_title``.

    The lookup is case-insensitive: the argument is lowercased and compared
    against the (already lowercased) ``new_df['Book-Title']`` column. The
    first matching row is used as the query for the fitted ``nn`` model.

    Parameters
    ----------
    book_title : str
        Title of the book to find neighbours for.

    Returns
    -------
    None
        Results (or a "not found" message) are printed, not returned.
    """
    book_title = book_title.lower()

    # Work with row *positions*, not index labels: `vectors` is a matrix and
    # can only be addressed positionally, whereas `new_df.index` may have gaps
    # (rows are dropped during preprocessing).
    match_positions = (new_df['Book-Title'] == book_title).to_numpy().nonzero()[0]
    if len(match_positions) == 0:
        print(f"'{book_title}' not found in the dataset. Please try another title.")
        return

    book_pos = int(match_positions[0])
    _, indices = nn.kneighbors(vectors[book_pos])

    # Remove the query book explicitly instead of assuming it is ranked first;
    # with tied distances (identical tag vectors) it can appear later or not at
    # all. Keep one fewer result than the number of neighbours requested, which
    # is what skipping the first entry used to yield.
    wanted = len(indices[0]) - 1
    neighbour_positions = [int(i) for i in indices[0] if int(i) != book_pos][:wanted]

    print(f"Books similar to '{book_title}':")
    for pos in neighbour_positions:
        print(new_df['Book-Title'].iloc[pos])

# Example query; recommend() lowercases its argument, so the upper-case title is accepted.
recommend('PLEADING GUILTY')


Books similar to 'pleading guilty':
the carousel
maneater
guilty
everything and a kite
the corrections


In [4]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class BookRecommender:
    """Content-based book recommender using bag-of-words cosine similarity.

    On construction the CSV is loaded, a lowercase ``tags`` text is built per
    book from author, genres and plot summary, the tags are vectorized with
    ``CountVectorizer`` and the full pairwise cosine-similarity matrix is
    computed.

    Attributes
    ----------
    df : pandas.DataFrame
        Cleaned source data with the added ``tags`` column.
    new_df : pandas.DataFrame
        Two columns, ``title`` and ``tags``; row positions line up with the
        rows/columns of ``similarity``.
    cv : CountVectorizer
        The fitted vectorizer.
    vectors : numpy.ndarray
        Dense document-term matrix.
    similarity : numpy.ndarray
        Square matrix of pairwise cosine similarities between books.
    """

    def __init__(self, csv_path='booksumm.csv', max_features=2000):
        """Load the data and precompute the similarity matrix.

        Parameters
        ----------
        csv_path : str, optional
            Path of the CSV file to load. It must provide the columns
            'Book-title', 'Author', 'Plot-summary' and 'Book genres'.
        max_features : int, optional
            Vocabulary size limit passed to ``CountVectorizer``.
        """
        # Load and clean the dataset. The index is renumbered after dropping
        # rows so that index labels match row positions in the matrices below.
        self.df = (
            pd.read_csv(csv_path)
            .dropna(subset=['Book-title', 'Author', 'Plot-summary'])
            .reset_index(drop=True)
        )

        # Process genres: strip the characters { } " / from the raw field.
        self.df['Book genres'] = self.df['Book genres'].str.replace('[{}"/]', '', regex=True)

        # Create tags column (missing genres contribute an empty string).
        self.df['tags'] = (self.df['Author'] + ' ' +
                           self.df['Book genres'].fillna('') + ' ' +
                           self.df['Plot-summary']).str.lower()

        # Prepare data for recommendations
        self.new_df = self.df[['Book-title', 'tags']].rename(columns={'Book-title': 'title'})
        self.cv = CountVectorizer(max_features=max_features, stop_words='english')
        self.vectors = self.cv.fit_transform(self.new_df['tags']).toarray()
        # Full books-by-books matrix, computed once up front.
        self.similarity = cosine_similarity(self.vectors)

    def recommend(self, book_title, top_n=5):
        """Return the titles most similar to ``book_title``.

        Parameters
        ----------
        book_title : str
            Title to look up; matching is case-insensitive and the first
            matching row is used.
        top_n : int, optional
            Maximum number of recommendations to return.

        Returns
        -------
        list of str
            Up to ``top_n`` titles ordered from most to least similar, or the
            one-element list ``["Book not found. Please try another."]`` when
            the title is not present.
        """
        titles = self.new_df['title']

        # Resolve the title to a row *position*. `similarity` is a plain
        # matrix, so an index label would address the wrong row (or fall out
        # of bounds) whenever the frame's index is not 0..n-1.
        is_match = (titles.str.lower() == book_title.lower()).to_numpy()
        match_positions = np.flatnonzero(is_match)
        if match_positions.size == 0:
            return ["Book not found. Please try another."]

        book_pos = int(match_positions[0])
        scores = np.asarray(self.similarity[book_pos])

        # Descending similarity; the stable sort keeps earlier rows first on ties.
        ranked = np.argsort(-scores, kind='stable')

        # Exclude the query book by position rather than assuming it is ranked
        # first -- a book with an identical tag vector can tie with it.
        picks = [int(pos) for pos in ranked if int(pos) != book_pos][:max(top_n, 0)]
        return [titles.iloc[pos] for pos in picks]

# Usage
# Instantiating runs BookRecommender.__init__, which reads the CSV and builds the similarity matrix.
recommender = BookRecommender()

# Test the recommendation system
book_title = "A Wizard of Earthsea"
# recommend() returns a list of titles, or a one-element list holding a "not found" message.
recommendations = recommender.recommend(book_title)
print("Recommendations for:", book_title)
print(recommendations)


Recommendations for: A Wizard of Earthsea
['American Gods', 'The Shadow', 'The Wreck of the Zephyr', 'Babylon 5: The Passing of the Techno-Mages - Invoking Darkness', 'Apocalypse']


  ret = a @ b


In [6]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class BookRecommender:
    """Content-based book recommender using TF-IDF cosine similarity.

    On construction the CSV is loaded and a lowercase ``tags`` text is built
    per book by repeating the author, genre and plot-summary text a
    configurable number of times (more repetitions means more term occurrences
    for that field). The tags are vectorized with ``TfidfVectorizer`` and the
    full pairwise cosine-similarity matrix is computed.

    Attributes
    ----------
    df : pandas.DataFrame
        Cleaned source data with the added ``tags`` column.
    new_df : pandas.DataFrame
        Two columns, ``title`` and ``tags``; row positions line up with the
        rows/columns of ``similarity``.
    tfidf : TfidfVectorizer
        The fitted vectorizer.
    vectors : numpy.ndarray
        Dense TF-IDF matrix.
    similarity : numpy.ndarray
        Square matrix of pairwise cosine similarities between books.
    """

    def __init__(self, csv_path='booksumm.csv', max_features=2000,
                 author_weight=1, genre_weight=2, summary_weight=3):
        """Load the data and precompute the similarity matrix.

        Parameters
        ----------
        csv_path : str, optional
            Path of the CSV file to load. It must provide the columns
            'Book-title', 'Author', 'Plot-summary' and 'Book genres'.
        max_features : int, optional
            Vocabulary size limit passed to ``TfidfVectorizer``.
        author_weight, genre_weight, summary_weight : int, optional
            How many times each field's text is repeated inside ``tags``.
        """
        # Load and clean the dataset. The index is renumbered after dropping
        # rows so that index labels match row positions in the matrices below.
        self.df = (
            pd.read_csv(csv_path)
            .dropna(subset=['Book-title', 'Author', 'Plot-summary'])
            .reset_index(drop=True)
        )

        # Process genres: strip the characters { } " / from the raw field.
        self.df['Book genres'] = self.df['Book genres'].str.replace('[{}"/]', '', regex=True)

        # Create weighted tags column. Each field gets a trailing space before
        # being repeated so that consecutive copies stay separate tokens.
        author_text = (self.df['Author'] + ' ').str.repeat(author_weight)  # Lower weight for Author
        genre_text = (self.df['Book genres'].fillna('') + ' ').str.repeat(genre_weight)  # Medium weight for genres
        summary_text = (self.df['Plot-summary'] + ' ').str.repeat(summary_weight)  # Higher weight for summary
        self.df['tags'] = (author_text + genre_text + summary_text).str.lower()

        # Prepare data for recommendations using TF-IDF
        self.new_df = self.df[['Book-title', 'tags']].rename(columns={'Book-title': 'title'})
        self.tfidf = TfidfVectorizer(max_features=max_features, stop_words='english')
        self.vectors = self.tfidf.fit_transform(self.new_df['tags']).toarray()
        # Full books-by-books matrix, computed once up front.
        self.similarity = cosine_similarity(self.vectors)

    def recommend(self, book_title, top_n=5):
        """Return the titles most similar to ``book_title``.

        Parameters
        ----------
        book_title : str
            Title to look up; matching is case-insensitive and the first
            matching row is used.
        top_n : int, optional
            Maximum number of recommendations to return.

        Returns
        -------
        list of str
            Up to ``top_n`` titles ordered from most to least similar, or the
            one-element list ``["Book not found. Please try another."]`` when
            the title is not present.
        """
        titles = self.new_df['title']

        # Resolve the title to a row *position*. `similarity` is a plain
        # matrix, so an index label would address the wrong row (or fall out
        # of bounds) whenever the frame's index is not 0..n-1.
        is_match = (titles.str.lower() == book_title.lower()).to_numpy()
        match_positions = np.flatnonzero(is_match)
        if match_positions.size == 0:
            return ["Book not found. Please try another."]

        book_pos = int(match_positions[0])
        scores = np.asarray(self.similarity[book_pos])

        # Descending similarity; the stable sort keeps earlier rows first on ties.
        ranked = np.argsort(-scores, kind='stable')

        # Exclude the query book by position rather than assuming it is ranked
        # first -- a book with an identical tag vector can tie with it.
        picks = [int(pos) for pos in ranked if int(pos) != book_pos][:max(top_n, 0)]
        return [titles.iloc[pos] for pos in picks]

# Usage
# Instantiating runs BookRecommender.__init__, which reads the CSV and builds the similarity matrix.
recommender = BookRecommender()

# Test the recommendation system
book_title = "A Wizard of Earthsea"
# recommend() returns a list of titles, or a one-element list holding a "not found" message.
recommendations = recommender.recommend(book_title)
print("Recommendations for:", book_title)
print(recommendations)

Recommendations for: A Wizard of Earthsea
['The Shadow', 'American Gods', 'Double Dexter', 'Babylon 5: The Passing of the Techno-Mages - Invoking Darkness', 'The Guns of Avalon']


  ret = a @ b
