In [17]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [18]:
# Load the book catalogue CSV (path is relative to the current working directory).
# The later cells rely on its 'title' and 'authors' columns.
df = pd.read_csv('cleaned_books.csv')

In [19]:
# Quick sanity check: show the first four rows to confirm the columns loaded as expected.
print(df.head(4))

   book_id                                              title  \
0  2767052            The Hunger Games (The Hunger Games, #1)   
1        3  Harry Potter and the Sorcerer's Stone (Harry P...   
2    41865                            Twilight (Twilight, #1)   
3     2657                              To Kill a Mockingbird   

                       authors  \
0              Suzanne Collins   
1  J.K. Rowling, Mary GrandPré   
2              Stephenie Meyer   
3                   Harper Lee   

                                           image_url  
0  https://images.gr-assets.com/books/1447303603m...  
1  https://images.gr-assets.com/books/1474154022m...  
2  https://images.gr-assets.com/books/1361039443m...  
3  https://images.gr-assets.com/books/1361975680m...  


In [20]:
# Build one lowercase "title authors" text field per book; this is both the
# TF-IDF input and the lookup key used by recommend_book.
# Missing values are replaced with '' (and everything coerced to str) first:
# concatenating with NaN would make the whole field NaN, which the TF-IDF
# vectorizer cannot process and which could never be matched by a lookup.
df['combined'] = (
    df['title'].fillna('').astype(str).str.lower()
    + ' '
    + df['authors'].fillna('').astype(str).str.lower()
)

In [21]:
# Fit a TF-IDF vectorizer (default settings) on the combined title/author text
# and encode every book as one row of the resulting matrix, in the same row
# order as df.
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df['combined'])

In [22]:
# Pairwise cosine similarity between every pair of books' TF-IDF rows.
# With no second argument the matrix is compared against itself, so
# cosine_sim[i][j] is the similarity between book i and book j.
cosine_sim = cosine_similarity(tfidf_matrix)

In [23]:
def recommend_book(title, author, top_n=5):
    """Return the titles of the books most similar to the given book.

    The book is located by an exact match of its lowercased
    ``"<title> <author>"`` string against ``df['combined']``. Candidates are
    ranked by the matching row of the precomputed ``cosine_sim`` matrix.

    Parameters
    ----------
    title : str
        Book title; compared case-insensitively.
    author : str
        Author string as stored in the catalogue; compared case-insensitively.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    list of str or str
        Up to ``top_n`` titles ordered from most to least similar, or the
        string ``"Book not found."`` when no catalogue row matches.
    """
    input_combined = f"{title.lower()} {author.lower()}"

    # Resolve the row by *position*, not index label: cosine_sim rows and
    # .iloc are positional, so a label would be wrong (or out of bounds)
    # whenever df does not carry a default RangeIndex. A single scan also
    # replaces the separate membership test and lookup.
    matches = (df['combined'] == input_combined).to_numpy().nonzero()[0]
    if matches.size == 0:
        return "Book not found."
    idx = int(matches[0])

    scores = cosine_sim[idx]

    # Exclude the query row explicitly instead of assuming it sorts first:
    # another book can tie at similarity 1.0, and dropping "rank 0" would then
    # discard that book and recommend the query book to itself.
    candidates = (pos for pos in range(len(scores)) if pos != idx)
    # sorted() is stable, so ties keep catalogue order.
    ranked = sorted(candidates, key=lambda pos: scores[pos], reverse=True)

    return [df['title'].iloc[pos] for pos in ranked[:max(top_n, 0)]]


In [24]:
# Example query. Input casing does not matter because recommend_book lowercases
# the title and author before looking the book up.
print(recommend_book("To kill a mockingbird", "Harper Lee"))

['Mockingbird', 'A Time to Kill', 'Go Set a Watchman', 'The Last Boleyn', 'First to Kill (Nathan McBride, #1)']
