<a href="https://colab.research.google.com/github/Vasantha-Meghana/Predictive_Analytics_Projects/blob/Recommending_Books_to_Users/predictive9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler

# Step 1: Load the dataset
df = pd.read_csv("goodreads_data.csv", on_bad_lines='skip')

# Step 2: Keep required columns
df = df[['Book', 'Author', 'Genres', 'Avg_Rating', 'Num_Ratings']].dropna()
df = df.drop_duplicates(subset='Book')

# Step 3: Clean and convert numeric columns
df['Num_Ratings'] = df['Num_Ratings'].replace(',', '', regex=True).astype(float)
df['Avg_Rating'] = df['Avg_Rating'].astype(float)

# Step 4: Normalize ratings
scaler = MinMaxScaler()
df[['Avg_Rating', 'Num_Ratings']] = scaler.fit_transform(df[['Avg_Rating', 'Num_Ratings']])

# Step 5: Create TF-IDF matrix from authors
tfidf = TfidfVectorizer(stop_words='english')
author_matrix = tfidf.fit_transform(df['Author'])

# Step 6: Combine author and numeric features
numeric_features = df[['Avg_Rating', 'Num_Ratings']].values
combined_features = pd.concat([
    pd.DataFrame(author_matrix.toarray(), index=df.index),
    pd.DataFrame(numeric_features, index=df.index)
], axis=1)

# Step 7: Cosine similarity
similarity = cosine_similarity(combined_features)

# Step 8: Recommendation function with genre filter
def recommend_books_by_genre(book_title, genre_filter='Fantasy', top_n=5):
    if book_title not in df['Book'].values:
        return f"❌ Book '{book_title}' not found in the dataset."

    idx = df[df['Book'] == book_title].index[0]
    similarity_scores = list(enumerate(similarity[idx]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    recommended = []
    for i, score in similarity_scores[1:]:  # Skip the book itself
        genres = eval(df.iloc[i]['Genres']) if isinstance(df.iloc[i]['Genres'], str) else []
        if any(genre_filter.lower() in genre.lower() for genre in genres):
            recommended.append({
                'Book': df.iloc[i]['Book'],
                'Author': df.iloc[i]['Author'],
                'Genres': genres,
                'Avg_Rating': round(df.iloc[i]['Avg_Rating'], 3)
            })
        if len(recommended) == top_n:
            break

    if not recommended:
        return f"⚠️ No recommendations found in the '{genre_filter}' genre."

    # Display in structured format
    print(f"\n📚 Recommended books similar to '{book_title}' in {genre_filter} genre:\n")
    rec_df = pd.DataFrame(recommended)
    print(rec_df[['Book', 'Author', 'Genres', 'Avg_Rating']].to_string(index=False))

# Call the function
recommend_books_by_genre("The Hobbit", genre_filter="Fantasy", top_n=5)


📚 Recommended books similar to 'The Hobbit' in Fantasy genre:

                                 Book             Author                                                                                Genres  Avg_Rating
                              Contact         Carl Sagan [Science Fiction, Fiction, Classics, Science Fiction Fantasy, Space, Novels, Fantasy]       0.828
                      The Water Walls     iLana Markarov                                                                  [Fantasy, Adventure]       1.000
Orion: The Fight for Vox (Voxian, #2) Ruth Watson-Morris                                                                             [Fantasy]       0.970
                         Finley’s Way      Julie Mannino                                                                       [Fantasy, LGBT]       0.966
      Eado's Birth (Jack's Reign, #3)      Julie Mannino                                                                             [Fantasy]       0.964
