In [None]:
import pandas as pd
import numpy as np

# Read the two source catalogues, in the same order they are stacked below.
books, catagory = (
    pd.read_csv(csv_path)
    for csv_path in ("cs_books_dataset.csv", "oop_dsa_books_dataset.csv")
)

# Stack the catalogues row-wise; ignore_index=True gives the combined frame
# a fresh 0..n-1 index instead of repeating each file's own row labels.
merge_datasets = pd.concat((books, catagory), ignore_index=True)

# Persist the combined catalogue without writing the index as a column.
# NOTE(review): the following cell reads "merged_books_dataset.csv", not
# "sample.csv" -- confirm which file name is the intended hand-off.
merge_datasets.to_csv("sample.csv", index=False)

In [None]:
import pandas as pd
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Read the merged catalogue and replace every missing cell with an
# empty string so the text concatenation below never meets NaN.
df = pd.read_csv("merged_books_dataset.csv").fillna("")

# Step 2: Build one space-separated text blob per book from its descriptive
# columns, joined left to right in the order listed here.
feature_columns = ["title", "authors", "publisher", "description", "categories"]
combined_text = df[feature_columns[0]]
for column_name in feature_columns[1:]:
    combined_text = combined_text + " " + df[column_name]
df["features"] = combined_text

# Step 3: Fit a TF-IDF vocabulary on the blobs (English stop words removed)
# and vectorise every book.
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(df["features"])

# Step 4: Pairwise book-to-book cosine similarity (one row and one column
# per book).
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Step 5: Persist the frame, the fitted vectoriser and the similarity matrix
# as a single 3-tuple; whoever loads the pickle must unpack in this order.
artifacts = (df, tfidf, cosine_sim)
with open("book_recommender.pkl", "wb") as pickle_file:
    pickle.dump(artifacts, pickle_file)

print(" Pickle file saved as book_recommender.pkl")


 Pickle file saved as book_recommender.pkl


# Fixed Book Recommendation System

The issue was in the pickle loading: the names we unpacked into did not match the objects that were saved. The corrected version below unpacks them exactly as they were saved, `(df, tfidf, cosine_sim)`:

In [None]:
import pickle
from sklearn.metrics.pairwise import cosine_similarity

# Restore the saved artefacts. The pickle holds a 3-tuple, so it is unpacked
# in the order it was written: (df, tfidf, cosine_sim).
# Only load pickle files you produced yourself: unpickling runs arbitrary code.
with open("book_recommender.pkl", "rb") as pickle_file:
    saved_artifacts = pickle.load(pickle_file)
df, tfidf, cosine_sim = saved_artifacts

def recommend_books(user_input, n=5):
    """Return up to ``n`` books whose text features best match a free-text query.

    The query is vectorised with the module-level ``tfidf`` vectoriser and
    compared, by cosine similarity, against the ``features`` column of the
    module-level ``df``.

    Parameters
    ----------
    user_input : str
        Free-text query (for example a title fragment, an author name or
        topic keywords).
    n : int, default 5
        Maximum number of books to return. Values <= 0 yield an empty result.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` restricted to the columns ``title``, ``authors``,
        ``publisher`` and ``categories``, ordered from most to least similar.
        Books with zero similarity to the query are never returned, so the
        result may hold fewer than ``n`` rows (or none at all).
    """
    result_columns = ["title", "authors", "publisher", "categories"]

    # Guard: for n == 0 the slice below becomes [-0:], i.e. the WHOLE array,
    # which would return every book instead of none. Negative n is equally
    # meaningless, so both cases return an empty frame with the usual columns.
    if n <= 0:
        return df.iloc[:0][result_columns]

    # Transform user query into vector
    query_vec = tfidf.transform([user_input])

    # Get TF-IDF matrix for all books.
    # NOTE(review): this is recomputed on every call; persisting the matrix
    # alongside the vectoriser would avoid that, but needs a change to what
    # the pickle stores.
    tfidf_matrix = tfidf.transform(df["features"])

    # Compute similarity with all books
    sim_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # Positions of the n highest scores, best first
    top_indices = sim_scores.argsort()[-n:][::-1]

    # Drop books that share nothing with the query: a zero score means "no
    # match", and returning such rows would present arbitrary books as
    # recommendations (e.g. for an empty or out-of-vocabulary query).
    top_indices = top_indices[sim_scores[top_indices] > 0]

    return df.iloc[top_indices][result_columns]

# Example searches: each query asks for the three best matches and prints
# only the matching titles.

# Query phrased like a book description
print(" Search by description")
store = recommend_books("absolute beginners programming", 3)
print(store["title"].values)

# Query by author name
store2 = recommend_books("Anthony Ralston", 3)
print(store2["title"].values)

 Search by description
['Computer Programming for Absolute Beginners'
 'Python Programming for Beginners: A Comprehensive Introduction'
 'Python Programming']
['Introduction to Programming and Computer Science' 'Using MPI'
 'Python Programming, Deep Learning']
