In [1]:
#Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
#Sample book data
np.random.seed(42)

titles = [f'Book {chr(65 + i)}' for i in range(30)]
genres = np.random.choice(['Fiction', 'Non-Fiction', 'Mystery', 'Science-Fiction'], size=30)
authors = np.random.choice(['Author X', 'Author Y', 'Author Z'], size=30)

books_data = pd.DataFrame({
    'title': titles,
    'genre':genres,
    'author': authors
})

In [3]:
books_data

Unnamed: 0,title,genre,author
0,Book A,Mystery,Author X
1,Book B,Science-Fiction,Author X
2,Book C,Fiction,Author Y
3,Book D,Mystery,Author Y
4,Book E,Mystery,Author X
5,Book F,Science-Fiction,Author X
6,Book G,Fiction,Author X
7,Book H,Fiction,Author Z
8,Book I,Mystery,Author Z
9,Book J,Non-Fiction,Author Z


In [4]:
# Combine text feature for each book
def combine_features(row):
    return row['author'] + ' ' + row['genre']

In [5]:
books_data['combined_features'] = books_data.apply(combine_features, axis=1)

In [6]:
#Use CountVectorizer to convert textual data into a matrix of token counts
vectorizer = CountVectorizer()
feature_matrix = vectorizer.fit_transform(books_data['combined_features'])

In [7]:
feature_matrix

<30x5 sparse matrix of type '<class 'numpy.int64'>'
	with 76 stored elements in Compressed Sparse Row format>

In [8]:
print(feature_matrix[:10, :10].toarray())

[[1 0 1 0 0]
 [1 1 0 0 1]
 [1 1 0 0 0]
 [1 0 1 0 0]
 [1 0 1 0 0]
 [1 1 0 0 1]
 [1 1 0 0 0]
 [1 1 0 0 0]
 [1 0 1 0 0]
 [1 1 0 1 0]]


In [9]:
#Calculate cosine similarity between items (books)
item_similarities = cosine_similarity(feature_matrix)

In [10]:
def get_book_recommendation(user_profile, book_data, item_similarities):
    preferred_books = books_data[(book_data['genre'] == user_profile['genre']) &
                                (books_data['author'] == user_profile['author'])
                                ]
    if preferred_books.empty:
        return "No matching book found in the dataset for the given user profile."
    
    recommendations = {}
    for index, book in preferred_books.iterrows():
        book_similarities = item_similarities[index]
        similar_books = list(books_data.iloc[np.argsort(-book_similarities)[1:4]]['title'])
        recommendations[book['title']] = similar_books
        
    return recommendations

In [11]:
#Example user profile
user_profile = {'genre':'Mystery', 'author':'Author X'}
user_recommendations = get_book_recommendation(user_profile, books_data, item_similarities)
print('Recommendation for the user profile', user_recommendations)

Recommendation for the user profile {'Book A': ['Book N', 'Book M', 'Book L'], 'Book E': ['Book N', 'Book M', 'Book L']}
