In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.decomposition import TruncatedSVD

In [3]:
books = pd.read_csv('books.csv')

In [4]:
books

Unnamed: 0,title,price,isbn,publication_year,language,cover_type,pages_number,book_id,author_id,publisher_id,Genre,Rating
0,Դառը ծովը,5800.0,9789939884943,2023.0,Armenian,Hard,336.0,1,1,1,Non-Fiction,0.5
1,Աշխարհի պատմությունը 10½ գլուխներով,5990.0,9789939980256,2023.0,Armenian,Hard,480.0,2,2,2,Science Fiction,2.5
2,Ալ տառը,4990.0,9789939980140,2023.0,Armenian,Ultrathin,280.0,3,3,2,Fiction,4.5
3,Լուսնաքարը,5990.0,9789939980089,2023.0,Armenian,Hard,656.0,4,4,2,Non-Fiction,4.9
4,«Ծիածանը» և ․․․,8400.0,9789939113852,2023.0,Armenian,Hard,168.0,5,5,3,Romance,2.2
...,...,...,...,...,...,...,...,...,...,...,...,...
22792,Կանաչ ու սև․ 2020 թվականի հայկական աղետը,6000.0,9789939882925,2021.0,Armenian,Soft,516.0,22793,1490,142,Romance,2.7
22793,Մահվան հովիտ,4000.0,9789939882963,2021.0,Armenian,Soft,360.0,22794,1490,142,History,2.3
22794,Իբր պոեզիա,3800.0,9789939877747,2021.0,Armenian,Soft,80.0,22795,1988,9,Science Fiction,2.3
22795,Ռազմարվեստ․ Ցամաքային ճեղքում․ Հատոր 2,25000.0,9789939034911,2020.0,Armenian,Hard,1333.0,22796,1734,160,Romance,4.6


In [110]:
books['content'] = books['title'] + ' ' + books['author_id'].astype(str) + ' ' + books['Genre']

In [111]:
# TF-IDF Vectorization
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(books['content'])

In [112]:
# Include the 'rating' as a numerical feature
rating_matrix = books['Rating'].values.reshape(-1, 1)

In [113]:
combined_matrix = pd.concat([pd.DataFrame(tfidf_matrix.toarray()), pd.DataFrame(rating_matrix)], axis=1)

In [114]:
svd = TruncatedSVD(n_components=100)  # Adjust the number of components as needed
tfidf_matrix_reduced = svd.fit_transform(tfidf_matrix)

In [115]:
# Compute cosine similarity
cosine_sim = linear_kernel(tfidf_matrix_reduced, tfidf_matrix_reduced)

In [116]:
def get_recommendations(title, books_data):
    idx = books_data.loc[books_data['title'] == title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:6]  # Get top 5 similar books
    book_indices = [i[0] for i in sim_scores]
    recommended_books = books_data.iloc[book_indices].copy()  # Create a copy to avoid modifying the original DataFrame
    return recommended_books

In [130]:
# Get user input for the book title
title_to_recommend = input("Enter a book title: ")
recommendations = get_recommendations(title_to_recommend, books)
recommendations

Enter a book title: Темная сторона


Unnamed: 0,title,price,isbn,publication_year,language,cover_type,pages_number,book_id,author_id,publisher_id,Genre,Rating,content
4245,Сияние,2800.0,9785171132712,2022.0,Russian,Soft,640.0,4246,242,11,Romance,1.8,Сияние 242 Romance
20260,Сияние,4700.0,9785170840786,2014.0,Russian,Hard,544.0,20261,242,11,Romance,1.7,Сияние 242 Romance
14469,Ս.,4990.0,9789939766508,2021.0,Armenian,Hard,296.0,14470,962,2,Romance,3.6,Ս. 962 Romance
7008,Томминокеры,4400.0,9785171021689,2023.0,Russian,Hard,736.0,7009,242,11,Romance,1.3,Томминокеры 242 Romance
3565,One Of Us Is Next,5900.0,9780241376928,2023.0,English,Soft,384.0,3566,391,45,Romance,2.5,One Of Us Is Next 391 Romance
