In [1]:
import ast

import pandas as pd
from scipy.sparse import csr_matrix, hstack
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder


In [2]:
# Load the cleaned book data. A forward slash is used because it resolves on
# Windows and POSIX alike; the previous "data\c..." spelling only worked
# because "\c" is not a recognised escape sequence (newer Pythons warn about it).
df = pd.read_csv("data/cleaned_output_file.csv")



In [3]:
def _parse_genres(raw):
    """Return the genre names held in one ``Genres`` cell as a list of strings.

    Handles three shapes of input:

    * an actual list/tuple/set of genres;
    * a stringified Python list such as "['Fantasy', 'Romance']" (the quoting
      the earlier ``split("'")`` attempt points to -- TODO confirm against the CSV);
    * any other text, which is kept whole as a single genre.

    Missing values (NaN/None) and blank strings give an empty list.
    """
    parsed = raw
    if isinstance(raw, str):
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, TypeError, SyntaxError):
            parsed = raw  # not a Python literal: treat it as plain text below
    if isinstance(parsed, (list, tuple, set)):
        return [str(genre).strip() for genre in parsed if str(genre).strip()]
    if isinstance(raw, str) and raw.strip():
        return [raw.strip()]
    return []


# Min-max scale rating and publication year. One scaler per column, so each
# fitted scaler stays usable afterwards instead of being overwritten by a refit.
rating_scaler = MinMaxScaler()
year_scaler = MinMaxScaler()
df['Rating Scale'] = rating_scaler.fit_transform(df[['Average Rating']]).ravel()
df['Year Scale'] = year_scaler.fit_transform(df[['Publish Date']]).ravel()

# TF-IDF (unigrams + bigrams) for reviews and synopses, one vocabulary per
# column. Missing text is replaced by '' because TfidfVectorizer rejects NaN.
review_vectorizer = TfidfVectorizer(ngram_range=(1, 2))
synopsis_vectorizer = TfidfVectorizer(ngram_range=(1, 2))
reviews_tfidf = review_vectorizer.fit_transform(df['Proccessed Review'].fillna(''))
synopsis_tfidf = synopsis_vectorizer.fit_transform(df['Proccessed Synopsis'].fillna(''))

# Genres: multi-hot encoding with exactly one row per book.
# The previous `df['Genres'] is str` test compared a Series with the type `str`
# and was therefore never true, and stacking to one row per (book, genre) would
# not line up with the other per-book blocks. explode() + groupby(level=0).max()
# collapses the per-genre indicator rows back onto their book; books without
# genres keep an all-zero row.
genre_lists = df['Genres'].map(_parse_genres).reset_index(drop=True)
genre_indicators = (
    pd.get_dummies(genre_lists.explode(), dtype=float)
    .groupby(level=0)
    .max()
)
encoded_genres = csr_matrix(genre_indicators.to_numpy(dtype=float))
assert encoded_genres.shape[0] == len(df), "genre matrix must have one row per book"

# Titles and authors: plain one-hot encoding.
# NOTE(review): a one-hot title column is distinct for every uniquely titled
# book, so it adds nothing to between-book similarity -- consider dropping it.
title_encoder = OneHotEncoder(handle_unknown='ignore')
author_encoder = OneHotEncoder(handle_unknown='ignore')
encoded_titles = title_encoder.fit_transform(df[['Book Title']])
encoded_authors = author_encoder.fit_transform(df[['Author']])
encoded_features = hstack([encoded_titles, encoded_authors, encoded_genres])

# Combine everything into one sparse feature matrix (one row per book). The
# scaled numeric columns are converted explicitly so hstack only sees sparse blocks.
scaled_numeric = csr_matrix(df[['Rating Scale', 'Year Scale']].to_numpy(dtype=float))
combined_features = hstack([encoded_features, scaled_numeric, reviews_tfidf, synopsis_tfidf])


In [4]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel
import numpy as np

# Pairwise cosine similarity between all rows of combined_features.
# Entry [i, j] compares the books at row positions i and j; get_recommendations
# indexes this matrix by row position.
similarity_matrix = cosine_similarity(combined_features)



In [5]:
def get_recommendations(title, similarity_matrix, df, top_n=10):
    """Return the labels of the ``top_n`` books most similar to ``title``.

    Parameters
    ----------
    title
        Label of the query book. It is looked up in ``df['Book Title']`` when
        that column exists, otherwise in ``df.index`` (a frame that was
        re-indexed by title). If the title occurs more than once, the first
        occurrence is used.
    similarity_matrix
        Square array-like; row ``i`` holds the similarity of the book at row
        position ``i`` of ``df`` to every book.
    df
        DataFrame whose row order matches ``similarity_matrix``.
    top_n
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Book labels ordered from most to least similar. The query row itself
        is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present.
    """
    if 'Book Title' in df.columns:
        labels = np.asarray(df['Book Title'], dtype=object)
    else:
        labels = np.asarray(df.index, dtype=object)

    matches = np.flatnonzero(labels == title)
    if matches.size == 0:
        raise KeyError(title)
    idx = int(matches[0])

    scores = np.asarray(similarity_matrix[idx], dtype=float).ravel()
    # Stable sort keeps row order among ties, like sorted(..., reverse=True).
    ranked = np.argsort(-scores, kind='stable')
    # Remove the query row explicitly rather than assuming it sorts first:
    # an identical book earlier in the frame ties with it and would come first.
    ranked = ranked[ranked != idx][:max(top_n, 0)]
    return labels[ranked].tolist()

In [13]:
#df.set_index('Book Title', inplace=True)


import tkinter as tk
from tkinter import ttk

class AutocompleteCombobox(ttk.Combobox):
    """Combobox whose drop-down list narrows to the titles matching the typed text.

    While the user types, the ``values`` of the combobox are replaced by the
    titles that contain the typed text (case-insensitive substring match).
    The drop-down is not opened on every keystroke, because opening it hands
    keyboard focus to its list and interrupts typing; pressing Return opens it
    with the current matches, and the usual arrow button / Down key still work.
    """

    # Key releases that move the cursor or operate the drop-down. They do not
    # change the typed text, so they must not trigger a re-filter.
    _NON_EDITING_KEYS = frozenset({
        "Left", "Right", "Up", "Down", "Home", "End",
        "Return", "KP_Enter", "Tab", "Escape",
    })

    def __init__(self, *args, **kwargs):
        """Create the combobox; ``values`` (keyword) is the list of book titles."""
        super().__init__(*args, **kwargs)
        values = kwargs.get('values')
        # str() keeps the .lower() matching safe if a label is not a string.
        self.book_titles = sorted(str(v) for v in values) if values is not None else []
        self._completion_list = self.book_titles
        self._hits = []
        # '<Enter>' is the mouse-pointer-enters-widget event and carries no key
        # information; key handling has to be bound to '<KeyRelease>'.
        self.bind('<KeyRelease>', self.handle_keyrelease)
        self.bind('<Return>', self._open_dropdown)

    def handle_keyrelease(self, event):
        """Re-filter the drop-down values after a key that may have edited the text."""
        if event.keysym in self._NON_EDITING_KEYS:
            return
        # No special BackSpace handling: nothing is auto-inserted into the entry,
        # so there is no completion text after the cursor to remove.
        self._update_autocomplete()

    def _update_autocomplete(self):
        """Set ``values`` to the titles containing the typed text.

        An empty entry restores the full list. When nothing matches, the
        previously shown values are left in place.
        """
        typed = self.get()
        if typed == '':
            self._hits = self._completion_list
        else:
            needle = typed.lower()
            self._hits = [title for title in self._completion_list if needle in title.lower()]
        if self._hits:
            self['values'] = self._hits

    def _open_dropdown(self, event=None):
        """Refresh the matches and open the drop-down list when there are any."""
        self._update_autocomplete()
        if self._hits:
            self.event_generate('<Down>')


def update_recommendations(event):
    """Refill the recommendation list box for the title currently in ``book_var``.

    Used as the ``<<ComboboxSelected>>`` callback; ``event`` is not inspected.
    Reads the module-level ``book_var``, ``similarity_matrix`` and ``df`` and
    replaces the whole content of ``recommendation_list``.
    """
    chosen_title = book_var.get()
    suggestions = get_recommendations(chosen_title, similarity_matrix, df)
    # Clear the previous results, then append the new ones in ranked order.
    recommendation_list.delete(0, tk.END)
    recommendation_list.insert(tk.END, *suggestions)

# Tkinter window setup

# The drop-down below is filled from df.index, so the index has to hold the
# book titles. The guard makes this safe to re-run (an unguarded set_index
# fails the second time), and drop=False keeps the 'Book Title' column so the
# feature-engineering cell can still be re-executed afterwards.
if df.index.name != 'Book Title' and 'Book Title' in df.columns:
    df = df.set_index('Book Title', drop=False)

root = tk.Tk()
root.title("Book Recommendation System")

book_var = tk.StringVar()
book_titles = df.index.tolist()

# Title picker: choosing an entry triggers update_recommendations.
book_dropdown = AutocompleteCombobox(root, textvariable=book_var, values=book_titles)
book_dropdown.grid(row=0, column=0, padx=10, pady=10)
book_dropdown.bind('<<ComboboxSelected>>', update_recommendations)

# List box that update_recommendations fills with the recommended titles.
recommendation_list = tk.Listbox(root, width=50, height=20)
recommendation_list.grid(row=1, column=0, padx=10, pady=10)

# Blocks until the window is closed.
root.mainloop()