In [14]:
pip install googlesearch-python

Collecting googlesearch-pythonNote: you may need to restart the kernel to use updated packages.

  Downloading googlesearch_python-1.3.0-py3-none-any.whl.metadata (3.4 kB)
Downloading googlesearch_python-1.3.0-py3-none-any.whl (5.6 kB)
Installing collected packages: googlesearch-python
Successfully installed googlesearch-python-1.3.0


In [16]:
pip install beautifulsoup4

Note: you may need to restart the kernel to use updated packages.


In [1]:
pip install requests

Note: you may need to restart the kernel to use updated packages.


In [1]:
pip install sumy

Collecting sumy
  Downloading sumy-0.11.0-py2.py3-none-any.whl.metadata (7.5 kB)
Collecting docopt<0.7,>=0.6.1 (from sumy)
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting breadability>=0.1.20 (from sumy)
  Downloading breadability-0.1.20.tar.gz (32 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting pycountry>=18.2.23 (from sumy)
  Downloading pycountry-24.6.1-py3-none-any.whl.metadata (12 kB)
Downloading sumy-0.11.0-py2.py3-none-any.whl (97 kB)
   ---------------------------------------- 0.0/97.3 kB ? eta -:--:--
   ------------------------------------- -- 92.2/97.3 kB 5.1 MB/s eta 0:00:01
   ------------------------------------- -- 92.2/97.3 kB 5.1 MB/s eta 0:00:01
   ---------------------------------------- 97.3/97.3 kB 617.6 kB/s eta 0:00:00
Downloading pycountry-24.6.1-py3-none-any.whl (6.3 MB)
   --------------

In [17]:
import ast
import tkinter as tk
from tkinter import scrolledtext, messagebox

import pandas as pd
import requests
from bs4 import BeautifulSoup
from googlesearch import search
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier

# -------------------- Load and Prepare Dataset --------------------
# This section is written to run top-to-bottom on a fresh kernel: every name is
# defined before it is used, and nothing depends on state left over from an
# earlier execution of the cell.
df = pd.read_csv("goodreads_data.csv")


def _genres_to_text(raw):
    """Convert a stringified genre list into a space-separated string.

    The 'Genres' column is parsed as a Python list literal (for example
    "['Fantasy', 'Fiction']"). ``ast.literal_eval`` is used instead of ``eval``
    so that text coming from the CSV can never execute code. Values that are
    not strings, cannot be parsed, or do not parse to a list/tuple yield "".
    """
    if not isinstance(raw, str):
        return ""
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        return ""
    if not isinstance(parsed, (list, tuple)):
        return ""
    return " ".join(str(item) for item in parsed)


# reset_index gives a fresh 0..n-1 index so that row labels and row positions
# coincide; the TF-IDF matrix below is addressed by position.
df_cleaned = df.dropna(subset=['Book', 'Genres']).reset_index(drop=True)
df_cleaned['Genres'] = df_cleaned['Genres'].apply(_genres_to_text)

# For genre recommendation
df_rec = df_cleaned.copy()
# Dedicated vectorizer for genre text, so fitting on titles below does not
# silently replace the vocabulary that produced `tfidf_matrix`.
genre_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = genre_vectorizer.fit_transform(df_rec['Genres'])

nn_model = NearestNeighbors(n_neighbors=6, algorithm='auto', metric='cosine')
nn_model.fit(tfidf_matrix)

# For genre prediction (only rows that actually have a genre label)
df_pred = df_cleaned[df_cleaned['Genres'].str.strip().astype(bool)].copy()
# `vectorizer` is the title vectorizer; predict_genre() relies on this name.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_features = vectorizer.fit_transform(df_pred['Book'])

genre_labels = df_pred['Genres']
genre_clf = KNeighborsClassifier(n_neighbors=5)
genre_clf.fit(tfidf_features, genre_labels)

# Lower-cased title -> row label in df_rec. Duplicate titles keep their first
# occurrence so that a lookup always yields a single scalar label.
title_to_index = pd.Series(df_rec.index, index=df_rec['Book'].str.lower())
title_to_index = title_to_index[~title_to_index.index.duplicated(keep='first')]

# -------------------- Functions --------------------
def get_google_links(book_name, sites):
    """Look up one search-result URL per site for a book.

    For every entry in ``sites`` a query of the form
    "<book_name> book site:<site>" is sent through ``search`` (up to five
    results requested). The first result whose URL contains the site string
    is recorded.

    Args:
        book_name: Book title to search for.
        sites: Iterable of site strings (e.g. "amazon.com").

    Returns:
        dict mapping site -> first matching URL. Sites with no matching
        result are absent from the dict.
    """
    found = {}
    for domain in sites:
        hits = search(f"{book_name} book site:{domain}", num_results=5)
        # next() stops consuming the result generator at the first match,
        # just like breaking out of an explicit loop.
        first_match = next((url for url in hits if domain in url), None)
        if first_match is not None:
            found[domain] = first_match
    return found

def get_wikipedia_summary(wiki_url, max_words=120):
    """Download a page and return its leading paragraphs as plain text.

    Non-empty ``<p>`` elements are collected in document order (paragraphs
    starting with "coordinates" are skipped) until the running word count
    exceeds ``max_words``; the paragraph that crosses the limit is kept whole.

    Args:
        wiki_url: URL of the page to fetch.
        max_words: Word budget after which no further paragraphs are added.

    Returns:
        The collected paragraphs joined by blank lines (possibly an empty
        string). This function does not raise: on any failure it returns a
        string of the form "Error fetching summary: <reason>", because the
        caller displays the result directly in the UI.
    """
    try:
        response = requests.get(wiki_url, timeout=10)
        # Without this, an HTTP error page (404, 429, ...) would be scraped
        # and presented to the user as if it were the book summary.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        collected = []
        word_count = 0
        for para in soup.select('p'):
            text = para.get_text().strip()
            if not text or text.lower().startswith("coordinates"):
                continue
            collected.append(text)
            # Track the count incrementally instead of re-splitting the whole
            # accumulated summary on every paragraph.
            word_count += len(text.split())
            if word_count > max_words:
                break
        return "\n\n".join(collected)
    except Exception as e:
        return f"Error fetching summary: {str(e)}"

def get_recommendations_by_genre_input(genre, top_n=5):
    """Recommend books whose genre profile is nearest to a book matching ``genre``.

    The first row of ``df_rec`` whose 'Genres' text contains ``genre``
    (case-insensitive, literal substring) is used as the seed. Its TF-IDF row
    is passed to ``nn_model`` and the closest other rows are returned.

    Args:
        genre: Genre text to look for; converted with ``str()``.
        top_n: Maximum number of titles to return.

    Returns:
        A list of up to ``top_n`` values from the 'Book' column, or
        ``["No recommendations found."]`` when ``genre`` is blank or no row
        matches. An empty list is returned when ``top_n`` is less than 1.
    """
    if top_n < 1:
        return []

    needle = str(genre).lower().strip()
    if not needle:
        # A blank needle would match every row and seed on an arbitrary book.
        return ["No recommendations found."]

    # regex=False: genre text is data, not a pattern, so characters such as
    # "(" or "+" must be matched literally and must not raise a regex error.
    mask = df_rec['Genres'].str.lower().str.contains(needle, na=False, regex=False)
    positions = mask.to_numpy().nonzero()[0]
    if positions.size == 0:
        return ["No recommendations found."]

    # tfidf_matrix rows are addressed by position, which differs from the
    # DataFrame index label whenever rows were dropped without a reset.
    seed_pos = int(positions[0])

    # Ask for one extra neighbour (the seed itself is normally returned), but
    # never more than the number of fitted rows.
    n_query = min(top_n + 1, tfidf_matrix.shape[0])
    _, indices = nn_model.kneighbors(tfidf_matrix[seed_pos], n_neighbors=n_query)

    # Drop the seed by position rather than assuming it is the first hit:
    # rows with identical genre vectors tie at distance 0 in arbitrary order.
    neighbour_positions = [int(i) for i in indices[0] if int(i) != seed_pos][:top_n]
    return df_rec.iloc[neighbour_positions]['Book'].tolist()

def predict_genre(book_title):
    """Predict a genre label for a book title.

    The title is transformed with the module-level ``vectorizer`` and
    classified with ``genre_clf``; the single predicted label is returned.
    """
    title_features = vectorizer.transform([book_title])
    return genre_clf.predict(title_features)[0]

# -------------------- Main Book Search --------------------
def _find_wikipedia_link(book_name):
    """Return the first search hit whose URL contains "wikipedia.org", or None."""
    hits = search(f"{book_name} book site:en.wikipedia.org", num_results=5)
    return next((link for link in hits if "wikipedia.org" in link), None)


def search_book():
    """Button handler: look up the entered book and fill the results pane.

    Reads the title from ``entry`` and writes to ``summary_text``:
      1. a Wikipedia summary and link (when a Wikipedia hit is found),
      2. Amazon / Goodreads links,
      3. the book's genre (from the dataset if the title is known, otherwise
         predicted from the title) and genre-based recommendations.

    A failed web search is reported in the results pane instead of aborting
    the handler, so the genre and recommendation sections are still shown.
    """
    book_name = entry.get().strip()
    if not book_name:
        messagebox.showwarning("Input Error", "Please enter a book name.")
        return

    summary_text.delete("1.0", tk.END)

    # Wikipedia summary
    try:
        wiki_link = _find_wikipedia_link(book_name)
    except requests.RequestException as exc:
        wiki_link = None
        summary_text.insert(tk.END, f"⚠️ Web search failed: {exc}\n\n")

    if wiki_link:
        summary = get_wikipedia_summary(wiki_link)
        summary_text.insert(tk.END, f"📝 Summary (Wikipedia):\n{summary}\n\n")
        summary_text.insert(tk.END, f"🔗 Wikipedia: {wiki_link}\n\n")
    else:
        summary_text.insert(tk.END, "Wikipedia summary not found.\n\n")

    # Purchase links
    sites = ["amazon.com", "goodreads.com"]
    try:
        links = get_google_links(book_name, sites)
    except requests.RequestException as exc:
        links = {}
        summary_text.insert(tk.END, f"⚠️ Link search failed: {exc}\n")
    for site, link in links.items():
        summary_text.insert(tk.END, f"🔗 {site.capitalize()}: {link}\n")

    # Genre & Recommendations
    book_lower = book_name.lower()
    if book_lower in title_to_index:
        row_label = title_to_index[book_lower]
        # A title that occurs more than once yields a Series of labels;
        # use the first occurrence so the genre is a single string.
        if isinstance(row_label, pd.Series):
            row_label = row_label.iloc[0]
        genre_text = df_rec.loc[row_label, 'Genres']
        summary_text.insert(tk.END, f"\n🎯 Genre (from dataset): {genre_text}\n")
        recommendations = get_recommendations_by_genre_input(genre_text)
    else:
        predicted_genre = predict_genre(book_name)
        summary_text.insert(tk.END, f"\n🤖 Predicted Genre: {predicted_genre}\n")
        recommendations = get_recommendations_by_genre_input(predicted_genre)

    summary_text.insert(tk.END, "\n📚 Recommended Books (by Genre):\n")
    for rec in recommendations:
        summary_text.insert(tk.END, f"- {rec}\n")

# -------------------- UI --------------------
# Fonts shared by the widgets below.
HEADING_FONT = ("Arial", 14)
BODY_FONT = ("Arial", 12)

# Main window.
root = tk.Tk()
root.geometry("750x650")
root.title("📚 Book Finder + Genre Recommendation")

# Widgets are packed in creation order, top to bottom:
# prompt label, title entry, search button, scrollable results pane.
label = tk.Label(root, font=HEADING_FONT, text="Enter Book Name:")
label.pack(pady=10)

entry = tk.Entry(root, width=50, font=HEADING_FONT)
entry.pack()

button = tk.Button(
    root,
    command=search_book,
    font=BODY_FONT,
    text="Search Book",
)
button.pack(pady=10)

# Results pane grows with the window.
summary_text = scrolledtext.ScrolledText(root, font=BODY_FONT, wrap=tk.WORD)
summary_text.pack(expand=True, fill=tk.BOTH, padx=10, pady=10)

# Blocks until the window is closed.
root.mainloop()
