<a href="https://colab.research.google.com/github/elifmacit/booksearch/blob/main/booksearch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import sqlite3
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from google.colab import drive
drive.mount('/content/drive')

# ---- Veritabanƒ±na baƒülan ----

DB_PATH = "/content/drive/MyDrive/books.db"
conn = sqlite3.connect(DB_PATH)

vectorizer = TfidfVectorizer(stop_words="english", max_features=50000)

# ---- SQL ile aday kitaplarƒ± √ßek (kelimelere b√∂lerek ) ----
def get_candidates(query):
    # 1. Sorguyu kelimelere ayƒ±r
    keywords = query.lower().split()

    # Bo≈ü sorgu gelirse bo≈ü DataFrame d√∂n
    if not keywords:
        return pd.DataFrame(columns=['id', 'title', 'authors', 'publisher', 'average_rating'])

    # 2. Dinamik SQL ve parametre listesi olu≈ütur
    # Aranacak alanlar
    fields = ['title', 'authors', 'publisher']

    where_clauses = []
    params = []

    # Her kelime i√ßin her alanda LIKE sorgusu olu≈ütur
    for key in keywords:
        like_key = f"%{key}%"
        for field in fields:
            where_clauses.append(f"lower({field}) LIKE ?")
            params.append(like_key)

    # 3. T√ºm "OR" ≈üartlarƒ±nƒ± birle≈ütir
    where_sql = " OR ".join(where_clauses)

    sql = f"""
    SELECT
        rowid AS id,
        COALESCE(title,'') AS title,
        COALESCE(authors,'') AS authors,
        COALESCE(publisher,'') AS publisher,
        COALESCE(average_rating,0) AS average_rating
    FROM books
    WHERE {where_sql}
    LIMIT 200;
    """

    # 4. Sorguyu √ßalƒ±≈ütƒ±r
    return pd.read_sql(sql, conn, params=params)

# ---- Arama fonksiyonu  ----
def search_books(query, top_k= 20, alpha=0.5):
    df = get_candidates(query)

    if len(df) == 0:
        print(f"‚ùó '{query}' i√ßin veri bulunamadƒ±.")
        return

    # Metin alanƒ±nƒ± birle≈ütir
    df["text"] = (
        df["title"].str.lower() + " " +
        df["authors"].str.lower() + " " +
        df["publisher"].str.lower()
    )

    # TF-IDF vekt√∂rleri olu≈ütur
    X = vectorizer.fit_transform(df["text"])
    q_vec = vectorizer.transform([query.lower()])

    # Kosin√ºs benzerliƒüi
    cosine_scores = cosine_similarity(q_vec, X).ravel()

    # Rating'i normalize et (0-1)
    r = df["average_rating"].astype(float).values

    # Paydada 0'a b√∂lme hatasƒ±nƒ± engelle (eƒüer t√ºm rating'ler aynƒ±ysa)
    r_range = r.max() - r.min()
    if r_range == 0:
        r_norm = np.zeros_like(r) # T√ºm√º 0 olabilir
    else:
        r_norm = (r - r.min()) / (r_range + 1e-9)


    # Aƒüƒ±rlƒ±klƒ± skor = cosine + rating
    final_score = (1 - alpha) * cosine_scores + alpha * r_norm

    # Sƒ±rala
    idx = np.argsort(-final_score)[:top_k]
    results = df.iloc[idx]

    # Sonu√ß yoksa
    if cosine_scores.max() < 0.01:
        print(f"‚ùó '{query}' ile ilgili anlamlƒ± sonu√ß bulunamadƒ±.")
        return

    # Yazdƒ±r
    print(f"\nüîé Arama: '{query}' ‚Äî Toplam {len(results)} sonu√ß\n")
    for _, row in results.iterrows():
        print(f"- {row['title']} ‚Äî {row['authors']} (Rating: {row['average_rating']})")

# ---- Kullanƒ±cƒ± etkile≈üimi (Deƒüi≈üiklik yok) ----
print("üìö Kitap Arama Sistemi ‚Äî bo≈ü Enter = √ßƒ±kƒ±≈ü")
while True:
    q = input("\nSorgu: ")
    if q.strip() == "":
        print("üëã √áƒ±kƒ±≈ü yapƒ±ldƒ±.")
        break
    search_books(q)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
üìö Kitap Arama Sistemi ‚Äî bo≈ü Enter = √ßƒ±kƒ±≈ü


KeyboardInterrupt: Interrupted by user