<a href="https://colab.research.google.com/github/busraguven/book-recommender/blob/main/book_recommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# 📚 Book Recommender Project — Starter Notebook
# Author: [Ulviye Busra Guven]
# Goal: Build a multimodal book recommender (text + metadata)

!pip install pandas numpy scikit-learn sentence-transformers faiss-cpu -q
import pandas as pd
import numpy as np


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m75.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Toy catalogue used throughout the notebook: one record per book, each with
# the same four text fields.
data = [
    dict(
        title="The Great Gatsby",
        author="F. Scott Fitzgerald",
        genre="Classic",
        description="A tragic story of wealth, love and the American dream.",
    ),
    dict(
        title="Pride and Prejudice",
        author="Jane Austen",
        genre="Romance",
        description="A witty exploration of manners, marriage and social standing.",
    ),
    dict(
        title="Dune",
        author="Frank Herbert",
        genre="Sci-Fi",
        description="An epic saga of politics, prophecy, and survival on the desert planet Arrakis.",
    ),
    dict(
        title="1984",
        author="George Orwell",
        genre="Dystopian",
        description="A chilling vision of a totalitarian future and the loss of freedom.",
    ),
    dict(
        title="The Hobbit",
        author="J.R.R. Tolkien",
        genre="Fantasy",
        description="A hobbit embarks on a perilous adventure with dwarves and dragons.",
    ),
]

# One row per record; columns follow the key order of the records.
df = pd.DataFrame(data)
df


Unnamed: 0,title,author,genre,description
0,The Great Gatsby,F. Scott Fitzgerald,Classic,"A tragic story of wealth, love and the America..."
1,Pride and Prejudice,Jane Austen,Romance,"A witty exploration of manners, marriage and s..."
2,Dune,Frank Herbert,Sci-Fi,"An epic saga of politics, prophecy, and surviv..."
3,1984,George Orwell,Dystopian,A chilling vision of a totalitarian future and...
4,The Hobbit,J.R.R. Tolkien,Fantasy,A hobbit embarks on a perilous adventure with ...


In [3]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Load the pretrained sentence encoder by name.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every description in ONE batched call instead of one model.encode()
# call per row (the per-row .apply defeats the encoder's internal batching).
# With a list input, encode() returns a 2-D array with one row per sentence;
# list() splits it so each DataFrame cell holds that book's 1-D vector.
description_vectors = model.encode(df["description"].tolist())
df["embeddings"] = list(description_vectors)
df.head()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Unnamed: 0,title,author,genre,description,embeddings
0,The Great Gatsby,F. Scott Fitzgerald,Classic,"A tragic story of wealth, love and the America...","[0.05105867, 0.047107026, 0.0016340603, 0.0483..."
1,Pride and Prejudice,Jane Austen,Romance,"A witty exploration of manners, marriage and s...","[-0.06418331, 0.010296231, 0.07766996, 0.04040..."
2,Dune,Frank Herbert,Sci-Fi,"An epic saga of politics, prophecy, and surviv...","[-0.016855959, 0.08851563, -0.10087649, 0.0280..."
3,1984,George Orwell,Dystopian,A chilling vision of a totalitarian future and...,"[-0.009697904, 0.07350759, -0.03385395, 0.0277..."
4,The Hobbit,J.R.R. Tolkien,Fantasy,A hobbit embarks on a perilous adventure with ...,"[0.044445246, 0.086821035, -0.033670597, 0.010..."


In [4]:
import faiss

# Create the index
# Stack the per-book vectors into a single 2-D matrix (one row per book).
# FAISS expects a C-contiguous float32 array, so enforce that explicitly
# instead of relying on whatever dtype the embedding column happens to hold.
# np.vstack allocates a new array, so the in-place normalisation below does
# not modify the vectors stored in df.
emb_matrix = np.ascontiguousarray(
    np.vstack(df["embeddings"].to_numpy()), dtype=np.float32
)

# Read the dimensionality off the matrix itself: df["embeddings"][0] is a
# label lookup and fails when the frame's index has no label 0.
dim = emb_matrix.shape[1]

index = faiss.IndexFlatIP(dim)  # cosine similarity via dot product
faiss.normalize_L2(emb_matrix)  # in place; unit-length rows make IP == cosine
index.add(emb_matrix)

# Function to get top-N similar books
def recommend(title, top_n=3):
    """Return up to ``top_n`` books most similar to the book called ``title``.

    Relies on the notebook-level ``df`` (catalogue with an ``embeddings``
    column) and ``index`` (FAISS inner-product index whose ids are the row
    positions of ``df``).

    Args:
        title: Exact title of the query book. If several rows share the
            title, the first one is used.
        top_n: Maximum number of recommendations to return. Fewer rows come
            back when the catalogue does not hold ``top_n`` other books.

    Returns:
        A new DataFrame with columns ``title``, ``author``, ``genre`` and
        ``score`` (inner product between the normalised query and the indexed
        vectors), in the order returned by the index search. The query book
        itself is never part of the result.

    Raises:
        IndexError: If no row of ``df`` has the requested title.
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Work with row POSITIONS, not index labels: FAISS ids are positions, and
    # the results below are fetched with .iloc.
    matches = np.flatnonzero((df["title"] == title).to_numpy())
    if matches.size == 0:
        raise IndexError(f"No book titled {title!r} in the catalogue")
    query_pos = int(matches[0])

    # np.array(...) copies. normalize_L2 works in place and reshape() only
    # returns a view, so without the copy every call would overwrite the
    # embedding stored in df.
    query = np.array(df["embeddings"].iloc[query_pos], dtype=np.float32).reshape(1, -1)
    faiss.normalize_L2(query)

    # Ask for one extra hit to leave room for the query book itself, but never
    # for more than the index holds: FAISS pads missing hits with id -1, which
    # df.iloc would silently resolve to the LAST row of the catalogue.
    k = min(top_n + 1, index.ntotal)
    scores, indices = index.search(query, k)
    hit_ids, hit_scores = indices[0], scores[0]

    # Remove padding and the query book by position instead of assuming the
    # query is always hit #0 (ties / duplicate descriptions can reorder it).
    keep = (hit_ids >= 0) & (hit_ids != query_pos)
    hit_ids, hit_scores = hit_ids[keep][:top_n], hit_scores[keep][:top_n]

    # .copy() so adding the score column never writes into a slice of df.
    results = df.iloc[hit_ids][["title", "author", "genre"]].copy()
    results["score"] = hit_scores
    return results

# Demo: nearest neighbours of "The Hobbit" using the default top_n=3.
recommend("The Hobbit")


Unnamed: 0,title,author,genre,score
2,Dune,Frank Herbert,Sci-Fi,0.229081
3,1984,George Orwell,Dystopian,0.163529
1,Pride and Prejudice,Jane Austen,Romance,0.091205
