In [None]:
%pip install ollama

In [None]:
%pip install matplotlib

In [3]:
%pip install pandas

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [6]:
%pip install nbimporter

Defaulting to user installation because normal site-packages is not writeable
Collecting nbimporter
  Downloading nbimporter-0.3.4-py3-none-any.whl (4.9 kB)
Installing collected packages: nbimporter
Successfully installed nbimporter-0.3.4
Note: you may need to restart the kernel to use updated packages.


In [None]:
%pip install nbformat

ModuleNotFoundError: No module named 'vector_only'

In [2]:
!ollama list

NAME           ID              SIZE      MODIFIED     
qwen3:32b      030ee887880f    20 GB     27 hours ago    
gpt-oss:20b    aa4295ac10c3    13 GB     27 hours ago    
gemma3:12b     f4031aab637d    8.1 GB    27 hours ago    
llama3.2:1b    baf6a787fdff    1.3 GB    27 hours ago    
mistral:7b     6577803aa9a0    4.4 GB    27 hours ago    
r1-1776:70b    140ea940f21d    42 GB     6 days ago      
gemma3:270m    e7d36fb2c3b3    291 MB    6 days ago      


In [None]:
!ollama pull mistral:7b

In [None]:
!ollama pull llama3.2:1b

In [None]:
!ollama pull gemma3:12b

In [None]:
!ollama pull gpt-oss:20b

In [81]:
import json

def extract_user_emotion_data(jsonl_path: str) -> list[dict]:
    """
    Extract user emotion vectors and rated-book lists from a JSONL file.

    Every non-blank line is parsed as one JSON object. Books whose ``rating``
    is 0 are removed from both book lists; a book without a ``rating`` key is
    removed as well, because the lookup defaults to 0.

    Lines that are not valid JSON, or that decode to something other than a
    JSON object, are reported on stdout and skipped. ``all_books``,
    ``preference_books`` and ``user_emotion_vector`` values that are absent or
    ``null`` are treated as empty instead of raising.

    Args:
        jsonl_path: Path to the UTF-8 encoded JSONL file.

    Returns:
        A list with one dict per accepted line:
        {
            "user_id": value of the record's "user" key (None if absent),
            "user_emotion_vector": dict,
            "preference_books": list[dict],
            "all_books": list[dict]
        }

    Raises:
        OSError: if the file cannot be opened.
    """
    users_data = []

    with open(jsonl_path, "r", encoding="utf-8") as f:
        for line_number, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue

            # Keep the try body minimal so only parse failures are reported as
            # JSON errors; anything else should surface normally.
            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Skipping line: JSON error → {e}")
                continue

            # A syntactically valid line can still be a list/number/string,
            # which has no .get(); skip it instead of crashing the whole load.
            if not isinstance(record, dict):
                print(
                    f"Skipping line {line_number}: expected a JSON object, "
                    f"got {type(record).__name__}"
                )
                continue

            # `or []` also covers an explicit JSON null, which .get()'s default
            # would let through as None.
            all_books = [
                b for b in (record.get("all_books") or [])
                if b.get("rating", 0) != 0
            ]
            preference_books = [
                b for b in (record.get("preference_books") or [])
                if b.get("rating", 0) != 0
            ]

            users_data.append({
                "user_id": record.get("user"),
                "user_emotion_vector": record.get("user_emotion_vector") or {},
                "preference_books": preference_books,
                "all_books": all_books,
            })

    return users_data


# Usage: load every user record from the combined emotion-vector / preference export.
# NOTE(review): this is an absolute, user-specific path, so the cell only runs on a
# machine where the file exists at exactly this location.
emotion_data = extract_user_emotion_data(
    "/users/guest/a/annegg03/LLM_Semantics/personalization/combination/user_emotion_vectors_for_all_books_with_preferences.jsonl"
)

# Sanity check: one line per user with the number of preference books and the
# number of books in all_books, both counted after the rating-0 filter applied
# by extract_user_emotion_data.
for user in emotion_data:
    print(f"User {user['user_id']}: {len(user['preference_books'])} preference books, {len(user['all_books'])} total books")

User 709: 6 preference books, 21 total books
User 741: 6 preference books, 14 total books
User 4795: 6 preference books, 13 total books
User 5448: 6 preference books, 14 total books
User 8936: 22 preference books, 49 total books
User 10199: 7 preference books, 14 total books
User 10354: 29 preference books, 60 total books
User 12272: 7 preference books, 14 total books
User 12538: 57 preference books, 132 total books
User 12824: 42 preference books, 94 total books
User 19664: 22 preference books, 53 total books
User 22521: 10 preference books, 32 total books
User 26057: 8 preference books, 18 total books
User 27313: 6 preference books, 13 total books
User 28360: 10 preference books, 29 total books
User 28594: 17 preference books, 53 total books
User 30495: 6 preference books, 13 total books
User 30779: 6 preference books, 16 total books
User 31900: 11 preference books, 25 total books
User 32569: 8 preference books, 16 total books
User 32599: 11 preference books, 24 total books
User 3764

In [82]:
def get_all_remaining_books(user: dict) -> list[dict]:
    """
    Return the user's books that are not among their preference books.

    Books are matched by their "isbn" value. The relative order of
    user["all_books"] is kept, and the returned list holds the same dict
    objects (no copies). Missing "preference_books" / "all_books" keys are
    treated as empty lists.
    """
    # Collect the ISBNs that must be excluded.
    excluded_isbns = set()
    for preferred in user.get("preference_books", []):
        excluded_isbns.add(preferred["isbn"])

    # Keep every book whose ISBN was not seen above.
    leftover = []
    for candidate in user.get("all_books", []):
        if candidate["isbn"] in excluded_isbns:
            continue
        leftover.append(candidate)

    return leftover

In [83]:
import random

def build_ranking_prompt_with_user_profile(
    preference_books: list[dict],
    ranking_books: list[dict],
    emotion_vector: dict = None,
    seed: int = None,
) -> str:
    """
    Build a ranking prompt from a user's preference books and emotion vector.

    The prompt has three parts: a user profile (preference books with their
    ratings, plus the emotion scores when given), the candidate books in
    shuffled order, and the ranking instructions / output format.

    Args:
        preference_books: Books shown in the profile; each dict must provide
            "title", "author" and "rating".
        ranking_books: Candidate books to rank; each dict must provide
            "title" and "author". The input is never modified.
        emotion_vector: Mapping of emotion name to numeric score. Scores are
            rendered with the ``.2%`` format (0.5 -> "50.00%"). When None or
            empty, the emotion section is left out.
        seed: Optional seed for the candidate shuffle. With a seed the order
            is reproducible and the global ``random`` state is untouched;
            with None (default) the global ``random`` generator is used.

    Returns:
        The complete prompt text.

    Raises:
        KeyError: if a book dict lacks one of the required keys.
    """

    # Shuffle a copy of the candidates to avoid order bias; list() accepts any
    # iterable and leaves the caller's sequence untouched.
    shuffled_books = list(ranking_books)
    if seed is None:
        random.shuffle(shuffled_books)
    else:
        # A private generator keeps the run reproducible without reseeding
        # the module-level RNG that other code may rely on.
        random.Random(seed).shuffle(shuffled_books)

    # --- User Profile Section ---
    profile_parts = ["USER PROFILE\n\n", "Books this user has rated highly:\n\n"]
    for i, book in enumerate(preference_books, start=1):
        profile_parts.append(
            f"{i}. {book['title']} by {book['author']} (Rating: {book['rating']}/10)\n"
        )

    # --- Emotion vector context ---
    if emotion_vector:
        profile_parts.append("\nEmotional preferences derived from reading history:\n\n")
        for emotion, score in emotion_vector.items():
            profile_parts.append(f"- {emotion}: {score:.2%}\n")

    user_profile = "".join(profile_parts)

    # --- Ranking candidates ---
    # Each entry keeps its own trailing newline, so joining with "\n" leaves a
    # blank line between consecutive candidates in the prompt.
    candidate_lines = [
        f"- {book['title']} by {book['author']}\n" for book in shuffled_books
    ]
    candidates_block = "\n".join(candidate_lines)

    # --- Final prompt ---
    prompt = f"""
You are a book recommendation system specializing in personalized rankings.

{user_profile}

Based on this user profile, rank the following books from MOST to LEAST likely to match this user's preferences.

Books to rank:

{candidates_block}

Instructions:
- Rank ALL books listed above based on alignment with the user's demonstrated preferences and emotional profile
- Consider the themes, tone, and genres of the user's highly-rated books
- Factor in the user's emotional preferences when making your ranking
- Do NOT use general popularity or quality metrics

IMPORTANT CONSTRAINTS:
- Rank ALL books listed under "Books to rank"
- Output ONLY a ranked numbered list with book titles
- Do NOT introduce any new books
- Do NOT reference the preference books in your ranking

Output format:
Return ONLY a numbered ranked list with the book titles:

1. Book Title 1
2. Book Title 2
3. Book Title 3
...
"""

    return prompt

In [None]:
# NOTE(review): draft prompt text. This bare string literal is not assigned to a name
# or passed to any call in the visible cells, so it does not reach the models.
# TODO confirm it is unused, then delete it or send it as a separate message.
"""You are a recommendation system.
Your task is to rank books based on how likely a specific user would enjoy them, given their past preferences."""

In [84]:
import ollama

def run_ollama(prompt, model):
    """
    Send `prompt` as a single user message to the given Ollama model.

    Returns the text found under response["message"]["content"], with
    leading and trailing whitespace stripped. Any exception raised by
    ollama.chat propagates to the caller.
    """
    user_turn = {"role": "user", "content": prompt}
    reply = ollama.chat(model=model, messages=[user_turn])
    reply_text = reply["message"]["content"]
    return reply_text.strip()

In [95]:
def format_candidate_books(ranking_books, user):
    """
    Reduce each candidate book to the fields stored with the results.

    Every entry carries the book's "title"; when the book has a "rating"
    key, its value is copied under "user_rating". Input order is preserved
    (no sorting is applied). `user` is accepted but not used.
    """

    def _slim(book):
        # "title" is mandatory; the rating is carried over only when present.
        slim = {"title": book["title"]}
        if "rating" in book:
            slim["user_rating"] = book["rating"]
        return slim

    return [_slim(book) for book in ranking_books]

In [96]:
def run_sample_on_models_new(users_sample, models):
    """
    Query every model once per user with that user's ranking prompt.

    For each user the prompt is built a single time from the preference
    books, the remaining (non-preference) books and the emotion vector, and
    is then sent to each model in `models`. Users with no remaining books
    are skipped. A failing model call does not abort the run: the exception
    text is stored as "Error: <message>" in the "llm_output" field.

    Returns a list with one dict per (user, model) pair, holding the keys
    "user_id", "model", "prompt", "candidate_books" and "llm_output".
    """
    collected = []

    for profile in users_sample:
        liked_books = profile.get("preference_books", [])
        books_to_rank = get_all_remaining_books(profile)
        emotions = profile.get("user_emotion_vector", {})

        # Only users with something left to rank produce results.
        if books_to_rank:
            prompt_text = build_ranking_prompt_with_user_profile(liked_books, books_to_rank, emotions)
            candidates = format_candidate_books(books_to_rank, profile)

            for model_name in models:
                try:
                    reply = run_ollama(prompt_text, model_name)
                except Exception as err:
                    # Best effort: keep the failure alongside the other results.
                    reply = f"Error: {err}"

                collected.append({
                    "user_id": profile["user_id"],
                    "model": model_name,
                    "prompt": prompt_text,
                    "candidate_books": candidates,
                    "llm_output": reply,
                })

    return collected

In [99]:
# No sub-sampling here: the "sample" is the full list of loaded users.
users_sample = emotion_data
# Ollama model tags to query; 'gpt-oss:20b' is left out of this run (commented out).
models = ['llama3.2:1b', 'mistral:7b', 'gemma3:12b'] #'gpt-oss:20b'

# Produces one result dict per (user, model) pair; users with no remaining books
# are skipped. Every pair triggers one chat request through run_ollama.
results = run_sample_on_models_new(
    users_sample,
    models
)

In [100]:
import json

# Persist all ranking results as indented UTF-8 JSON. ensure_ascii=False writes
# non-ASCII characters as-is instead of \u escapes. The path is relative to the
# kernel's working directory, and mode "w" overwrites any existing file.
with open("combined_ranking_all_users_results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=2)