In [1]:
!pip install llama-cpp-python langchain faiss-cpu -q
!pip install gradio  -q
!pip install -U langchain-community accelerate bitsandbytes transformers sentence-transformers  -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.7/66.7 MB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m44.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.12.0 which is incompatible.[0m[31m
[2K

In [2]:
import numpy as np 
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
import gradio as gr
import ast

import warnings
# Install a global 'ignore' filter: no warnings are shown for the rest of the
# session, including deprecation notices from the libraries imported above.
warnings.filterwarnings('ignore')

### Preprocessing

In [None]:

# Load the raw movie metadata and preview the first rows (transposed so that
# each column becomes a row of the preview).
data = pd.read_csv('/kaggle/input/movie-recommendation-data/movies_metadata.csv')
print("Raw data:")
display(data.head().T)

# `genres` is read as the string form of a list of dicts. Parse each value and
# keep only the genre names, e.g. "[{'name': 'Drama'}]" -> ['Drama'].
data['genres'] = data['genres'].apply(
    lambda raw: [genre['name'] for genre in ast.literal_eval(raw)]
)


# Calculate weighted rate (IMDb-style weighted rating)
def calculate_weighted_rate(vote_average, vote_count, min_vote_count=10, mean_vote=5.0):
    """Blend an average vote with a prior mean, weighted by the number of votes.

    Computes ``v / (v + m) * R + m / (v + m) * C`` with ``R = vote_average``,
    ``v = vote_count``, ``m = min_vote_count`` and ``C = mean_vote``. The more
    votes a movie has relative to ``min_vote_count``, the closer the result is
    to its own ``vote_average``; with few votes it stays near ``mean_vote``.

    Args:
        vote_average: Average vote ``R`` (a number, or anything supporting
            element-wise arithmetic such as a pandas Series).
        vote_count: Number of votes ``v`` behind ``vote_average``.
        min_vote_count: Weight ``m`` given to the prior mean. Defaults to 10.
        mean_vote: Prior mean ``C`` that low-vote items are pulled towards.
            Defaults to 5.0, the value that was previously hard-coded.

    Returns:
        The weighted rating, of the same kind as the inputs.

    Note:
        No guard is applied when ``vote_count + min_vote_count`` is zero; the
        division then behaves as it does for the input type.
    """
    total_votes = vote_count + min_vote_count
    return (vote_count / total_votes) * vote_average + (min_vote_count / total_votes) * mean_vote

# Minimum vote count to prevent skewed results: the 95th percentile of the
# non-null vote counts.
vote_counts = data['vote_count'].dropna().astype('int')
min_vote_count = vote_counts.quantile(0.95)

# Create a new column 'weighted_rate'. The formula is plain arithmetic, so it
# is evaluated on whole columns at once instead of row by row.
data['weighted_rate'] = calculate_weighted_rate(data['vote_average'], data['vote_count'], min_vote_count)

# Keep only the columns used downstream FIRST, then drop incomplete rows.
# Calling dropna() on the full frame would also discard every movie that has a
# null in any column this notebook never reads.
data = data[['genres', 'title', 'overview', 'weighted_rate']].dropna().reset_index(drop=True)


# Create a new column by combining 'title', 'overview', 'genres' and the rating
# into the single text that is later split and embedded.
data['combined'] = data.apply(lambda row: f"Title: {row['title']}. Overview: {row['overview']} Genres: {', '.join(row['genres'])}. Rating: {row['weighted_rate']}", axis=1)
print("""
Prepared data:""")
display(data.head())





### Embedding

In [None]:
# Splitter that cuts each movie text into chunks before embedding
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)

# Build the chunk list and a parallel metadata list (one entry per chunk)
texts = []
metadatas = []
for title, overview, combined in zip(data['title'], data['overview'], data['combined']):
    pieces = text_splitter.split_text(combined)
    texts.extend(pieces)
    # Every chunk carries the title and overview of the movie it came from
    metadatas.extend({"title": title, "overview": overview} for _piece in pieces)

# texts[i] and metadatas[i] describe the same chunk
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_texts(texts, embeddings, metadatas=metadatas)

# Write the FAISS index to disk (optional, for reuse)
vectorstore.save_local("movie_faiss_index")

### Load GGUF Model and Set up RAG Pipeline

In [9]:
import os
# Load the Gemma 2 9B instruct GGUF model (q4_k_m file, per the path below)
llm_settings = dict(
    model_path="/kaggle/input/gemma-2-9b-it/gguf/q4_k_m/1/gemma-2-9b-it-q4_k_m.gguf",
    temperature=0.2,            # low temperature for more deterministic answers
    max_tokens=256,             # cap on the length of each generated answer
    n_ctx=2048,                 # context window size in tokens
    n_threads=os.cpu_count(),   # one thread per CPU reported by the OS
    n_batch=512,                # batch size used during inference
    use_mlock=True,             # ask to keep the model locked in RAM
    use_mmap=True,              # memory-map the model file
    verbose=False,
)
llm = LlamaCpp(**llm_settings)

# Custom prompt template for the recommender. It has two placeholders:
# {context} and {question}, matching `input_variables` below.
# NOTE(review): rule 1 asks for 3 movies but the example response lists 2.
# NOTE(review): the rules ask for YEAR and LEAD ACTORS, while the `combined`
# text built in the preprocessing cell only holds title, overview, genres and
# rating -- confirm the model is expected to supply those from its own knowledge.

prompt_template = """
You are an expert movie recommender. For user queries about actors/directors/genres:
1. Suggest 3 SPECIFIC movies with YEAR and LEAD ACTORS
2. Include 1 to 3-sentence descriptions
3. Explain WHY they match the request
4. NEVER suggest irrelevant movies

Example good response:
"Here are great Russell Crowe movies:
- Gladiator (2000): A former Roman general seeks revenge on the corrupt emperor who murdered his family and sentenced him to slavery. Features Crowe's iconic performance.
- A Beautiful Mind (2001): A Beautiful Mind is a 2001 American biographical drama film about the mathematician John Nash, a Nobel Laureate in Economics, played by Russell Crowe. Crowe won an Oscar for this role.
Why recommended? All showcase Crowe's range in historical dramas and character-driven stories."

Context: {context}
Question: {question}
Answer:"""

# Wrap the template string; PROMPT is handed to the QA chain via
# chain_type_kwargs.
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)

# Retriever over the FAISS index, configured with k=5 documents per query
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# RetrievalQA chain of type "stuff" using the custom prompt; the retrieved
# source documents are returned together with the answer
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT},
)



llama_init_from_model: n_ctx_per_seq (2048) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


### Build Gradio Interface

In [None]:
def handle_conversation(message, history):
    """Answer one chat turn with a recommendation from the RAG chain.

    Args:
        message: Text the user just submitted.
        history: Earlier turns of the chat as supplied by the chat UI. It is
            empty on the user's first message and is not used for retrieval.

    Returns:
        The answer text produced by ``qa_chain`` (its ``"result"`` entry), or
        a welcome prompt when ``message`` is empty or only whitespace.
    """
    # Greet only when there is nothing to answer. History is empty on the
    # user's first message, so greeting on empty history would throw away
    # that first query (including a clicked example) instead of answering it.
    if not message or not message.strip():
        return "Welcome to MovieMaster! What kind of movies would you like to discover today?"

    # Get recommendation
    result = qa_chain({"query": message})

    # Return ONLY the LLM's response; the source documents are not shown
    return result["result"]

# Sample queries offered as clickable examples in the chat UI
example_queries = [
    "I like sci-fi movies with strong female leads",
    "Recommend something similar to Inception",
    "What are the best romantic movies from 1990s?",
]

# Build the chat interface around handle_conversation
demo = gr.ChatInterface(
    fn=handle_conversation,
    examples=example_queries,
    title="MovieMaster 🎬",
    description="Your AI-powered movie recommendation assistant",
    theme=gr.themes.Soft(),
)

# Start the app with share=True
demo.launch(share=True)