In [1]:
# Core stack: llama-cpp-python (runs the local GGUF model), LangChain, and the CPU build of FAISS.
!pip install llama-cpp-python langchain faiss-cpu -q
# Gradio provides the chat UI launched near the end of the notebook.
!pip install gradio -q
# -U upgrades these to their latest releases; no versions are pinned, so a rerun may resolve differently.
# NOTE(review): accelerate and bitsandbytes are not referenced by any cell visible here — confirm they are needed.
!pip install -U langchain-community accelerate bitsandbytes transformers sentence-transformers -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.7/66.7 MB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m46.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.12.0 which is incompatible.[0m[31m
[2K   [90m━━━━━━━━━━━━━━━━━━━━

In [2]:
import numpy as np 
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
import gradio as gr
import ast
import os
from functools import lru_cache

import warnings
warnings.filterwarnings('ignore')

In [3]:
from importlib import metadata  # stdlib; replaces the deprecated pkg_resources API

# Distribution names (as published on PyPI) whose installed versions are reported.
packages = [
    "numpy",
    "pandas",
    "langchain",
    "gradio",
    "faiss-cpu",  # or "faiss-gpu" if applicable
    "torch",
    "transformers",
    "accelerate",
    "sentence_transformers",
    "llama-cpp-python",
]


def installed_version(package_name):
    """Return the installed version of a distribution, or None if it is absent.

    Only the distribution's own metadata is read. Unlike
    ``pkg_resources.require``, this does not resolve the package's dependency
    tree, so an unrelated dependency conflict in the environment cannot make
    the lookup fail.

    Args:
        package_name: Distribution name, e.g. "faiss-cpu".

    Returns:
        The version string, or None when the distribution is not installed.
    """
    try:
        return metadata.version(package_name)
    except metadata.PackageNotFoundError:
        return None


for package_name in packages:
    version = installed_version(package_name)
    if version is None:
        print(f"{package_name} is not installed.")
    else:
        print(f"{package_name} version: {version}")

numpy version: 1.26.4
pandas version: 2.2.3
langchain version: 0.3.19
gradio version: 5.20.0
faiss-cpu version: 1.10.0
torch version: 2.5.1+cu121
transformers version: 4.49.0
accelerate version: 1.4.0
sentence_transformers version: 3.4.1
llama-cpp-python version: 0.3.7


### **Precompute & Cache FAISS Index**

In [4]:
# --------------------------
# 1. Precompute & Cache FAISS Index
# --------------------------

def get_vectorstore(csv_path='/kaggle/input/movie-recommendation-data/movies_metadata.csv'):
    """Build a FAISS index over text descriptions of the most-voted movies.

    Steps: read the metadata CSV, keep movies whose ``vote_count`` is at or
    above the 90th percentile, score them with the IMDb weighted-rating
    formula, render one text line per movie, split the joined text into
    chunks, and embed the chunks.

    This function does not cache anything: every call re-reads the CSV and
    re-embeds all chunks. Persisting or reusing the index is up to the caller.

    Args:
        csv_path: Location of the movie metadata CSV. It must provide the
            columns ``title``, ``overview``, ``genres``, ``vote_count`` and
            ``vote_average``.

    Returns:
        The FAISS vector store built from the text chunks. Only raw text is
        indexed; no per-document metadata is attached.
    """
    data = pd.read_csv(csv_path)

    # `genres` is stored as the string form of a list of dicts; keep only each dict's 'name'.
    data['genres'] = data['genres'].apply(ast.literal_eval).apply(lambda x: [g['name'] for g in x])
    data = data.dropna(subset=['vote_count', 'vote_average'])

    # IMDb formula for weighted rating
    C = data['vote_average'].mean()
    m = data['vote_count'].quantile(0.90)
    # Copy the filtered rows so the column assignments below write to an
    # independent frame instead of a slice of the original.
    data = data[data['vote_count'] >= m].copy()
    votes = data['vote_count']
    data['weighted_rate'] = (votes / (votes + m)) * data['vote_average'] + (m / (votes + m)) * C

    # A missing title/overview would otherwise be rendered as the literal text "nan".
    data['title'] = data['title'].fillna('Unknown title')
    data['overview'] = data['overview'].fillna('No plot summary available')

    # One line per movie; zipping the columns avoids building a Series per row.
    movie_lines = [
        f"Title: {title}. Plot: {overview}. Genres: {', '.join(genres)}. Rating: {rating:.1f}"
        for title, overview, genres, rating in zip(
            data['title'], data['overview'], data['genres'], data['weighted_rate']
        )
    ]

    # Split the newline-joined movie lines into chunks for embedding.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=512,  # Reduced from 1024 for better retrieval speed
        chunk_overlap=30
    )
    texts = text_splitter.split_text("\n".join(movie_lines))

    # Embed the chunks and build the FAISS index (text only, no metadata).
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_texts(texts, embeddings)

# Reuse the index saved by a previous run when it exists; otherwise build it
# and persist it. Delete the "movie_faiss_index" directory to force a rebuild
# (for example after the source CSV or the text format changes).
if os.path.exists(os.path.join("movie_faiss_index", "index.faiss")):
    vectorstore = FAISS.load_local(
        "movie_faiss_index",
        # Must be the same embedding model that get_vectorstore() indexes with.
        HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
        # The saved docstore is a pickle. Loading it is acceptable here only
        # because this notebook wrote the file itself; never point this at an
        # index obtained from an untrusted source.
        allow_dangerous_deserialization=True,
    )
else:
    vectorstore = get_vectorstore()
    vectorstore.save_local("movie_faiss_index")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


### **Optimized LLM Configuration**


In [5]:
# --------------------------
# 2. Optimized LLM Configuration
# --------------------------
# Settings are collected in one mapping so they can be inspected or tweaked
# before the model is loaded.
llm_settings = dict(
    # Quantized (q4_k_m) Gemma 2 9B instruction-tuned weights in GGUF format.
    model_path="/kaggle/input/gemma-2-9b-it/gguf/q4_k_m/1/gemma-2-9b-it-q4_k_m.gguf",
    # Sampling / generation limits.
    temperature=0.3,
    max_tokens=300,
    # Context window; smaller than the model's training context (see the
    # warning printed in the cell output).
    n_ctx=2048,
    # CPU runtime tuning.
    n_threads=6,           # Use physical cores only (check !nproc)
    n_batch=2048,          # Maximize batch throughput
    use_mlock=True,
    use_mmap=True,
    verbose=False,
    # Options tried and left disabled:
    #rope_freq_base=1000000,# Better perplexity for shorter contexts
    #flash_attn=False       # Disable if CPU-only
)

llm = LlamaCpp(**llm_settings)

llama_init_from_model: n_ctx_per_seq (2048) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


### **Efficient RetrievalQA Chain**

In [6]:
# --------------------------
# 3. Efficient RetrievalQA Chain
# --------------------------
prompt_template = """
You are an expert movie recommender. For user queries about actors/directors/genres:
1. Suggest 3 SPECIFIC movies with YEAR and LEAD ACTORS
2. Include 1 to 3-sentence descriptions
3. Explain WHY they match the request
4. NEVER suggest irrelevant movies

Example good response:
"Here are great Russell Crowe movies:
- Gladiator (2000): A former Roman general seeks revenge on the corrupt emperor who murdered his family and sentenced him to slavery. Features Crowe's iconic performance.
- A Beautiful Mind (2001): A Beautiful Mind is a 2001 American biographical drama film about the mathematician John Nash, a Nobel Laureate in Economics, played by Russell Crowe. Crowe won an Oscar for this role.
Why recommended? All showcase Crowe's range in historical dramas and character-driven stories."

Context: {context}
Question: {question}
Answer:"""

# The template exposes two slots: {context} (retrieved chunks) and {question}.
qa_prompt = PromptTemplate.from_template(prompt_template)

# Fetch the 3 nearest chunks from the FAISS index for each query.
top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# "stuff" chain: the retrieved chunks are placed together into a single prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=top_k_retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": qa_prompt},
    return_source_documents=False,  # only the answer text is used downstream
)
#----
#qa_chain = RetrievalQA.from_chain_type(
#    llm=llm,
#    chain_type="stuff",
#    retriever=vectorstore.as_retriever(
#        search_kwargs={"k": 2, "search_type": "mmr"}  # 2 docs + diversity
#    ),
#    chain_type_kwargs={
#        "prompt": PromptTemplate.from_template(prompt_template),
#        "document_prompt": PromptTemplate.from_template("{page_content}")  # No metadata
#    },
#   return_source_documents=False
#)

In [7]:
def handle_conversation(message, history):
    """Answer one chat message with the retrieval-augmented QA chain.

    Args:
        message: The user's latest message text.
        history: Earlier chat turns supplied by the chat UI. Accepted so the
            function matches the chat-callback signature, but not used: each
            message is answered independently of previous turns.

    Returns:
        The answer text produced by ``qa_chain``, or a short prompt asking for
        input when ``message`` is empty or whitespace-only.
    """
    # Blank input: skip retrieval and generation entirely.
    if not message or (isinstance(message, str) and not message.strip()):
        return "Please tell me what kind of movies you're looking for."

    # Calling the chain object directly (``qa_chain(...)``) is deprecated in
    # LangChain; ``invoke`` is the supported entry point and returns the same
    # mapping with the answer under "result".
    result = qa_chain.invoke({"query": message})
    return result["result"]

In [8]:
# Launch Gradio interface
# Sample queries offered as clickable suggestions.
example_prompts = [
    "I like sci-fi movies with strong female leads",
    "Recommend something similar to Inception",
    "What are the best movies about AI from 2010s?",
]

# Chat window: starts with an empty history and sizes itself relative to the
# viewport (viewport height minus room for header/footer).
chat_window = gr.Chatbot(
    value=[],
    height="calc(100vh - 200px)",
    container=True,
)

# Wire the handler, chat window and examples into a single chat app.
demo = gr.ChatInterface(
    fn=handle_conversation,
    chatbot=chat_window,
    examples=example_prompts,
    title="MovieMaster 🎬",
    description="Your AI-powered movie recommendation assistant",
    theme=gr.themes.Soft(),
)

# share=True requests a temporary public URL in addition to the local one
# (both are printed in the cell output).
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://29dee5aa5a8beda206.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




### **Lightweight Gradio Config**

In [9]:
# Install the profiler on the fly; it supplies the %memit magic loaded below.
!pip install memory_profiler
%load_ext memory_profiler
# Profile one sample query (with an empty history) through the chat handler.
# The cell output reports the peak memory and the increment for this call.
%memit handle_conversation("Recommend dark comedy films", [])

Collecting memory_profiler
  Downloading memory_profiler-0.61.0-py3-none-any.whl.metadata (20 kB)
Downloading memory_profiler-0.61.0-py3-none-any.whl (31 kB)
Installing collected packages: memory_profiler
Successfully installed memory_profiler-0.61.0
peak memory: 7762.98 MiB, increment: 41.50 MiB
