# Demo: Using a Vector Store

In [1]:
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import torch, pandas as pd, faiss, requests, textwrap
from io import StringIO

  from .autonotebook import tqdm as notebook_tqdm
2025-06-24 18:23:57.144765: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-24 18:23:57.153692: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750782237.163647    2710 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750782237.166645    2710 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1750782237.174847    2710 computation_placer.cc:177] computation placer already r

# STEP 1

In [2]:
print("Downloading IMDb movie metadata…")

CSV_URL = (
  "https://raw.githubusercontent.com/"
  "sundeepblue/movie_rating_prediction/master/movie_metadata.csv"
)

try:
  # Fetch the CSV over HTTP; raise_for_status() turns HTTP error codes into exceptions
  # so they end up in the except branch below.
  raw = requests.get(CSV_URL, timeout=20)
  raw.raise_for_status()
  df = pd.read_csv(StringIO(raw.text))

  # ─── Minimal cleaning ────────────────────────────────────
  # "|" is the regex alternation operator, so ask for a literal replacement
  # explicitly (regex=False) instead of relying on the pandas default.
  # genres gets the same fillna("") as plot_keywords so that one missing value
  # cannot turn the whole concatenated "text" row into NaN.
  df["movie_title"]  = df["movie_title"].str.strip()
  df["genres"]    = df["genres"].fillna("").str.replace("|", ", ", regex=False)
  df["plot_keywords"] = df["plot_keywords"].fillna("").str.replace("|", ", ", regex=False)

  # Unified “text” field for retrieval
  df["text"] = (
    "Title: "    + df["movie_title"] + ". "
    "Director: "   + df["director_name"].fillna("Unknown") + ". "
    "Genres: "    + df["genres"] + ". "
    "Plot keywords: "+ df["plot_keywords"] + "."
  )
  df = df.head(2_000)            # ← keep notebook snappy
  print(f"✅ Loaded {len(df)} movies.")

except Exception as e:
  # Best-effort: any failure above (download, parsing or cleaning) is reported
  # and leaves an empty frame behind instead of stopping the notebook.
  print(f"❌ Download failed: {e}")
  df = pd.DataFrame()

Downloading IMDb movie metadata…
✅ Loaded 2000 movies.


In [3]:
# Peek at the first rows of the two columns the retrieval text is built from.
preview_columns = ["movie_title", "plot_keywords"]
print(df[preview_columns].head())

                                  movie_title  \
0                                      Avatar   
1    Pirates of the Caribbean: At World's End   
2                                     Spectre   
3                       The Dark Knight Rises   
4  Star Wars: Episode VII - The Force Awakens   

                                       plot_keywords  
0         avatar, future, marine, native, paraplegic  
1  goddess, marriage ceremony, marriage proposal,...  
2            bomb, espionage, sequel, spy, terrorist  
3  deception, imprisonment, lawlessness, police o...  
4                                                     


# STEP 2 

In [4]:
# Overwrite the "text" column with the compact form "<title>: <keywords>".
titles = df["movie_title"].str.strip()
keywords = df["plot_keywords"].fillna("")
df["text"] = titles + ": " + keywords

# Plain Python list of the texts, one entry per movie.
texts = df["text"].to_list()

# STEP 3 

In [5]:
import openai
from dotenv import load_dotenv  
# Read settings from the local API_KEYS.env file
# (presumably this is where the OpenAI API key lives — confirm).
load_dotenv('API_KEYS.env')  

# openai.api_key = "sk-..."  # NEVER DO THIS: do not hardcode API keys in the notebook

def batch_embed(texts, model="text-embedding-3-small", batch_size=100):
    """Embed ``texts`` through ``openai.embeddings.create``, one request per batch.

    Args:
        texts: Sequence of strings; it must support ``len()`` and slicing.
        model: Embedding model name forwarded to the API.
        batch_size: Maximum number of texts sent per request; must be >= 1.

    Returns:
        A list with the ``embedding`` of every item in every response, with
        batches appended in the order they were sent. Empty input yields ``[]``
        without any API call.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. Without this check a
            negative value would make ``range`` empty and silently return no
            embeddings at all.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    embeddings = []
    for start in range(0, len(texts), batch_size):
        response = openai.embeddings.create(
            input=texts[start:start + batch_size],
            model=model,
        )
        embeddings.extend(item.embedding for item in response.data)
    return embeddings

# Embed every movie text with the default model and batch size.
# NOTE(review): `embeddings` is not referenced by any later cell visible in this
# notebook — confirm this (billable) call is still needed.
embeddings = batch_embed(texts)


# STEP 4

In [6]:
# Create a new vector store named "movies-demo".
# Its id is kept in `vector_store_id` because later cells pass it to the upload,
# the assistant's tool_resources, and cleanup().
vector_store = openai.vector_stores.create(name="movies-demo")
vector_store_id = vector_store.id
print("Vector Store ID:", vector_store_id)

Vector Store ID: vs_685ad13825dc8191bb9c0bf6cfbf2015


# STEP 5

In [7]:
import json

# Build one record per movie. Zipping the index with the two needed columns
# yields the same values as DataFrame.iterrows() without constructing a Series
# object for every row.
records = [
    {
        "text": text,
        "metadata": {
            "movie_title": title.strip(),
            "index": int(idx),
        },
    }
    for idx, text, title in zip(df.index, df["text"], df["movie_title"])
]

# Serialize all records as a single JSON array; ensure_ascii=False writes
# non-ASCII characters as-is, which the UTF-8 file encoding can represent.
with open("movie_docs.json", "w", encoding="utf-8") as f:
    json.dump(records, f, ensure_ascii=False)

# STEP 5 - UPLOAD TO VECTOR STORE

In [8]:
# Upload the JSON file to the vector store and wait for the batch to finish.
with open("movie_docs.json", "rb") as f:
    file_batch = openai.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store_id,
        files=[f],
    )

# Fail fast when ingestion did not succeed: otherwise the assistant created
# later would search an empty vector store without any visible error.
if file_batch.status != "completed" or file_batch.file_counts.failed:
    raise RuntimeError(
        f"Vector store upload did not succeed: status={file_batch.status}, "
        f"file_counts={file_batch.file_counts}"
    )


# STEP 6 – find movies by plot - ASSISTANT

In [9]:
# Register an assistant whose file_search tool reads from the vector store
print("\n🤖 Creating assistant...")

# All creation arguments gathered in one mapping so they can be inspected before the call.
assistant_config = {
    "name": "Movie Recommendation Assistant",
    "instructions": """You are a helpful movie recommendation assistant. You have access to a database of movies with their plot keywords and metadata. When users ask for movie recommendations, search through the database to find relevant movies and provide thoughtful recommendations based on their preferences.""",
    "model": "gpt-4o-mini",
    "tools": [{"type": "file_search"}],
    "tool_resources": {"file_search": {"vector_store_ids": [vector_store_id]}},
}
assistant = openai.beta.assistants.create(**assistant_config)


🤖 Creating assistant...


# STEP 6 – find movies by plot – GET RECOMMENDATIONS FUNCTION

In [10]:
# Function to get movie recommendations
def get_movie_recommendations(query):
    """Ask the movie assistant for recommendations and return its reply text.

    A new thread is created for every call, ``query`` is posted to it as a user
    message, and the module-level ``assistant`` is run on the thread until the
    run finishes.

    Args:
        query: The user's request, sent as the message content.

    Returns:
        The text of the first text block in the first listed thread message, or
        a string starting with ``"Error:"`` when the run did not complete or no
        text could be found in the reply.
    """
    thread = openai.beta.threads.create()

    # The created message object is not needed afterwards, so it is not bound to a name.
    openai.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=query,
    )

    run = openai.beta.threads.runs.create_and_poll(
        thread_id=thread.id,
        assistant_id=assistant.id,
    )
    if run.status != 'completed':
        return f"Error: Run status is {run.status}"

    messages = openai.beta.threads.messages.list(thread_id=thread.id)
    if not messages.data:
        return "Error: No messages returned for the thread"

    # data[0] is treated as the assistant's reply (presumably the newest message
    # under the API's default ordering — TODO confirm). Non-text content blocks
    # are skipped instead of failing on a missing `.text` attribute.
    for block in messages.data[0].content:
        if getattr(block, "type", None) == "text":
            return block.text.value
    return "Error: Assistant reply contained no text content"


In [None]:
# STEP 6 – find movies by plot – GET RECOMMENDATIONS

In [11]:
# Test the recommendation system
print("\n🎬 Testing movie recommendations...")

# Sample questions sent to the assistant one after another.
queries = [
    "Can you recommend some time travel adventure movies?",
    "I'm looking for spy thriller movies with espionage",
    "What are some good movies about the future or sci-fi?"
]

for query in queries:
    print(f"\n📝 Query: {query}")
    print("🎭 Recommendations:")
    try:
        response = get_movie_recommendations(query)
        # Wrap the reply to 80 columns so it stays readable in the cell output.
        print(textwrap.fill(response, width=80))
    except Exception as e:
        # A failure for one query is reported and the loop moves on to the next one.
        print(f"❌ Error: {e}")
    print("-" * 80)

# Cleanup function (optional)
def cleanup():
    """Delete the assistant, the vector store and the local JSON file.

    Each step runs independently: a failure is printed as a cleanup error and
    the remaining steps are still attempted, so one failing deletion cannot
    leave the other resources behind. The function itself never raises.
    """
    import os

    def _delete_assistant():
        # The assistant was created through openai.beta.assistants, so it has to
        # be deleted through the same namespace.
        openai.beta.assistants.delete(assistant.id)
        print(f"✅ Deleted assistant {assistant.id}")

    def _delete_vector_store():
        openai.vector_stores.delete(vector_store_id)
        print(f"✅ Deleted vector store {vector_store_id}")

    def _delete_local_file():
        if os.path.exists("movie_docs.json"):
            os.remove("movie_docs.json")
            print("✅ Deleted local JSON file")

    for step in (_delete_assistant, _delete_vector_store, _delete_local_file):
        try:
            step()
        except Exception as e:
            print(f"❌ Cleanup error: {e}")

# Closing hints for the reader; cleanup() is only mentioned here, not invoked.
print("\n🎉 Demo complete! You can now use get_movie_recommendations() to get movie suggestions.")
print("💡 To clean up resources, run: cleanup()")


🎬 Testing movie recommendations...

📝 Query: Can you recommend some time travel adventure movies?
🎭 Recommendations:


  thread = openai.beta.threads.create()
  message = openai.beta.threads.messages.create(
  run = openai.beta.threads.runs.create_and_poll(
  messages = openai.beta.threads.messages.list(thread_id=thread.id)


Here are some great time travel adventure movies for you to consider:  1. **Back
to the Future** - A classic adventure where Marty McFly accidentally travels
back in time to the 1950s, disrupting his parents’ meeting. 2. **The Time
Machine** - This adaptation of H.G. Wells' novel follows a scientist who invents
a time machine and explores the distant future【4:1†source】. 3. **Terminator 2:
Judgment Day** - An action-packed sequel featuring a time-traveling cyborg sent
back to protect a future leader【4:2†source】. 4. **Looper** - A unique spin on
time travel where hitmen called "loopers" eliminate targets sent from the
future, leading to a shocking twist【4:8†source】. 5. **Midnight in Paris** - A
charming tale of a writer who finds himself mysteriously transported to the
1920s nightly, exploring his literary inspirations【4:10†source】. 6. **Kate &
Leopold** - A romantic comedy in which a 19th-century Duke travels through time
to modern New York【4:9†source】. 7. **X-Men: Days of Future Past**

# STEP 6 - MINIMAL EXAMPLE 

In [14]:
# Minimal example: a single question answered with raw API calls.
query = "adventure movies"
thread = openai.beta.threads.create()

# Post the question to the new thread as a user message.
message = openai.beta.threads.messages.create(
    thread_id=thread.id, role="user", content=query
)

# Run the assistant on the thread and wait until the run finishes.
run = openai.beta.threads.runs.create_and_poll(
    thread_id=thread.id, assistant_id=assistant.id
)

# Read the thread back; the last expression is displayed as the cell output.
messages = openai.beta.threads.messages.list(thread_id=thread.id)
messages.data[0].content[0].text.value

  thread = openai.beta.threads.create()
  message = openai.beta.threads.messages.create(
  run = openai.beta.threads.runs.create_and_poll(
  messages = openai.beta.threads.messages.list(thread_id=thread.id)


"Here are some adventure movies that you might enjoy:\n\n1. **The River Wild** - This film features a thrilling rafting journey where a family faces danger from a criminal on the run .\n\n2. **Jurassic Park** - A classic adventure that takes viewers to a theme park filled with dinosaurs, where survival becomes the ultimate goal .\n\n3. **Indiana Jones and the Temple of Doom** - The iconic archaeologist embarks on a dangerous adventure in an exotic locale, facing various challenges and foes .\n\n4. **Lara Croft: Tomb Raider** - This film follows the adventures of a tomb raider who must uncover ancient secrets and battle adversaries in her quest .\n\n5. **Journey to the Center of the Earth** - A scientific expedition leads to unforeseen perils in a fantastical underground world, perfect for adventure seekers .\n\n6. **The Chronicles of Riddick** - A science fiction adventure that captures a unique blend of action and exploration in a futuristic universe .\n\n7. **Life of Pi** - This visu