In [None]:
#!pip install tmdbv3api
#!pip install openai
#!pip install pinecone-client


In [None]:
import os
import time

import openai
import requests
from tmdbv3api import TMDb, Discover, Movie

In [None]:
# Configure the TMDb client. The API key is read from the TMDB_API_KEY
# environment variable so the secret never has to be saved in the notebook;
# it falls back to an empty string (the previous placeholder) when unset.
tmdb = TMDb()
tmdb.api_key = os.environ.get('TMDB_API_KEY', '')

# Movie endpoint wrapper used below for title search and detail lookups.
movie = Movie()

In [None]:
# Set up OpenAI API: take the key from the OPENAI_API_KEY environment
# variable instead of hardcoding it (empty string if the variable is unset).
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

In [None]:
# Search TMDb by title and keep the first hit.
# NOTE(review): indexing [0] assumes the search returned at least one result.
search_result = movie.search('Inception')
inception = search_result[0]

In [None]:
# Display the first search hit.
inception

In [None]:
# Look up the full details for the search hit by its id and show the
# overview text of that record.
inception_details = movie.details(inception.id)
plot_summary = inception_details.overview
plot_summary

In [None]:
# Discover client; fetch_movies() calls discover.discover_movies() on it
# to page through movie listings.
discover = Discover()

In [None]:
def fetch_movies(params, pages_limit=10, delay_seconds=1):
    """Collect Discover results page by page.

    Args:
        params: Query parameters passed to ``discover.discover_movies``.
            The mapping is copied, so the caller's dict is left untouched.
        pages_limit: Number of pages to request, starting at page 1.
        delay_seconds: Pause between consecutive requests, in seconds.

    Returns:
        A list with the movies from every requested page, in page order.
    """
    # Work on a copy: writing the "page" key into the caller's dict would
    # leak state into later calls that reuse the same params.
    page_params = dict(params)
    all_movies = []
    for page in range(1, pages_limit + 1):
        print('Retrieving page', page)
        page_params["page"] = page
        all_movies.extend(discover.discover_movies(page_params))
        print('..movies total:', len(all_movies))

        # Sleep between requests to avoid rate limiting; no pause is needed
        # after the final page.
        if page < pages_limit:
            time.sleep(delay_seconds)

    return all_movies

def fetch_movies_by_year(start_year, end_year, params, pages_limit=10, delay_seconds=1):
    """Fetch movies for every release year in ``[start_year, end_year]``.

    Args:
        start_year: First primary release year to fetch (inclusive).
        end_year: Last primary release year to fetch (inclusive).
        params: Base query parameters. A per-year copy is made, so the
            caller's dict never gains a ``primary_release_year`` entry.
        pages_limit: Number of pages requested per year.
        delay_seconds: Pause between page requests, forwarded to
            ``fetch_movies``.

    Returns:
        A list with the movies of all years, in ascending year order.
    """
    all_movies = []
    for year in range(start_year, end_year + 1):
        # Build a fresh dict per year instead of mutating the shared params.
        year_params = dict(params, primary_release_year=str(year))
        all_movies.extend(fetch_movies(year_params, pages_limit, delay_seconds))

    return all_movies

# Base Discover query: sorted by descending popularity, language "en",
# with include_adult and include_video both turned off.
params = {
    "sort_by": "popularity.desc",
    "language": "en",
    "include_adult": "false",
    "include_video": "false",
}

# Fetch movies released between 2019 and 2021 (up to 20 pages per year)
movies = fetch_movies_by_year(2019, 2021, params, pages_limit=20)



In [None]:
# Number of movie records collected into `movies`.
len(movies)

In [None]:
# Inspect the first fetched movie record.
movies[0]

In [None]:
# Helper function to generate embeddings
def generate_embeddings(text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text``.

    Uses the same ``openai.Embedding.create`` call and response layout
    (``response["data"][0]["embedding"]``) as the other cells of this
    notebook.

    Args:
        text: The string to embed.
        model: Name of the embedding model to request.

    Returns:
        The embedding found at ``response["data"][0]["embedding"]``.
    """
    response = openai.Embedding.create(input=text, model=model)
    return response["data"][0]["embedding"]

# Container intended for movie plot-summary embeddings.
# NOTE(review): nothing in the visible cells fills or reads this dict.
movie_embeddings = {}

In [None]:
# Embed the first movie's overview and check the length of the returned
# vector. NOTE(review): the commented-out create_index call in this notebook
# uses dimension=1536, so this length presumably has to match - confirm.
response = openai.Embedding.create(
  input=movies[0]['overview'],
  model="text-embedding-ada-002"
)
len(response['data'][0]["embedding"])

In [None]:
import os
import pinecone
import numpy as np

# Your Pinecone API Key, read from the PINECONE_API_KEY environment variable
# (empty string if unset) rather than being hardcoded in the notebook.
api_key = os.environ.get("PINECONE_API_KEY", "")

# Initialize Pinecone
pinecone.init(api_key=api_key, environment="us-west1-gcp")

# Name of the Pinecone index used by the cells below
index_name = "movies-index"

In [None]:
# pinecone.create_index(index_name, dimension=1536, metric="cosine", pod_type="p1")

In [None]:
# Connect to the Pinecone index named by index_name; the upsert and query
# cells below go through this handle.
index = pinecone.Index(index_name)

In [None]:


# Embed every fetched movie and store the vector in Pinecone.
# The loop variable is deliberately not called `movie`: that name is bound to
# the Movie() client earlier in the notebook and must not be overwritten, or
# re-running the search/details cells afterwards would break.
for movie_record in movies:
    text_data = 'Title: ' + movie_record['title'] + '\nOverview: ' + movie_record['overview']
    print('adding movie id', movie_record['id'], '>>', text_data)
    response = openai.Embedding.create(
        input=text_data,
        model="text-embedding-ada-002"
    )
    embedding = response['data'][0]["embedding"]

    # A record without a poster has no path to append (TMDb may return a
    # null poster_path - confirm against the API docs); concatenating None
    # would raise TypeError, so fall back to an empty URL instead.
    poster_path = movie_record['poster_path']
    poster_url = 'https://image.tmdb.org/t/p/w500' + poster_path if poster_path else ''
    meta = {'text': text_data, 'poster_url': poster_url}

    # Ingest this movie as a single (id, vector, metadata) tuple
    index.upsert(vectors=[(str(movie_record['id']), embedding, meta)])



In [None]:
def get_matches(res, movie_map):
    """Print every entry of ``res['matches']`` and return that collection.

    ``movie_map`` is accepted but not used by this function.
    """
    found = res['matches']
    for hit in found:
        print('match:', hit)
    return found

In [None]:
def convert_movies_to_map(movies):
    """Index movie records by the string form of their ``'id'`` field.

    When two records share an id, the later one replaces the earlier one.
    """
    return {str(record['id']): record for record in movies}
# Lookup of fetched movies keyed by str(id).
movie_map = convert_movies_to_map(movies)

In [None]:
def convert_movies_to_title_map(movies):
    """Index movie records by the string form of their ``'title'`` field.

    Records sharing a title collapse to a single entry; the last one wins.
    """
    return {str(record['title']): record for record in movies}
# Lookup of fetched movies keyed by title.
movie_title_map = convert_movies_to_title_map(movies)

In [None]:
# Preview the poster URL built for the first movie.
# NOTE(review): this concatenation assumes poster_path is a string.
'https://image.tmdb.org/t/p/w500' + movies[0]['poster_path']

In [None]:
# Embed the free-text query with the same model (and the same `model=`
# keyword) that the ingestion cell uses for the indexed movies.
query = "spy movies ethan"
query_response = openai.Embedding.create(
    input=[query],
    model="text-embedding-ada-002"
)
xq = query_response['data'][0]['embedding']

# Retrieve the top 5 matches from Pinecone, including their stored metadata
res = index.query(xq, top_k=5, include_metadata=True)
matches = get_matches(res, movie_map)

# IDs of the matched vectors
match_ids = [m['id'] for m in matches]
match_ids