In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the movie metadata CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('Data/final_metadata.csv')

In [3]:
df.shape

(19951, 14)

In [4]:
# Drop the columns that the rest of the notebook does not read.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after the columns are gone no longer raises KeyError.
df = df.drop(columns=['release_date', 'vote_average', 'vote_count'], errors='ignore')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19951 entries, 0 to 19950
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 19951 non-null  int64  
 1   title              19951 non-null  object 
 2   genres             19951 non-null  object 
 3   original_language  19951 non-null  object 
 4   overview           19951 non-null  object 
 5   popularity         19951 non-null  float64
 6   keywords           19951 non-null  object 
 7   year               19951 non-null  int64  
 8   cast               19951 non-null  object 
 9   director           19951 non-null  object 
 10  score              19951 non-null  float64
dtypes: float64(2), int64(2), object(7)
memory usage: 1.7+ MB


In [6]:
# Combine title, genres, keywords, cast and director into one text "soup" per movie.
# (The overview/synopsis is NOT part of the soup; it is only kept as metadata later.)
# Zipping the columns avoids building a Series per row as apply(axis=1) does, and
# also works on an empty frame.
df['soup'] = [
    f"Title: {title}. Genres: {genres}. Keywords: {keywords}. Cast: {cast}. Director: {director}."
    for title, genres, keywords, cast, director in zip(
        df['title'], df['genres'], df['keywords'], df['cast'], df['director']
    )
]
# Positional access: show the first row regardless of the index labels.
df['soup'].iloc[0]

'Title: Godzilla x Kong: The New Empire. Genres: Science Fiction Action Adventure. Keywords: giantmonster sequel dinosaur kaiju fantasyworld giantape godzilla kingkong mongkey. Cast: RebeccaHall BrianTyreeHenry DanStevens. Director: Adam Wingard.'

In [7]:
# pip install -U langchain
# pip install -U langchain-community

In [8]:
from langchain.docstore.document import Document

# Maps each Document metadata key to the DataFrame column it is read from.
METADATA_COLUMNS = {
    "movie": "title",
    "language": "original_language",
    "popularity": "popularity",
    "year": "year",
    "synopsis": "overview",
    "score": "score",
}

# One Document per movie: the soup is the searchable text, the rest travels as metadata.
movies = [
    Document(
        page_content=row['soup'],
        metadata={key: row[column] for key, column in METADATA_COLUMNS.items()},
    )
    for _, row in df.iterrows()
]

# doc =  Document(page_content="text", metadata={"source": "local"})

In [9]:
len(movies)

19951

In [10]:
import weaviate

import os

# Read the connection settings from the environment so that no endpoint or API key
# has to be pasted into (and saved with) the notebook. A missing variable fails
# immediately with a KeyError naming it.
WEAVIATE_URL = os.environ["WEAVIATE_URL"]
WEAVIATE_API_KEY = os.environ["WEAVIATE_API_KEY"]

# NOTE: weaviate.Client is the v3 client API; the library emits a deprecation notice
# pointing to the v4 `weaviate.WeaviateClient`. It is kept here because the
# `Weaviate` vector store used below is handed this client object.
client = weaviate.Client(
    url=WEAVIATE_URL,
    auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY),
)

            your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.

            For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
            For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration
            


In [11]:
from langchain_huggingface import HuggingFaceEmbeddings

import torch

# Embedding model: a Hugging Face sentence-transformer.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}

embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs,
)

  from tqdm.autonotebook import tqdm, trange


In [12]:
import weaviate
from langchain.vectorstores import Weaviate
from langchain_weaviate.vectorstores import WeaviateVectorStore

In [13]:
# Ingest the `movies` documents into Weaviate through the connected `client`,
# passing `embeddings` as the embedding model.
# NOTE(review): this cell talks to the remote Weaviate instance and re-sends every
# document each time it runs; the commented "Reload the vector database" snippet
# below looks intended for reusing an existing index instead — confirm before re-running.
vector_db = Weaviate.from_documents(
    movies, 
    embeddings, 
    client=client, 
    by_text=False
    # index_name and text_key are left commented out, so they are not passed
    # and whatever defaults the library applies are used.
    # index_name='movies',
    # text_key='content'
)

## Reload the vector database
# vector_db = Weaviate(client, index_name = 'langchain-test', text_key = 'text')

  attn_output = torch.nn.functional.scaled_dot_product_attention(
[ERROR] Batch SSLError Exception occurred! Retrying in 2s. [1/3]
[ERROR] Batch SSLError Exception occurred! Retrying in 2s. [1/3]
[ERROR] Batch SSLError Exception occurred! Retrying in 2s. [1/3]
[ERROR] Batch SSLError Exception occurred! Retrying in 4s. [2/3]
[ERROR] Batch SSLError Exception occurred! Retrying in 2s. [1/3]
[ERROR] Batch SSLError Exception occurred! Retrying in 2s. [1/3]


In [14]:
# Lookup table from movie title to its soup text.
# Titles are used as the index as-is, so a title may appear more than once.
title_index = pd.Index(df['title'], name='title')
soups = pd.Series(df['soup'].values, index=title_index)

In [16]:
soups.head()

title
Godzilla x Kong: The New Empire     Title: Godzilla x Kong: The New Empire. Genres...
Meg 2: The Trench                   Title: Meg 2: The Trench. Genres: Action Scien...
The Pope's Exorcist                 Title: The Pope's Exorcist. Genres: Horror Mys...
Transformers: Rise of the Beasts    Title: Transformers: Rise of the Beasts. Genre...
Dune: Part Two                      Title: Dune: Part Two. Genres: Science Fiction...
dtype: object

In [75]:
def get_recommendations(title, top_n=10):
    """Return the movies most similar to ``title``, ranked by score then popularity.

    The soup text stored for ``title`` in ``soups`` is used as the query of a
    similarity search against ``vector_db``. The hit whose text equals the
    query (the queried movie itself) is discarded.

    Parameters
    ----------
    title : str
        Movie title; must be present in the index of ``soups``. If several
        movies share the title, the first one is used as the query.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``movie``, ``language``, ``score``, ``year`` and
        ``similarity_score``, sorted by ``score`` and then ``popularity``
        (both descending). An empty frame with those columns is returned when
        the search yields no other movie. ``None`` is returned (after printing
        the error) if the query itself fails.

    Raises
    ------
    ValueError
        If ``title`` is not in ``soups`` or ``top_n`` is smaller than 1.
    """
    output_columns = ['movie', 'language', 'score', 'year', 'similarity_score']

    if top_n < 1:
        raise ValueError(f"top_n must be at least 1, got {top_n}")

    if title not in soups.index:
        raise ValueError(f"Title '{title}' not found in indices")

    # A duplicated title makes the lookup return a Series instead of one string;
    # the vector store needs a single query text, so take the first match.
    match = soups.loc[title]
    query = match.iloc[0] if isinstance(match, pd.Series) else match

    try:
        # Ask for one extra hit because the queried movie normally matches itself.
        results = vector_db.similarity_search_with_score(query, k=top_n + 1)

        # Remove the queried movie by comparing text rather than assuming it is
        # always the first hit, then keep at most top_n neighbours.
        hits = [
            (doc, similarity)
            for doc, similarity in results
            if doc.page_content != query
        ][:top_n]

        recommendations = [
            {
                'movie': doc.metadata['movie'],
                'language': doc.metadata['language'],
                'popularity': doc.metadata['popularity'],
                'score': round(doc.metadata['score'], 1),
                'synopsis': doc.metadata['synopsis'],
                'year': doc.metadata['year'],
                'similarity_score': round(similarity, 2),
            }
            for doc, similarity in hits
        ]

        # Without this guard an empty result would fail in sort_values with a
        # KeyError on 'score' and be reported as a query error.
        if not recommendations:
            return pd.DataFrame(columns=output_columns)

        ranked = pd.DataFrame(recommendations).sort_values(
            by=['score', 'popularity'], ascending=[False, False]
        )
        return ranked[output_columns]

    except Exception as e:
        # Best effort: report the failure instead of interrupting the notebook.
        print(f"Error during query: {e}")
        return None

In [77]:
get_recommendations('The Dark Knight Rises')

Unnamed: 0,movie,language,score,year,similarity_score
0,The Dark Knight,English,8.5,2008,0.88
4,Batman Begins,English,7.7,2005,0.74
6,The Batman,English,7.6,2022,0.69
3,"Batman: The Dark Knight Returns, Part 2",English,7.5,2013,0.78
2,"Batman: The Dark Knight Returns, Part 1",English,7.3,2012,0.78
9,Batman,English,7.2,1989,0.65
1,Batman: The Dark Knight Returns,English,6.7,2013,0.79
5,Batman: Gotham Knight,English,6.6,2008,0.69
8,Batman: Gotham by Gaslight,English,6.6,2018,0.66
7,Knights of Badassdom,English,6.2,2013,0.67


In [78]:
get_recommendations('Hulk')

Unnamed: 0,movie,language,score,year,similarity_score
2,Planet Hulk,English,6.7,2010,0.72
5,Hulk vs. Wolverine,English,6.6,2009,0.69
8,Hulk vs. Thor,English,6.6,2009,0.67
4,The Trial of the Incredible Hulk,English,6.5,1989,0.7
6,Hulk Vs,English,6.5,2009,0.68
3,Hulk: Where Monsters Dwell,English,6.5,2016,0.7
1,The Incredible Hulk,English,6.4,1977,0.74
9,The Incredible Hulk Returns,English,6.4,1988,0.66
7,The Death of the Incredible Hulk,English,6.3,1990,0.68
0,The Incredible Hulk,English,6.2,2008,0.77


In [79]:
get_recommendations('Your Name')

Unnamed: 0,movie,language,score,year,similarity_score
6,Weathering with You,Japanese,7.6,2019,0.73
7,Paprika,Japanese,7.5,2006,0.72
2,Given,Japanese,7.1,2020,0.74
5,Drifting Home,Japanese,6.8,2022,0.73
3,Hello World,Japanese,6.7,2019,0.74
9,Promare,Japanese,6.6,2019,0.72
0,Her Blue Sky,Japanese,6.6,2019,0.75
8,Orange: Future,Japanese,6.6,2016,0.72
4,Urusei Yatsura: Beautiful Dreamer,Japanese,6.5,1984,0.74
1,Hal,Japanese,6.5,2013,0.75


In [80]:
get_recommendations('The Godfather')

Unnamed: 0,movie,language,score,year,similarity_score
0,The Godfather Part II,English,8.5,1974,0.91
2,The Godfather Part III,English,7.3,1990,0.88
8,The Traitor,Italian,7.3,2019,0.7
1,The Godfather Trilogy: 1901-1980,English,6.9,1992,0.88
7,The Sicilian Clan,French,6.8,1969,0.71
9,The Italian Connection,Italian,6.6,1972,0.69
5,Salvatore Giuliano,Italian,6.6,1962,0.72
3,Our Godfather,English,6.5,2019,0.82
4,The Sicilian,English,6.2,1987,0.76
6,Gotti,English,5.9,2018,0.71


In [81]:
get_recommendations('Suzume')

Unnamed: 0,movie,language,score,year,similarity_score
7,Your Name,Japanese,8.4,2016,0.66
5,Akira,Japanese,7.7,1988,0.67
0,Jujutsu Kaisen 0,Japanese,7.5,2021,0.71
8,Digimon Adventure: Last Evolution Kizuna,Japanese,6.9,2020,0.66
1,Saint Seiya Heaven Chapter: Overture,Japanese,6.8,2004,0.68
4,Kizumonogatari Part 1: Tekketsu,Japanese,6.8,2016,0.67
9,Pretty Guardian Sailor Moon Eternal The Movie ...,Japanese,6.7,2021,0.66
2,Jiang Ziya: Legend of Deification,Chinese,6.7,2020,0.67
6,Roujin Z,Japanese,6.5,1991,0.67
3,Harmony,Japanese,6.5,2015,0.67


In [82]:
get_recommendations('Inception')

Unnamed: 0,movie,language,score,year,similarity_score
8,Interstellar,English,8.4,2014,0.63
5,Donnie Darko,English,7.7,2001,0.63
9,Source Code,English,7.2,2011,0.63
4,Fantastic Planet,French,7.2,1973,0.63
3,Trance,English,6.6,2013,0.63
1,In My Dreams,English,6.5,2015,0.65
0,Gandahar,French,6.5,1987,0.65
6,Brainstorm,English,6.4,1983,0.63
2,Infini,English,6.1,2015,0.63
7,Hudson Hawk,English,6.0,1991,0.63


In [83]:
get_recommendations('Interstellar')

Unnamed: 0,movie,language,score,year,similarity_score
1,The Martian,English,7.7,2015,0.69
5,Arrival,English,7.5,2016,0.66
9,Contact,English,7.3,1997,0.65
0,Interstellar: Nolan's Odyssey,English,6.7,2014,0.72
4,Star Trek: The Motion Picture,English,6.5,1979,0.67
8,Proxima,French,6.4,2019,0.65
6,Millennium,English,6.3,1989,0.66
3,Capsule,English,6.3,2015,0.68
7,Approaching the Unknown,English,6.1,2016,0.66
2,Lost in Space,English,5.8,1998,0.68
