In [None]:
#IMPORTS
import pandas as pd
from sentence_transformers import SentenceTransformer

In [2]:
# Load the Netflix titles CSV (resolved relative to the working directory) into a DataFrame
dataset_path = 'netflix_titles.csv'
df = pd.read_csv(dataset_path)

In [None]:
# Pick the sentence-embedding model by name and load it once for the whole notebook
model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(model_name)

In [4]:
#creating function to convert each row into a single string
def create_textual_representation(row):
    """Flatten one catalogue row into the multi-line text that gets embedded.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or a dict)
        Must provide the keys 'type', 'title', 'cast', 'release_year',
        'director', 'listed_in' and 'description'.

    Returns
    -------
    str
        Seven "Label:value" lines joined by ",\\n", in the order Type, Title,
        Cast, Release, Director, Genres, Description. A missing value
        (None/NaN) is written as "Unknown".
    """
    def _field(column):
        value = row[column]
        # A missing cell would otherwise be formatted as the literal text
        # "nan"/"None" and end up embedded as if it were real content.
        return "Unknown" if pd.isna(value) else value

    textual_representation=f"""Type:{_field('type')},
Title:{_field('title')},
Cast:{_field('cast')},
Release:{_field('release_year')},
Director:{_field('director')},
Genres:{_field('listed_in')},
Description:{_field('description')}"""
    return textual_representation

In [5]:
# Build the text for every row first, then store the result as a new DataFrame column
row_texts = df.apply(lambda row: create_textual_representation(row), axis=1)
df['textual_representation'] = row_texts

In [18]:
#more imports and choosing output dimensions
import faiss
import requests
import numpy as np
# Ask the loaded model for its embedding width instead of hard-coding it, so the FAISS
# index and the embedding matrix stay correct if a different model is chosen
# (the original hard-coded value for the model used here was 384).
dim = model.get_sentence_embedding_dimension()

In [20]:
# Create an empty flat L2 index of width `dim`, plus a zero-filled float32 matrix
# with one row per catalogue entry to hold the embeddings
index = faiss.IndexFlatL2(dim)
n_rows = len(df['textual_representation'])
X = np.zeros(shape=(n_rows, dim), dtype='float32')

In [None]:
#embedding each row and adding to faiss database
# Encode the whole column in one batched call instead of one encode() call per row;
# the built-in progress bar replaces the print issued every 10 rows.
X = np.ascontiguousarray(
    model.encode(
        df['textual_representation'].tolist(),
        normalize_embeddings=True,
        show_progress_bar=True,
    ),
    dtype='float32',
)
# Empty the index first so that re-running this cell does not add every vector a second time
index.reset()
index.add(X)

In [22]:
# Save the populated FAISS index to a file named 'index' in the working directory
index_file = 'index'
faiss.write_index(index, index_file)

In [29]:
# Load the FAISS index back from the 'index' file, rebinding `index` to the loaded copy
index_file = 'index'
index = faiss.read_index(index_file)

In [None]:
#creating a movie string which isn't listed in the dataset
# The query uses the same field order as the indexed row text
# (Type, Title, Cast, Release, Director, Genres, Description) so the two are embedded alike.
favourite_movie="""Type:Movie,
Title:The Prestige,
Cast:Christian Bale, Hugh Jackman, Scarlett Johansson,
Release:2006,
Director:Christopher Nolan,
Genres:Drama, Mystery, Thriller, Sci-Fi,
Description:Two rival magicians in 19th-century London engage in a bitter competition to create the ultimate illusion, pushing each other to the edge of obsession and sacrifice."""
favourite_movie

In [None]:
#for testing purposes run this cell block and enter your own movie/show details
# Every piece after the first carries its own ",\n" prefix, so concatenating them
# reproduces the layout of the indexed row text, including the Release line
# between Cast and Director.
Type="Type:"+input("Type (ie Movie/Show )")
Title=",\nTitle:"+input("Title (ie The Godfather Part II)")
Cast=",\nCast:"+input("Cast (ie Christian Bale, Hugh Jackman, Scarlett Johansson)")
Release=",\nRelease:"+input("Release year (ie 2006)")
Director=",\nDirector:"+input("Director (ie Christopher Nolan)")
Genres=",\nGenres:"+input("Genre (ie Drama, Mystery, Thriller, Sci-Fi)")
Description=",\nDescription:"+input("Description (ie Two rival magicians in 19th-century London engage in a...)")
favourite_movie= Type + Title + Cast + Release + Director + Genres + Description
favourite_movie

In [56]:
# Encode the query text with normalisation switched on, then convert the result
# to a float32 NumPy array
query_vector = model.encode(favourite_movie, normalize_embeddings=True)
embedding = np.array(query_vector).astype('float32')

In [57]:
# Turn the query vector into a single-row matrix of shape (1, dim) for the search call
embedding = np.reshape(embedding, (1, dim))

In [58]:
# Query the index for the k stored vectors closest to the query embedding;
# the call returns a (distances, indices) pair
k = 5  # number of nearest neighbors
search_result = index.search(embedding, k)
distances, indices = search_result


In [None]:
#top similar movies/shows
# Read the title back out of the query text so the heading matches whichever
# query cell was run last, rather than always naming one fixed film.
query_title = next(
    (
        line.split(':', 1)[1].rstrip(',').strip()
        for line in favourite_movie.splitlines()
        if line.startswith('Title:')
    ),
    'your selection',
)
print(f"Top similar movies to: {query_title}")
for idx, dist in zip(indices[0], distances[0]):
    if idx < 0:
        # FAISS fills unused result slots with -1 when fewer than k neighbours exist;
        # .iloc[-1] would silently print the last catalogue row instead.
        continue
    print('________________________________________________________________________________________')
    print(f"- {df['textual_representation'].iloc[idx]} (distance: {dist:.4f})")


In [None]:
import gradio as gr

# Placeholder callback for the Gradio UI; swap the body for the real logic
def process_input(text):
    """Return the input text upper-cased and prefixed with "Result: "."""
    shouted = text.upper()
    return f"Result: {shouted}"

# Build the input and output widgets first, then assemble the Gradio interface from them
text_in = gr.Textbox(label="Enter your text")
text_out = gr.Textbox(label="Processed Result")
ui = gr.Interface(
    fn=process_input,
    inputs=text_in,
    outputs=text_out,
    title="My ML Project",
    description="A simple Gradio app that processes your input and returns a result.",
)

# Start the app
ui.launch()
