In [2]:
import pandas as pd
import numpy as np


# Load the titles CSV into a DataFrame and preview its first rows.
NETFLIX_TITLES_CSV = "core/data/netflix_titles.csv"
data = pd.read_csv(NETFLIX_TITLES_CSV)
data.head()


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [3]:
# Show the column labels of the loaded frame.
data.columns


Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [4]:
def create_textual_representation(row):
    """Render one catalogue row as a multi-line text block.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by the keys
            "type", "title", "director", "cast", "release_year", "rating"
            and "description".

    Returns:
        A string that starts with two newlines, lists each header field as
        ``    Label:value,`` on its own line, then a blank line, the
        description line, another blank line and a final four-space indent.
        Values are interpolated as-is, so a missing (NaN) cell shows up as
        the literal text ``nan``.
    """
    labelled_keys = (
        ("Type", "type"),
        ("Title", "title"),
        ("Director", "director"),
        ("Cast", "cast"),
        ("Release_year", "release_year"),
        ("Rating", "rating"),
    )
    indent = " " * 4

    # Two leading empty entries produce the opening "\n\n" once joined.
    pieces = ["", ""]
    pieces.extend(f"{indent}{label}:{row[key]}," for label, key in labelled_keys)
    pieces.append("")
    pieces.append(f"{indent}Description:{row['description']},")
    pieces.append("")
    # The original template ends with an indented, otherwise empty line.
    pieces.append(indent)

    return "\n".join(pieces)


In [12]:
# Add one text block per title (row-wise apply) as a new column on `data`.
data["textual_representation"] = data.apply(
    create_textual_representation,
    axis=1,
)

# Spot-check a single generated representation by row position.
data["textual_representation"].iloc[123]


'\n\n    Type:TV Show,\n    Title:Luv Kushh,\n    Director:nan,\n    Cast:nan,\n    Release_year:2012,\n    Rating:TV-Y7,\n\n    Description:Based on the last book of the epic Ramayana, this series follows the endeavors and adventures of Lord Rama’s twin sons through their childhood.,\n\n    '

In [14]:
import faiss
import requests


# Build a flat L2 FAISS index holding one embedding per title.
dim = 4096  # width of every embedding row; must match what the model returns
index = faiss.IndexFlatL2(dim)

# One float32 row per textual representation, filled in by the loop below.
X = np.zeros(
    (len(data["textual_representation"]), dim),
    dtype="float32",
)


for i, representation in enumerate(data["textual_representation"]):
    if i % 20 == 0:
        print("Processed ", str(i), " instances")

    res = requests.post(
        "http://localhost:11434/api/embeddings",
        json={
            "model": "llama2",
            "prompt": representation,
        },
        # (connect, read) seconds: without a timeout a stalled server would
        # hang the cell forever.
        timeout=(5, 300),
    )
    # Fail on HTTP errors here instead of a confusing KeyError on "embedding".
    res.raise_for_status()

    embedding = np.asarray(res.json()["embedding"], dtype="float32")
    if embedding.shape != (dim,):
        raise ValueError(
            f"Expected an embedding of length {dim}, got shape {embedding.shape}"
        )

    X[i] = embedding

# `add` is a method and must be called; the previous `index.add[X]`
# subscripted it, which raises TypeError and leaves the index empty.
index.add(X)


Processed  0  instances


ConnectionError: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/embeddings (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x75116c2d0050>: Failed to establish a new connection: [Errno 111] Connection refused'))

In [15]:
# faiss.write_index(index, "index")


In [19]:
# index = faiss.read_index("index")


# Preview the first rows whose title contains "Island".
title_has_island = data["title"].str.contains("Island")
data[title_has_island].head()


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,textual_representation
53,s54,Movie,InuYasha the Movie 4: Fire on the Mystic Island,Toshiya Shinohara,"Kappei Yamaguchi, Satsuki Yukino, Koji Tsujita...",Japan,"September 15, 2021",2004,TV-PG,88 min,"Action & Adventure, Anime Features, Internatio...","Ai, a young half-demon who has escaped from Ho...","\n\n Type:Movie,\n Title:InuYasha the Mo..."
252,s253,Movie,Black Island,Miguel Alexandre,"Philip Froissant, Alice Dwyer, Hanns Zischler,...",,"August 18, 2021",2021,TV-MA,105 min,"International Movies, Thrillers",The dark secrets of a seemingly peaceful islan...,"\n\n Type:Movie,\n Title:Black Island,\n..."
1222,s1223,Movie,The Block Island Sound,"Kevin McManus, Matthew McManus","Chris Sheffield, Michaela McManus, Neville Arc...",United States,"March 11, 2021",2020,TV-MA,99 min,"Horror Movies, Independent Movies","On an island, a fisherman's family faces horro...","\n\n Type:Movie,\n Title:The Block Islan..."
1358,s1359,Movie,Shutter Island,Martin Scorsese,"Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley,...",United States,"February 1, 2021",2010,R,139 min,Thrillers,A U.S. marshal's troubling visions compromise ...,"\n\n Type:Movie,\n Title:Shutter Island,..."
1571,s1572,Movie,Rose Island,Sydney Sibilia,"Elio Germano, Matilda De Angelis, Tom Wlaschih...",Italy,"December 9, 2020",2020,TV-14,119 min,"Comedies, Dramas, International Movies",An idealistic engineer builds his own island o...,"\n\n Type:Movie,\n Title:Rose Island,\n ..."


In [21]:
# Row position of the title used as the query for the similarity search.
USER_MOVIE_POSITION = 1358
user_movie = data.iloc[USER_MOVIE_POSITION]
user_movie


show_id                                                               s1359
type                                                                  Movie
title                                                        Shutter Island
director                                                    Martin Scorsese
cast                      Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley,...
country                                                       United States
date_added                                                 February 1, 2021
release_year                                                           2010
rating                                                                    R
duration                                                            139 min
listed_in                                                         Thrillers
description               A U.S. marshal's troubling visions compromise ...
textual_representation    \n\n    Type:Movie,\n    Title:Shutter Island,...
Name: 1358, 

In [None]:
# Embed the chosen title's text with the same endpoint and model used to
# build the index, then look up its nearest neighbours.
res = requests.post(
    "http://localhost:11434/api/embeddings",
    json={
        "model": "llama2",
        "prompt": user_movie["textual_representation"],
    },
    # (connect, read) seconds: avoid hanging forever on a stalled server.
    timeout=(5, 300),
)
# Fail on HTTP errors here instead of a confusing KeyError on "embedding".
res.raise_for_status()

# Wrap the vector in a list so the query is 2-D (one row per query), and use
# float32 to match the dtype of the vectors stored in the index.
# Fixed typo: `res.jspn()` raised AttributeError, so `I` was never defined.
embedding = np.array(
    [res.json()["embedding"]],
    dtype="float32",
)

# D holds the distances and I the row positions of the 5 closest vectors.
D, I = index.search(embedding, 5)


In [23]:
# Map the neighbour positions returned by the search back to their text.
all_representations = np.array(data["textual_representation"])
best_matches = all_representations[I.flatten()]

for match in best_matches:
    # Header, the match itself, then an empty line as a separator.
    print("NEXT MOVIE", match, "", sep="\n")

    # https://youtu.be/epidA1fBFtI


NameError: name 'I' is not defined

In [None]:
# print("""\n\n    Type:TV Show,\n    Title:Luv Kushh,\n    Director:nan,\n    Cast:nan,\n    Release_year:2012,\n    Rating:TV-Y7,\n\n    Description:Based on the last book of the epic Ramayana, this series follows the endeavors and adventures of Lord Rama’s twin sons through their childhood.,\n\n""")

# Hand-written description of a title that is not in the catalogue, laid out
# like the generated representations so it can be embedded the same way.
# Named `query_representation` rather than `repr` to avoid shadowing the
# builtin `repr()`.
query_representation = """
    Type:Movie,
    Title:Sandman,
    Director:nan,
    Cast:nan,
    Release_year:2012,
    Rating:TV-Y7,

    Description:Billie finds a strange artifact at the beach. when he picks it up he gets the powers of sandman. watch the adventures that follow as billie fights sea monsters and sand monsters.,

"""

# Embed the custom text. The previous code indexed the raw string with stale
# neighbour ids (`np.array(repr)[I.flatten()]`), which raises IndexError and
# never queried the index for this description at all.
res = requests.post(
    "http://localhost:11434/api/embeddings",
    json={
        "model": "llama2",
        "prompt": query_representation,
    },
    # (connect, read) seconds: avoid hanging forever on a stalled server.
    timeout=(5, 300),
)
res.raise_for_status()

query_embedding = np.array(
    [res.json()["embedding"]],
    dtype="float32",
)

# Separate names keep the earlier search results (D, I) intact for re-runs.
query_distances, query_neighbors = index.search(query_embedding, 5)

best_matches = np.array(data["textual_representation"])[query_neighbors.flatten()]


for match in best_matches:
    print("NEXT MOVIE")
    print(match)
    print()

    # https://youtu.be/epidA1fBFtI
