In [119]:
import ast
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np



In [40]:
# Load a pre-trained sentence-embedding checkpoint (a Sentence-BERT style model).
# The tokenizer and the encoder must come from the same checkpoint, so the
# identifier is written once and shared by both calls.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)

In [63]:
# Read the movie metadata table.
df = pd.read_csv('dataset/movies_all_data2.csv', encoding='utf-8')

# These columns hold lists serialised as Python-literal text in the CSV;
# ast.literal_eval turns each cell back into a real list without executing
# arbitrary code. (The 'actors' column is not parsed here and stays as text.)
for list_column in ('genres', 'tags', 'characters', 'directors'):
    df[list_column] = df[list_column].apply(ast.literal_eval)

# Force the overview to text. Missing values become an empty string instead of
# the literal "nan" that a plain str() conversion would feed into the
# embedding text.
df['overview'] = df['overview'].fillna('').astype(str)

In [64]:
def list_to_string(input_list):
    """Render a collection of values as one comma-separated string.

    Args:
        input_list: Iterable of items to join. ``None`` is treated as an
            empty collection, and a bare string is returned unchanged rather
            than being split into its characters.

    Returns:
        The items joined with ``", "``. Each item is converted with ``str()``
        first, so non-string entries (e.g. numbers) do not raise ``TypeError``.
        An empty collection yields ``""``.
    """
    if input_list is None:
        return ""
    if isinstance(input_list, str):
        # Joining a string would interleave ", " between its characters.
        return input_list
    return ", ".join(str(item) for item in input_list)

def preprocess(movie_data):
    """Build the single text description of a movie that gets embedded.

    Args:
        movie_data: Mapping-like row (e.g. a DataFrame row) providing
            'title', 'overview', 'genres', 'tags', 'directors' and
            'characters'. The last four are passed through list_to_string.

    Returns:
        A string of "Label: value" segments separated by ", ", in the order
        Title, Overview, Genres, Tags, Directors, Characters.
    """
    labelled_fields = (
        ("Title", movie_data['title']),
        ("Overview", movie_data['overview']),
        ("Genres", list_to_string(movie_data['genres'])),
        ("Tags", list_to_string(movie_data['tags'])),
        ("Directors", list_to_string(movie_data['directors'])),
        ("Characters", list_to_string(movie_data['characters'])),
    )
    return ", ".join(f"{label}: {value}" for label, value in labelled_fields)

def encode_text(text):
    """Embed text with the module-level ``tokenizer`` and ``model``.

    The token vectors of the last hidden layer are mean-pooled, weighted by
    the attention mask so that padding tokens do not contribute. For a single
    string there is no padding, so this equals a plain mean over tokens.

    Args:
        text: A string, or a list of strings, accepted by the tokenizer.
            Inputs are tokenized with truncation enabled.

    Returns:
        A NumPy array. For a single text the batch dimension is squeezed away
        and the result is 1-D (shape ``(384,)`` for the checkpoint loaded in
        this notebook); for a batch of several texts it is ``(batch, hidden)``.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
        token_embeddings = outputs.last_hidden_state

        # (batch, seq, 1) mask: 1 for real tokens, 0 for padding.
        mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
        summed = (token_embeddings * mask).sum(dim=1)
        # Guard against division by zero if a row has no unmasked tokens.
        token_counts = mask.sum(dim=1).clamp(min=1e-9)
        embeddings = (summed / token_counts).squeeze(0)

    return embeddings.detach().numpy()

# Example: Create a corpus of combined text for all movies
# df['combined_text'] = df.apply(preprocess, axis=1)


# for index, row in df.iterrows():
#     print(f"Index: {index}, Name: {row['title']}, ID: {row['id']}")
#     print(df.columns)
#     preprocessed_text = preprocess(row)
#     print(preprocessed_text)

#     print(type(encode_text(preprocessed_text)))
#     # print(preprocess(row))

#     break

In [65]:
# Embed every movie: build its descriptive text, then encode it, one row at a
# time. Each cell of the new column holds that movie's embedding array.
df['embeddings'] = df.apply(lambda movie: encode_text(preprocess(movie)), axis='columns')


In [66]:
# Sanity check: dimensionality of the embedding stored at index label 0.
df.loc[0, "embeddings"].shape

(384,)

In [144]:
def get_similar_movies(df, movie_id, n=5, include_query=True):
    """Rank the movies in ``df`` by cosine similarity to one query movie.

    Prints the query movie's title and genres as a side effect.

    Args:
        df: DataFrame with an 'id' column, an 'embeddings' column holding one
            equal-length vector per row, and the display columns 'title',
            'genres', 'overview', 'tags', 'characters', 'directors', 'actors'.
        movie_id: Value of 'id' identifying the query movie. If several rows
            share the id, the first one is used as the query.
        n: Number of neighbours wanted; must be non-negative.
        include_query: When True (the default, matching the original
            behaviour) the ``n + 1`` highest-scoring rows are returned, which
            normally means the query movie itself followed by ``n``
            neighbours. When False, rows whose 'id' equals ``movie_id`` are
            removed and the top ``n`` remaining rows are returned.

    Returns:
        A DataFrame slice of the display columns, most similar first.

    Raises:
        ValueError: If ``n`` is negative or no row has ``id == movie_id``.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n!r}")

    matches = df.loc[df['id'] == movie_id]
    if matches.empty:
        raise ValueError(f"No movie with id {movie_id!r} found")
    # .iloc[0] always yields a single row, even when ids are duplicated.
    movie_1 = matches.iloc[0]
    print(movie_1["title"], movie_1["genres"])

    query_embedding = np.asarray(movie_1['embeddings']).reshape(1, -1)
    all_embeddings = np.vstack(df['embeddings'].tolist())

    # Compute cosine similarity of the query against every row (query included).
    similarities = cosine_similarity(query_embedding, all_embeddings)[0]

    # Row positions ordered from most to least similar.
    ranked_positions = similarities.argsort()[::-1]

    if include_query:
        # One extra slot because the query movie is expected to rank first.
        top_positions = ranked_positions[:(n + 1)]
    else:
        ranked_ids = df['id'].to_numpy()[ranked_positions]
        top_positions = ranked_positions[ranked_ids != movie_id][:n]

    return df.iloc[top_positions][['id', 'title', 'genres', 'overview', 'tags', 'characters', 'directors', 'actors']]

# Example usage:


In [145]:
# Example query: "The Hobbit: An Unexpected Journey" (id 49051).
get_similar_movies(df=df, movie_id=49051, n=5)

The Hobbit: An Unexpected Journey ['Adventure', 'Fantasy', 'Action']


Unnamed: 0,id,title,genres,overview,tags,characters,directors,actors
11753,49051,The Hobbit: An Unexpected Journey,"[Adventure, Fantasy, Action]","Bilbo Baggins, a hobbit enjoying his quiet lif...","[riddle, elves, dwarves, orcs, middle-earth (t...","[Gandalf, Bilbo, Thorin]","[Peter Jackson, Victoria Sullivan]","['Ian McKellen', 'Martin Freeman', 'Richard Ar..."
12723,57158,The Hobbit: The Desolation of Smaug,"[Adventure, Fantasy]","The Dwarves, Bilbo and Gandalf have successful...","[elves, dwarves, orcs, hobbit, dragon, wizard,...","[Bilbo Baggins, Gandalf, Thorin Oakenshield]","[Peter Jackson, Carolynne Cunningham, Veroniqu...","['Martin Freeman', 'Ian McKellen', 'Richard Ar..."
14121,122917,The Hobbit: The Battle of the Five Armies,"[Action, Adventure, Fantasy]",Immediately after the events of The Desolation...,"[corruption, elves, dwarves, orcs, middle-eart...","[Bilbo Baggins, Gandalf, Thorin Oakenshield]",[Peter Jackson],"['Martin Freeman', 'Ian McKellen', 'Richard Ar..."
3670,120,The Lord of the Rings: The Fellowship of the Ring,"[Adventure, Fantasy, Action]","Young hobbit Frodo Baggins, after inheriting a...","[elves, dwarves, orcs, middle-earth (tolkien),...","[Frodo Baggins, Gandalf the Grey, Galadriel]",[Peter Jackson],"['Elijah Wood', 'Ian McKellen', 'Cate Blanchett']"
1523,123,The Lord of the Rings,"[Fantasy, Drama, Animation, Adventure]",The Fellowship of the Ring embark on a journey...,"[elves, dwarves, hobbit, mission]","[Frodo (voice), Gandalf (voice), Sam (voice)]",[Ralph Bakshi],"['Christopher Guard', 'William Squire', 'Micha..."
4321,121,The Lord of the Rings: The Two Towers,"[Adventure, Fantasy, Action]",Frodo and Sam are trekking to Mordor to destro...,"[elves, orcs, middle-earth (tolkien), hobbit, ...","[Frodo Baggins, Gandalf the White, Aragorn]",[Peter Jackson],"['Elijah Wood', 'Ian McKellen', 'Viggo Mortens..."


In [146]:
# Example query: "Iron Man 3" (id 68721).
get_similar_movies(df=df, movie_id=68721, n=5)

Iron Man 3 ['Action', 'Adventure', 'Science Fiction']


Unnamed: 0,id,title,genres,overview,tags,characters,directors,actors
12110,68721,Iron Man 3,"[Action, Adventure, Science Fiction]",When Tony Stark's world is torn apart by a for...,"[terrorist, war on terror, tennessee, malibu, ...","[Tony Stark / Iron Man, Virginia ""Pepper"" Pott...",[Shane Black],"['Robert Downey Jr.', 'Gwyneth Paltrow', 'Don ..."
8392,1726,Iron Man,"[Action, Science Fiction, Adventure]","After being held captive in an Afghan cave, bi...","[middle east, arms dealer, malibu, marvel comi...","[Tony Stark / Iron Man, Lt. Col. James ""Rhodey...","[Jon Favreau, Cristina Weigmann]","['Robert Downey Jr.', 'Terrence Howard', 'Jeff..."
9656,10138,Iron Man 2,"[Adventure, Action, Science Fiction]",With the world now aware of his dual life as t...,"[malibu, marvel comic, superhero, based on com...","[Tony Stark / Iron Man, Virginia ""Pepper"" Pott...",[Jon Favreau],"['Robert Downey Jr.', 'Gwyneth Paltrow', 'Don ..."
12094,169934,Iron Man: Rise of Technovore,"[Science Fiction, Animation, Action]",Iron Man enlists the help of ruthless vigilant...,"[marvel comic, superhero, based on comic, mech...","[Frank Castle / The Punisher (voice), Tony Sta...","[Hiroshi Hamazaki, Kenichi Kawamura, Shingo Na...","['Tesshou Genda', 'Keiji Fujiwara', 'Miyu Irino']"
10825,24428,The Avengers,"[Science Fiction, Action, Adventure]",When an unexpected enemy emerges and threatens...,"[new york, shield, marvel comic, superhero, ba...","[Tony Stark / Iron Man, Steve Rogers / Captain...","[Joss Whedon, Trudy Ramirez]","['Robert Downey Jr.', 'Chris Evans', 'Mark Ruf..."
12088,13647,The Invincible Iron Man,"[Animation, Action, Science Fiction]",When a cocky industrialist's efforts to raise ...,"[prophecy, armor, marvel comic, billionaire, b...","[Tony Stark Iron Man, Li Mei, Howard Stark]","[Jay Oliva, Patrick Archibald]","['Marc Worden', 'Gwendoline Yeo', 'John McCook']"


In [153]:
# Same query as the earlier Hobbit cell (id 49051), run again.
get_similar_movies(df=df, movie_id=49051, n=5)

The Hobbit: An Unexpected Journey ['Adventure', 'Fantasy', 'Action']


Unnamed: 0,id,title,genres,overview,tags,characters,directors,actors
11753,49051,The Hobbit: An Unexpected Journey,"[Adventure, Fantasy, Action]","Bilbo Baggins, a hobbit enjoying his quiet lif...","[riddle, elves, dwarves, orcs, middle-earth (t...","[Gandalf, Bilbo, Thorin]","[Peter Jackson, Victoria Sullivan]","['Ian McKellen', 'Martin Freeman', 'Richard Ar..."
12723,57158,The Hobbit: The Desolation of Smaug,"[Adventure, Fantasy]","The Dwarves, Bilbo and Gandalf have successful...","[elves, dwarves, orcs, hobbit, dragon, wizard,...","[Bilbo Baggins, Gandalf, Thorin Oakenshield]","[Peter Jackson, Carolynne Cunningham, Veroniqu...","['Martin Freeman', 'Ian McKellen', 'Richard Ar..."
14121,122917,The Hobbit: The Battle of the Five Armies,"[Action, Adventure, Fantasy]",Immediately after the events of The Desolation...,"[corruption, elves, dwarves, orcs, middle-eart...","[Bilbo Baggins, Gandalf, Thorin Oakenshield]",[Peter Jackson],"['Martin Freeman', 'Ian McKellen', 'Richard Ar..."
3670,120,The Lord of the Rings: The Fellowship of the Ring,"[Adventure, Fantasy, Action]","Young hobbit Frodo Baggins, after inheriting a...","[elves, dwarves, orcs, middle-earth (tolkien),...","[Frodo Baggins, Gandalf the Grey, Galadriel]",[Peter Jackson],"['Elijah Wood', 'Ian McKellen', 'Cate Blanchett']"
1523,123,The Lord of the Rings,"[Fantasy, Drama, Animation, Adventure]",The Fellowship of the Ring embark on a journey...,"[elves, dwarves, hobbit, mission]","[Frodo (voice), Gandalf (voice), Sam (voice)]",[Ralph Bakshi],"['Christopher Guard', 'William Squire', 'Micha..."
4321,121,The Lord of the Rings: The Two Towers,"[Adventure, Fantasy, Action]",Frodo and Sam are trekking to Mordor to destro...,"[elves, orcs, middle-earth (tolkien), hobbit, ...","[Frodo Baggins, Gandalf the White, Aragorn]",[Peter Jackson],"['Elijah Wood', 'Ian McKellen', 'Viggo Mortens..."


In [154]:
# Example query: "The Dark Knight" (id 155).
get_similar_movies(df=df, movie_id=155, n=5)

The Dark Knight ['Drama', 'Action', 'Crime', 'Thriller']


Unnamed: 0,id,title,genres,overview,tags,characters,directors,actors
8335,155,The Dark Knight,"[Drama, Action, Crime, Thriller]",Batman raises the stakes in his war on crime. ...,"[dc comics, crime fighter, secret identity, sc...","[Bruce Wayne / Batman, Alfred Pennyworth, Joker]","[Christopher Nolan, Steve Gehrke, Nilo Otero]","['Christian Bale', 'Michael Caine', 'Heath Led..."
11013,49026,The Dark Knight Rises,"[Action, Crime, Drama, Thriller]",Following the death of District Attorney Harve...,"[dc comics, crime fighter, terrorist, secret i...","[Bruce Wayne / Batman, Alfred Pennyworth, Jame...","[Christopher Nolan, Steve Gehrke, Matthew Reed...","['Christian Bale', 'Michael Caine', 'Gary Oldm..."
451,268,Batman,"[Fantasy, Action]",The Dark Knight of Gotham City begins his war ...,"[double life, dc comics, dual identity, chemic...","[Jack Napier/The Joker, Bruce Wayne/Batman, Vi...",[Tim Burton],"['Jack Nicholson', 'Michael Keaton', 'Kim Basi..."
6984,272,Batman Begins,"[Action, Crime, Drama]","Driven by tragedy, billionaire Bruce Wayne ded...","[himalaya, martial arts, dc comics, crime figh...","[Bruce Wayne / Batman, Alfred Pennyworth, Henr...","[Christopher Nolan, Annie Penn, Cliff Lanning]","['Christian Bale', 'Michael Caine', 'Liam Nees..."
11691,123025,"Batman: The Dark Knight Returns, Part 1","[Action, Animation]",Batman has not been seen for ten years. A new ...,"[dc comics, future, based on graphic novel, dy...","[Batman / Bruce Wayne, Robin / Carrie Kelley, ...",[Jay Oliva],"['Peter Weller', 'Ariel Winter', 'David Selby']"
8534,13851,Batman: Gotham Knight,"[Animation, Action, Adventure]",Explore Bruce Wayne's transition from his begi...,"[dc comics, anthology, super powers]","[Batman / Bruce Wayne (voice), Crispus Allen (...","[Yasuhiro Aoki, Hiroshi Morioka, Jong-Sik Nam,...","['Kevin Conroy', 'Gary Dourdan', 'Jason Marsden']"


In [151]:
# The 20 movies with the highest vote_count, most-voted first.
df.sort_values(by='vote_count', ascending=False).iloc[:20]

Unnamed: 0,adult,id,original_language,original_title,overview,popularity,runtime,tagline,title,vote_average,vote_count,genres,release_year,overview_keywords,tags,directors,characters,actors,embeddings
9800,False,27205,en,Inception,"Cobb, a skilled thief who commits corporate es...",29.108149,148.0,Your mind is the scene of the crime.,Inception,8.1,14075.0,"[Action, Thriller, Science Fiction, Mystery, A...",2010.0,"['cobb', 'skilled', 'thief', 'commit', 'corpor...","[loss of lover, dream, kidnapping, sleep, subc...",[Christopher Nolan],"[Dom Cobb, Arthur, Ariadne]","['Leonardo DiCaprio', 'Joseph Gordon-Levitt', ...","[-0.14670397, 0.08696344, -0.09322027, -0.0624..."
8335,False,155,en,The Dark Knight,Batman raises the stakes in his war on crime. ...,123.167259,152.0,Why So Serious?,The Dark Knight,8.3,12269.0,"[Drama, Action, Crime, Thriller]",2008.0,"['batman', 'raise', 'stake', 'war', 'crime', '...","[dc comics, crime fighter, secret identity, sc...","[Christopher Nolan, Steve Gehrke, Nilo Otero]","[Bruce Wayne / Batman, Alfred Pennyworth, Joker]","['Christian Bale', 'Michael Caine', 'Heath Led...","[-0.008470067, -0.07082076, -0.21791284, -0.09..."
9364,False,19995,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",185.070892,162.0,Enter the World of Pandora.,Avatar,7.2,12114.0,"[Action, Adventure, Fantasy, Science Fiction]",2009.0,"['nd', 'century', 'paraplegic', 'marine', 'dis...","[culture clash, future, space war, space colon...",[James Cameron],"[Jake Sully, Princess Neytiri, Dr. Grace Augus...","['Sam Worthington', 'Zoe Saldana', 'Sigourney ...","[-0.015838021, 0.006896694, 0.088935144, -0.14..."
10825,False,24428,en,The Avengers,When an unexpected enemy emerges and threatens...,89.887648,143.0,Some assembly required.,The Avengers,7.4,12000.0,"[Science Fiction, Action, Adventure]",2012.0,"['unexpected', 'enemy', 'emerge', 'threaten', ...","[new york, shield, marvel comic, superhero, ba...","[Joss Whedon, Trudy Ramirez]","[Tony Stark / Iron Man, Steve Rogers / Captain...","['Robert Downey Jr.', 'Chris Evans', 'Mark Ruf...","[-0.11675756, -0.0029766196, -0.070140295, -0...."
14611,False,293660,en,Deadpool,Deadpool tells the origin story of former Spec...,187.860492,108.0,Witness the beginning of a happy ending,Deadpool,7.4,11444.0,"[Action, Adventure, Comedy]",2016.0,"['deadpool', 'tell', 'origin', 'story', 'speci...","[anti hero, mercenary, marvel comic, superhero...",[Tim Miller],"[Wade Wilson / Deadpool, Vanessa Carlysle / Co...","['Ryan Reynolds', 'Morena Baccarin', 'Ed Skrein']","[-0.12071296, -0.086758055, -0.10347109, -0.02..."
13039,False,157336,en,Interstellar,Interstellar chronicles the adventures of a gr...,32.213481,169.0,Mankind was born on Earth. It was never meant ...,Interstellar,8.1,11187.0,"[Adventure, Drama, Science Fiction]",2014.0,"['interstellar', 'chronicle', 'adventure', 'gr...","[saving the world, artificial intelligence, fa...",[Christopher Nolan],"[Joseph Cooper, Murph Cooper, Dr. Amelia Brand]","['Matthew McConaughey', 'Jessica Chastain', 'A...","[-0.1349882, -0.18524994, 0.06554142, 0.077785..."
11778,False,68718,en,Django Unchained,"With the help of a German bounty hunter, a fre...",19.785025,165.0,"Life, liberty and the pursuit of vengeance.",Django Unchained,7.8,10297.0,"[Drama, Western]",2012.0,"['help', 'german', 'bounty', 'hunter', 'freed'...","[bounty hunter, hero, plantation, society, fri...","[Quentin Tarantino, Martin Kitrosser, William ...","[Django, Dr. King Schultz, Calvin Candie]","['Jamie Foxx', 'Christoph Waltz', 'Leonardo Di...","[-0.14034198, -0.00047503712, -0.12317582, 0.0..."
13444,False,118340,en,Guardians of the Galaxy,"Light years from Earth, 26 years after being a...",53.291601,121.0,All heroes start somewhere.,Guardians of the Galaxy,7.9,10014.0,"[Action, Science Fiction, Adventure]",2014.0,"['light', 'year', 'earth', 'year', 'abduct', '...","[marvel comic, spaceship, space, outer space, ...",[James Gunn],"[Peter Quill / Star-Lord, Gamora, Drax the Des...","['Chris Pratt', 'Zoe Saldana', 'Dave Bautista']","[-0.15897991, 0.03049575, 0.14649367, -0.11911..."
2192,False,550,en,Fight Club,A ticking-time-bomb insomniac and a slippery s...,63.869599,139.0,Mischief. Mayhem. Soap.,Fight Club,8.3,9678.0,[Drama],1999.0,"['tickingtimebomb', 'insomniac', 'slippery', '...","[support group, dual identity, nihilism, rage ...","[David Fincher, Dina Waxman]","[The Narrator, Tyler Durden, Robert 'Bob' Paul...","['Edward Norton', 'Brad Pitt', 'Meat Loaf']","[-0.12449401, -0.10854275, -0.15160537, -0.014..."
11011,False,70160,en,The Hunger Games,Every year in the ruins of what was once North...,20.031667,142.0,May The Odds Be Ever In Your Favor.,The Hunger Games,6.9,9634.0,"[Science Fiction, Adventure, Fantasy]",2012.0,"['year', 'ruin', 'north', 'america', 'nation',...","[hallucination, dystopia, female protagonist, ...","[Gary Ross, Dawn Gilliam]","[Katniss Everdeen, Peeta Mellark, Gale Hawthorne]","['Jennifer Lawrence', 'Josh Hutcherson', 'Liam...","[0.06546191, 0.010160887, 0.051277403, -0.1979..."
