In [6]:
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer


In [7]:
# !pip install -U sentence-transformers

In [8]:
# Name of the pretrained sentence-embedding checkpoint used for every encode
# call in this notebook (it is downloaded when not already cached, as the
# progress output below shows).
MODEL_NAME = 'paraphrase-distilroberta-base-v1'
model = SentenceTransformer(MODEL_NAME)

Downloading:   0%|          | 0.00/391 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/3.74k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/718 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/329M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/229 [00:00<?, ?B/s]

In [11]:
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem import PorterStemmer

def stemSentence_porter(sentence):
    """Return ``sentence`` with every token replaced by its Porter stem.

    The text is tokenized with ``word_tokenize``; each stemmed token is
    followed by a single space, so any non-empty result ends with a space.
    """
    stemmer = PorterStemmer()
    stemmed_tokens = [stemmer.stem(token) for token in word_tokenize(sentence)]
    return "".join(stemmed + " " for stemmed in stemmed_tokens)

def convert(text):
    """Parse a stringified list of dicts and return each entry's ``'name'``.

    ``text`` is evaluated with ``ast.literal_eval``; the names are returned in
    their original order.
    """
    records = ast.literal_eval(text)
    return [record['name'] for record in records]


def fetch_director(text):
    """Return the names of all entries whose ``'job'`` is ``'Director'``.

    ``text`` is a stringified list of dicts, parsed with ``ast.literal_eval``.
    Entries with any other job are skipped; an empty list is returned when no
    entry matches.
    """
    crew_members = ast.literal_eval(text)
    return [member['name'] for member in crew_members if member['job'] == 'Director']


def collapse(L):
    """Return a new list with every space removed from each string in ``L``."""
    return [item.replace(" ", "") for item in L]

In [53]:
# Load the movie metadata and the credits table, then join them into one frame.
movies = pd.read_csv('new_data/tmdb_5000_movies.csv')
credits = pd.read_csv('new_data/tmdb_5000_credits.csv')
# NOTE(review): joining on 'title' produces extra rows whenever a title is not
# unique in either file; joining on the movie id would be safer — confirm the
# id column names in both CSVs before changing this.
movies = movies.merge(credits, on='title')

# Keep only the columns used for the tags and discard rows with any missing
# value. dropna() returns a new frame, so no in-place mutation is performed on
# the column-selection result.
movies = movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']].dropna()

# The list-like columns are stored as stringified lists of dicts; reduce each
# one to a plain list of names.
movies['genres'] = movies['genres'].apply(convert)
movies['keywords'] = movies['keywords'].apply(convert)
movies['cast'] = movies['cast'].apply(convert).apply(lambda names: names[:3])  # first three cast entries only
movies['crew'] = movies['crew'].apply(fetch_director)  # directors only

# Optional experiment (disabled): remove spaces inside names so that
# multi-word names become single tokens.
# movies['cast'] = movies['cast'].apply(collapse)
# movies['crew'] = movies['crew'].apply(collapse)
# movies['genres'] = movies['genres'].apply(collapse)
# movies['keywords'] = movies['keywords'].apply(collapse)

# Split the overview into words so it can be concatenated with the name lists.
movies['overview'] = movies['overview'].apply(str.split)
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']

# Final frame: movie_id, title and one space-joined tag string per movie.
# " ".join always returns a string, so no null filter is needed here; the index
# is reset because dropna() above may have left gaps in it.
new = (
    movies
    .drop(columns=['overview', 'genres', 'keywords', 'cast', 'crew'])
    .assign(tags=lambda frame: frame['tags'].apply(" ".join))
    .reset_index(drop=True)
)
# Optional experiment (disabled): stem every token of the tag string.
# new['tags'] = new['tags'].apply(stemSentence_porter)

In [54]:
# One tag string per movie, in the same row order as `new`.
descriptions = new['tags'].tolist()

# Encode every description in a single batched call rather than one
# model.encode() call per string: the model batches the inputs internally and
# returns one 2-D array with a row per description, which avoids the
# per-call overhead of the loop.
des_embeddings = model.encode(descriptions, show_progress_bar=True)

In [55]:
# Inspect the tag string(s) built for 'Avatar' (one entry per matching row).
print(new.loc[new['title'] == 'Avatar', 'tags'].values)

['In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization. Action Adventure Fantasy Science Fiction culture clash future space war space colony society space travel futuristic romance space alien tribe alien planet cgi marine soldier battle love affair anti war power relations mind and soul 3d Sam Worthington Zoe Saldana Sigourney Weaver James Cameron']


In [60]:
import torch
from sentence_transformers import SentenceTransformer, util

def recommend(query, top_k=5):
    """Return the row positions of the descriptions most similar to ``query``.

    The query text is encoded with the notebook-level ``model`` and compared,
    by cosine similarity, against every vector in ``des_embeddings``.

    Parameters
    ----------
    query : str
        Free text to match against the encoded descriptions.
    top_k : int, default 5
        Maximum number of positions to return. Fewer are returned when fewer
        embeddings exist.

    Returns
    -------
    list of int
        Positions into ``des_embeddings``, ordered from highest to lowest
        cosine similarity.
    """
    query_embedd = model.encode(query)

    # A Python list of numpy vectors is converted to a tensor element by
    # element, which is slow; stack it into one 2-D array first.
    corpus = des_embeddings
    if isinstance(corpus, list) and corpus and isinstance(corpus[0], np.ndarray):
        corpus = np.stack(corpus)

    # Compute cosine similarities between the query and all embeddings;
    # the score matrix has one row because there is a single query.
    cosine_scores = util.pytorch_cos_sim(query_embedd, corpus)
    ranked = torch.argsort(cosine_scores, dim=-1, descending=True).tolist()[0]
    return ranked[:top_k]

# Alternative (disabled): use an existing movie's own tag string as the query.
# title = 'Avatar'
# query_show_des = new.loc[new['title'] == title]['tags'].to_list()[0]

# Free-text query made of a few words taken from the 'Avatar' tag string.
query_show_des = 'moon Pandora space colony society Sam Worthington 3d'
recommendded_results = recommend(query_show_des)

# Print the full record of each recommended row, in the order returned.
for index in recommendded_results:
    print(new.iloc[index])

movie_id                                                 9355
title                              Mad Max Beyond Thunderdome
tags        Mad Max becomes a pawn in a decadent oasis of ...
Name: 2999, dtype: object
movie_id                                                 6552
title                                              Idle Hands
tags        Anton is a cheerful but exceedingly non-ambiti...
Name: 2632, dtype: object
movie_id                                                 9549
title                                         The Right Stuff
tags        A chronicle of the original Mercury astronauts...
Name: 1765, dtype: object
movie_id                                                  811
title                                          Silent Running
tags        In a future Earth barren of all flora and faun...
Name: 4336, dtype: object
movie_id                                               323967
title                                                  Walter
tags        A ticket-taker a

In [37]:
# Inspect the tag string(s) built for 'Battleship' (one entry per matching row).
print(new.loc[new['title'] == 'Battleship', 'tags'].values)

["When mankind beams a radio signal into space, a reply comes from ‘Planet G’, in the form of several alien crafts that splash down in the waters off Hawaii. Lieutenant Alex Hopper is a weapons officer assigned to the USS John Paul Jones, part of an international naval coalition which becomes the world's last hope for survival as they engage the hostile alien force of unimaginable strength. While taking on the invaders, Hopper must also try to live up to the potential his brother, and his fiancée's father, Admiral Shane, expect of him. Thriller Action Adventure ScienceFiction fight u.s.navy mindreading hongkong soccer scientist fictionalwar naval armada battleship navalcombat jdsmyoko lostcommunication taser buoy communicationsexpert jointchiefsofstaff crashlanding jetfighterpilot navylieutenant permissiontomarry ussjohnpauljones basedonboardgame aftercreditsstinger mightymo ussmissouri TaylorKitsch AlexanderSkarsgård Rihanna PeterBerg"]
