# Importing libraries and data loading

In [1]:
import pandas as pd
import numpy as np
import ast
from unidecode import unidecode
import re
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity 

In [11]:
# Load the processed movies CSV (the path is relative to the notebook's working directory)
# and preview the first two rows.
df_movies = pd.read_csv("../processed_data/movies.csv")
df_movies.head(2)

Unnamed: 0,budget,id,original_language,overview,popularity,release_date,revenue,runtime,title,vote_average,...,collection,genres_list,spoken_languages_list,production_companies_list,production_countries_list,release_year,return,release_month,release_day,directors
0,30000000.0,862,en,"Led by Woody, Andy's toys live happily in his ...",21.946943,1995-10-30,373554033.0,81.0,Toy Story,7.7,...,Toy Story Collection,"['Animation', 'Comedy', 'Family']",['en'],['Pixar Animation Studios'],['US'],1995,12.45,octubre,lunes,['John Lasseter']
1,65000000.0,8844,en,When siblings Judy and Peter discover an encha...,17.015539,1995-12-15,262797249.0,104.0,Jumanji,6.9,...,,"['Adventure', 'Fantasy', 'Family']","['en', 'fr']","['TriStar Pictures', 'Teitler Film', 'Intersco...",['US'],1995,4.04,diciembre,viernes,['Joe Johnston']


In [3]:
# The list-like columns are stored as strings in the CSV, so they need to be converted back to lists.

type(df_movies["genres_list"][0])

str

As mentioned in the data cleaning phase, the `ast` library can convert a string that represents a specific data structure into that data structure, such as the lists in this case.

In [12]:
# Columns whose cells hold the string representation of a Python list.
list_columns = [
    "genres_list",
    "directors",
    "spoken_languages_list",
    "production_countries_list",
    "production_companies_list",
]

# Parse only the string cells. Missing values (NaN) are left untouched and cells that
# are already lists are kept as they are, so re-running this cell does not raise.
for column in list_columns:
    df_movies[column] = [
        ast.literal_eval(value) if isinstance(value, str) else value
        for value in df_movies[column]
    ]


Now that the elements of these columns are lists, they can be joined into plain strings. TfidfVectorizer() could probably process these columns in their original string form, ignoring special characters such as square brackets, so these transformations may be redundant, but they are a way to guarantee the correct behaviour of the following functions.

In [13]:

# Join every list into one comma-separated string. Only real lists are joined:
# missing values (NaN) pass through unchanged, and strings produced by a previous run
# of this cell are not split into single characters.
for column in (
    "genres_list",
    "directors",
    "spoken_languages_list",
    "production_countries_list",
    "production_companies_list",
):
    df_movies[column] = [
        ", ".join(value) if isinstance(value, list) else value
        for value in df_movies[column]
    ]

In [14]:
df_movies["genres_list"]

0         Animation, Comedy, Family
1        Adventure, Fantasy, Family
2                   Romance, Comedy
3            Comedy, Drama, Romance
4                            Comedy
                    ...            
45340        Drama, Action, Romance
45341                         Drama
45342       Action, Drama, Thriller
45343                              
45344                              
Name: genres_list, Length: 45345, dtype: object

This function was already used in the building of the API notebook

In [37]:
def string_transformation(text):
    """Normalize a string so it can be used as a lookup key.

    A ``str`` is lower-cased, stripped, has its spaces removed, is passed through
    ``unidecode`` to drop accents and finally loses every character that is neither
    a word character nor whitespace.

    Any value whose type is not exactly ``str`` is not transformed; the message
    "Entered value is not valid." is returned instead.
    """
    if type(text) != str:
        return "Entered value is not valid."

    compact = text.lower().strip().replace(" ", "")
    without_accents = unidecode(compact)  # delete accents
    # delete special characters and punctuation marks
    return re.sub(r'[^\w\s]', '', without_accents)

# Testing TfidfVectorizer

TfidfVectorizer() is the main tool that is going to be used in the recommendation model. To better understand how it works, a series of different objects are going to be fitted and transformed.

In [6]:
tfidf = TfidfVectorizer(stop_words="english") # stop_words="english" removes common English words
test_list = ['Animation', 'Comedy', 'Family'] # flat list of strings
test_list2 = [ ['Animation', 'Comedy', 'Family'],  ['Animation', 'Comedy', 'Family']] # a list of lists is not allowed
test_list3 = df_movies["genres_list"][0:2] # only allowed if the elements are strings, not if they are lists
test_string = "Animation", "Comedy", "Family" # note: this creates a tuple of three strings, not a single string
test_sentence = '''Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene.
                Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when circumstances separate 
                  Buzz and Woody from their owner, the duo eventually learns to put aside their differences.''' # a single plain string is not allowed
test_row = df_movies["overview"][0] # a single plain string is not allowed
test_combination = df_movies["genres_list"][0:2] + " " + df_movies["overview"][0:2] # only allowed if the elements are strings, not if they are lists

Besides testing these objects, the resulting vocabulary and bag of words are inspected as well.

## List

In [7]:
# A flat list of strings can be processed, a list of lists cannot.

# fit_transform() computes parameters such as the weights, vectorizes the input and
# returns it as a matrix (bag of words); todense() gives its dense form.
tfidf_matrix = tfidf.fit_transform(test_list)
doc_term_matrix = tfidf_matrix.todense()

In [8]:
tfidf_matrix.toarray() # visualization of feature matrix

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [9]:
# Label the dense matrix: one column per vocabulary term and one row per element of test_list.
df = pd.DataFrame(doc_term_matrix, 
                  columns=tfidf.get_feature_names_out() , 
                  index= test_list
                  )
df

Unnamed: 0,animation,comedy,family
Animation,1.0,0.0,0.0
Comedy,0.0,1.0,0.0
Family,0.0,0.0,1.0


## Strings separated

In [15]:
# test_string is a tuple of three strings, so it is fitted as three separate documents.
tfidf.fit_transform(test_string)   
tfidf.get_feature_names_out() 

array(['animation', 'comedy', 'family'], dtype=object)

## Rows of a dataframe (only if they are string type)

In [16]:
# test_list3 is a two-row slice of the genres_list column; the vocabulary is the union of both rows.
tfidf.fit_transform(test_list3)  
tfidf.get_feature_names_out() 

array(['adventure', 'animation', 'comedy', 'family', 'fantasy'],
      dtype=object)

## An entire column of a dataframe

In [17]:

# The resulting matrix is too big to display, so only the vocabulary is shown.
# Missing overviews are replaced with "" before fitting.

tfidf_matrix = tfidf.fit_transform(df_movies["overview"].fillna(""))   
tfidf.get_feature_names_out() 

array(['00', '000', '000km', ..., '첫사랑', 'ﬁrst', 'ﬁve'], dtype=object)

In [18]:
len(tfidf.get_feature_names_out()) # number of words

75765

## Concatenation of two columns of a dataframe (only if they are string type)

In [22]:
# Fit on the genres + overview text of the first two movies and keep a dense copy of the result.
tfidf_matrix = tfidf.fit_transform(test_combination)   
tfidf.get_feature_names_out()  # return value is not used or displayed here
concat_matrix = tfidf_matrix.todense()

In [23]:
len(tfidf.get_feature_names_out()) # number of words

62

In [24]:
tfidf_matrix.toarray() # 2 rows, 62 words

array([[0.        , 0.        , 0.        , 0.14001087, 0.        ,
        0.4200326 , 0.14001087, 0.14001087, 0.14001087, 0.        ,
        0.14001087, 0.4200326 , 0.14001087, 0.14001087, 0.        ,
        0.14001087, 0.        , 0.        , 0.14001087, 0.        ,
        0.14001087, 0.        , 0.09961889, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.14001087, 0.14001087,
        0.        , 0.        , 0.        , 0.        , 0.14001087,
        0.14001087, 0.14001087, 0.14001087, 0.        , 0.14001087,
        0.        , 0.        , 0.        , 0.14001087, 0.        ,
        0.14001087, 0.14001087, 0.        , 0.        , 0.        ,
        0.09961889, 0.        , 0.14001087, 0.14001087, 0.        ,
        0.        , 0.14001087, 0.        , 0.        , 0.4200326 ,
        0.        , 0.        ],
       [0.14742195, 0.14742195, 0.14742195, 0.        , 0.29484389,
        0.        , 0.        , 0.        , 0.        , 0.14742195,
        0.     

In [25]:
df = pd.DataFrame(concat_matrix, 
                  columns=tfidf.get_feature_names_out() 
                  )
df # two rows, two documents, 62 columns, 62 words in the vocabulary

Unnamed: 0,26,adult,adventure,afraid,alan,andy,animation,aside,birthday,board,...,scene,separate,siblings,terrifying,toys,trapped,unwittingly,woody,world,years
0,0.0,0.0,0.0,0.140011,0.0,0.420033,0.140011,0.140011,0.140011,0.0,...,0.140011,0.140011,0.0,0.0,0.140011,0.0,0.0,0.420033,0.0,0.0
1,0.147422,0.147422,0.147422,0.0,0.294844,0.0,0.0,0.0,0.0,0.147422,...,0.0,0.0,0.147422,0.147422,0.0,0.147422,0.147422,0.0,0.147422,0.147422


# Recommendation Model

## Setting training data

As stated in the exploratory data analysis notebook, this project had to use a fraction of the original dataset, because the free plan offered by Render has limited memory and cannot store big matrices.

In [77]:
# Training subset: movies with at least 100 votes. reset_index() renumbers the rows from 0
# and keeps the previous row labels in a new "index" column.
df_train = df_movies[df_movies["vote_count"] >= 100].reset_index()
df_train.shape

(6050, 24)

After the extraction, now the corpus of the experiment will be built with the following columns.

In [58]:
# fillna() is required before concatenating: joining a string with NaN yields NaN,
# so any row with a missing value in one of these columns ends up as NaN.

df_train["genres_list"] + df_train["overview"] + df_train["directors"] + df_train["collection"]  

0       Animation, Comedy, FamilyLed by Woody, Andy's ...
1                                                     NaN
2       ComedyJust when George Banks has recovered fro...
3                                                     NaN
4                                                     NaN
                              ...                        
6045    TV Movie, Family, Action, Comedy, Music, Adven...
6046                                                  NaN
6047    ComedyStuck in the corridors of time, Godefroy...
6048                                                  NaN
6049                                                  NaN
Length: 6050, dtype: object

In [116]:
# Corpus of the training subset: title, genres, overview, directors and collection joined
# with ", ". Missing values are replaced with "" first so that no row ends up as NaN.
df_train["corpus"] = df_train["title"].fillna("").str.cat(
    [
        df_train[column].fillna("")
        for column in ("genres_list", "overview", "directors", "collection")
    ],
    sep=", ",
)
df_train["corpus"]

0       Toy Story, Animation, Comedy, Family, Led by W...
1       Jumanji, Adventure, Fantasy, Family, When sibl...
2       Father of the Bride Part II, Comedy, Just when...
3       Heat, Action, Crime, Drama, Thriller, Obsessiv...
4       Sabrina, Comedy, Romance, An ugly duckling hav...
                              ...                        
6045    Descendants 2, TV Movie, Family, Action, Comed...
6046    Good Guys Go to Heaven, Bad Guys Go to Pattaya...
6047    The Visitors: Bastille Day, Comedy, Stuck in t...
6048    In a Heartbeat, Family, Animation, Romance, Co...
6049    Cadet Kelly, Comedy, Hyperactive teenager Kell...
Name: corpus, Length: 6050, dtype: object

In [109]:
# Some algorithms are also tested on movies of the original dataset, so the same corpus
# (title, genres, overview, directors and collection joined with ", ") is built for it.

df_movies["corpus"] = df_movies["title"].fillna("").str.cat(
    [
        df_movies[column].fillna("")
        for column in ("genres_list", "overview", "directors", "collection")
    ],
    sep=", ",
)

SyntaxError: invalid syntax (3281119557.py, line 3)

Additional transformations to ease the input of strings in the functions.

In [81]:
# Store a normalized version of every title in both tables, so that user input can be
# matched after going through the same string_transformation().
for frame in (df_movies, df_train):
    frame["transformed_title"] = frame["title"].map(string_transformation)

## Only using TfidfVectorizer

### Subset

In [95]:
# Notebook-level vectorizer used by feat_matrix(): unigrams and bigrams, English stop words removed.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words="english")

In [156]:
def feat_matrix(corpus):
    """Fit the notebook-level ``vectorizer`` on ``corpus`` and return the resulting matrix.

    Calling this function refits ``vectorizer``, replacing whatever it had
    learned from a previous corpus.
    """
    return vectorizer.fit_transform(corpus)

In [155]:
def sim_matrix(feature_matrix):
    """Return the cosine similarity between every pair of rows of ``feature_matrix``."""
    return cosine_similarity(feature_matrix)

In [166]:
def get_recommendations(title, feature_matrix, similarity_matrix, top_n=5):
    """Return the movies of ``df_train`` that are most similar to ``title``.

    Parameters
    ----------
    title : str
        Movie title as typed by the user; it is normalized with
        ``string_transformation`` before being looked up in ``df_train``.
    feature_matrix
        Not used by the lookup; kept so existing calls keep working.
    similarity_matrix
        Square matrix whose row ``i`` holds the similarity between the movie at
        row position ``i`` of ``df_train`` and every movie of ``df_train``.
    top_n : int, default 5
        Number of recommendations to return.

    Returns
    -------
    list of dict or str
        One dictionary per recommendation with the keys ``title``, ``genres``,
        ``director`` and ``similarity`` (rounded to 4 decimals), ordered from the
        most to the least similar. The string "Entered value is not valid." is
        returned when ``title`` is not a string or is not found in ``df_train``.
    """
    # Reject non-strings up front: string_transformation() would turn them into its
    # own error message, which would then be looked up as if it were a title.
    if not isinstance(title, str):
        return "Entered value is not valid."

    title = string_transformation(title)

    # Row positions (not labels) of the matching movies; the first one is used.
    matches = np.flatnonzero((df_train["transformed_title"] == title).to_numpy())
    if matches.size == 0:
        return "Entered value is not valid."
    movie_index = matches[0]

    # Similarity between the entered movie and every movie.
    similarity_scores = similarity_matrix[movie_index]

    # Best scores first. The entered movie is removed explicitly instead of assuming
    # that it sits at rank 0, which does not hold when another movie ties with it.
    ranked = similarity_scores.argsort()[::-1]
    top_indices = ranked[ranked != movie_index][:top_n]

    recommendations = []
    for index in top_indices:
        recommendations.append(
            {
                'title': df_train['title'].iloc[index],
                'genres': df_train['genres_list'].iloc[index],
                'director': df_train['directors'].iloc[index],
                'similarity': similarity_scores[index].round(4),
            }
        )

    return recommendations



In [163]:
# TF-IDF matrix of the training corpus and the similarity between every pair of its movies.
feature_matrix = feat_matrix(df_train["corpus"])
similarity_matrix = sim_matrix(feature_matrix)

In [167]:
get_recommendations("Cars", feature_matrix, similarity_matrix)

[{'title': 'Cars 2',
  'genres': 'Animation, Family, Adventure, Comedy',
  'director': 'John Lasseter, Brad Lewis',
  'similarity': 0.2087},
 {'title': 'Cars 3',
  'genres': 'Family, Comedy, Animation, Adventure',
  'director': 'Brian Fee',
  'similarity': 0.1384},
 {'title': 'Bride of Chucky',
  'genres': 'Horror, Comedy',
  'director': 'Ronny Yu',
  'similarity': 0.0627},
 {'title': 'Full Speed',
  'genres': 'Family, Comedy',
  'director': 'Nicolas Benamou',
  'similarity': 0.0516},
 {'title': 'Silverado',
  'genres': 'Action, Crime, Drama, Western',
  'director': 'Lawrence Kasdan',
  'similarity': 0.0507}]

#### Testing the function step by step

In [133]:
# NOTE: from here on `tfidf` is the feature matrix itself; in the "Testing TfidfVectorizer"
# section the same name referred to a TfidfVectorizer object.
tfidf = feat_matrix(df_train["corpus"]) # feature matrix built from the corpus of the training data
similarity_matrix = sim_matrix(tfidf) # similarity between movies: every row and every column of this matrix is a movie

In [141]:
print(tfidf.shape)
print(similarity_matrix.shape)

(6050, 188826)
(6050, 6050)


In [142]:
# similarity_matrix[1] contains the similarity score between one specific movie and all the movies
# argsort() sorts the values of the array in ascending order but returns their indices instead of the values
# [::-1] reverses the result, so the index of the highest value comes first
# [1:6] (used in get_recommendations) keeps the 5 movies with the highest score, skipping position 0, where the entered movie itself is expected

similarity_matrix[1].argsort()[::-1]

array([   1, 5704, 4384, ..., 5385, 1883, 6049], dtype=int64)

In [168]:
# One dictionary per movie with its title, genres, director and similarity to movie 1,
# ordered from the most to the least similar.
recommendations = [
    {
        'title': df_train['title'][position],
        'genres': df_train['genres_list'][position],
        'director': df_train['directors'][position],
        'similarity': similarity_matrix[1][position],
    }
    for position in similarity_matrix[1].argsort()[::-1]
]

recommendations 

[{'title': 'Jumanji',
  'genres': 'Adventure, Fantasy, Family',
  'director': 'Joe Johnston',
  'similarity': 1.0000000000000004},
 {'title': 'Nine Lives',
  'genres': 'Fantasy, Comedy, Family',
  'director': 'Barry Sonnenfeld',
  'similarity': 0.05623213413575769},
 {'title': 'Wreck-It Ralph',
  'genres': 'Family, Animation, Comedy, Adventure',
  'director': 'Rich Moore',
  'similarity': 0.05295681270190632},
 {'title': "Geri's Game",
  'genres': 'Animation, Family',
  'director': 'Jan Pinkava',
  'similarity': 0.050932167714760467},
 {'title': 'Night of the Living Dead',
  'genres': 'Horror',
  'director': 'Tom Savini',
  'similarity': 0.04788873586369152},
 {'title': 'The Bar',
  'genres': 'Horror, Thriller, Comedy',
  'director': 'Álex de la Iglesia',
  'similarity': 0.04768086034631192},
 {'title': 'Peter Pan',
  'genres': 'Animation, Music, Family, Adventure, Fantasy',
  'director': 'Clyde Geronimi, Wilfred Jackson, Hamilton Luske',
  'similarity': 0.04746526430693994},
 {'title'

### Entire dataset

In [None]:
def get_recommendations(title, tfidf, similarity_matrix, vectorizer):
    """Recommend movies of ``df_train`` for any movie of ``df_movies``.

    Parameters
    ----------
    title : str
        Exact title of the movie, as stored in ``df_movies["title"]``.
    tfidf
        Feature matrix of the ``df_train`` corpus, one row per movie.
    similarity_matrix
        Not used; kept so existing calls keep working.
    vectorizer
        Vectorizer that was fitted to produce ``tfidf``.

    Returns
    -------
    dict or str
        Mapping ``{title: similarity score}`` with the 10 most similar movies,
        best first, or the string "Entered value is not valid." when ``title``
        is not found in ``df_movies``.

    Raises
    ------
    ValueError
        If ``vectorizer`` produces a different number of features than ``tfidf``
        has, which means ``tfidf`` was not built with this vectorizer.
    """
    # Look the movie up in the full dataset; the first match is used.
    is_requested = df_movies["title"] == title
    if not is_requested.any():
        return "Entered value is not valid."
    requested = df_movies.loc[is_requested].iloc[0]

    # Vectorize the corpus of the requested movie with the already fitted vocabulary.
    new_movie_tfidf = vectorizer.transform([requested["corpus"]])

    # Padding a narrower matrix with zeros would only align the shapes, not the
    # features behind each column, so a mismatch is reported instead of patched.
    if new_movie_tfidf.shape[1] != tfidf.shape[1]:
        raise ValueError(
            "vectorizer and tfidf have a different number of features: "
            f"{new_movie_tfidf.shape[1]} != {tfidf.shape[1]}"
        )

    # Cosine similarity between the requested movie and every training movie.
    similarity_scores = cosine_similarity(new_movie_tfidf, tfidf)[0]

    # Best scores first (stable, so ties keep their original order). The requested
    # movie is dropped by id only if it is part of df_train; slicing off rank 0
    # would discard the best recommendation for movies outside the training subset.
    ranked = np.argsort(-similarity_scores, kind="stable")
    is_same_movie = (df_train["id"] == requested["id"]).to_numpy()
    top_indices = ranked[~is_same_movie[ranked]][:10]

    # Dictionary of recommended movies.
    return {df_train["title"].iloc[i]: similarity_scores[i] for i in top_indices}

get_recommendations("Cars", tfidf, similarity_matrix, vectorizer)

## Using TruncatedSVD LSA or LSI

### Subset

In [143]:
def get_recommendations(title, n_recommendations=19, random_state=0):
    """Recommend movies of ``df_train`` using TF-IDF followed by TruncatedSVD (LSA).

    Parameters
    ----------
    title : str
        Exact title of the movie, as stored in ``df_train["title"]``.
    n_recommendations : int, default 19
        Number of movies to return.
    random_state : int, default 0
        Seed passed to ``TruncatedSVD`` so repeated calls give the same result.

    Returns
    -------
    dict or str
        Mapping ``{row position in df_train: title}`` of the most similar movies,
        best first, or the string "Entered value is not valid." when ``title``
        is not found in ``df_train``.
    """
    # Row position of the requested movie; checked before any expensive fitting.
    matches = np.flatnonzero((df_train['title'] == title).to_numpy())
    if matches.size == 0:
        return "Entered value is not valid."
    movie_index = matches[0]

    # Bag of words.
    vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words="english")
    tfidf = vectorizer.fit_transform(df_train['corpus'])

    # LSA / LSI: project the TF-IDF matrix onto a lower-dimensional representation.
    lsa = TruncatedSVD(n_components=100, algorithm='arpack', random_state=random_state)
    tfidf_lsa = lsa.fit_transform(tfidf)

    # Cosine similarity between the requested movie and every movie in the LSA space.
    similarity_scores = cosine_similarity(tfidf_lsa[movie_index].reshape(1, -1), tfidf_lsa)[0]

    # Best scores first (stable, so ties keep their original order); the requested
    # movie is removed explicitly instead of assuming that it sits at rank 0.
    ranked = np.argsort(-similarity_scores, kind="stable")
    top_indices = ranked[ranked != movie_index][:n_recommendations]

    # Dictionary of recommended movies.
    return {int(i): df_train['title'].iloc[i] for i in top_indices}

get_recommendations("Hannibal")


2168128
1781: Cars 2
2697: Cars 3
439: A Bug's Life
1733: Rango
4: Balto
1407: Welcome to the Sticks
1931: ParaNorman
1304: The Simpsons Movie
1755: Welcome to the South
627: Blazing Saddles
2288: The Book of Life
800: Mr. Deeds
1590: I Love You Phillip Morris
794: Spirit: Stallion of the Cimarron
565: Easy Rider
1128: The Curse of the Were-Rabbit
1096: Herbie Fully Loaded
2407: Maggie
1271: Meet the Robinsons


### Entire dataset

## Lemmatization

### Subset

In [None]:
import nltk
from nltk.stem import WordNetLemmatizer

nltk.download('wordnet')

In [None]:
vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words="english")


def feat_matrix(corpus):
    """Lemmatize ``corpus`` and fit the notebook-level ``vectorizer`` on it.

    Each document is split on whitespace and every token goes through
    ``WordNetLemmatizer.lemmatize`` before the text is vectorized.

    The notebook-level ``vectorizer`` is fitted (no local vectorizer is created),
    so it can afterwards be used to ``transform`` new lemmatized documents into
    the same feature space as the returned matrix.

    NOTE: this redefines the ``feat_matrix`` of the "Only using TfidfVectorizer" section.
    """
    lemmatizer = WordNetLemmatizer()
    corpus_lemmatized = [
        ' '.join(lemmatizer.lemmatize(word) for word in doc.split())
        for doc in corpus
    ]

    return vectorizer.fit_transform(corpus_lemmatized)

def sim_matrix(feat_matrix=None):
    """Return the cosine similarity between every pair of rows of ``feat_matrix``.

    When ``feat_matrix`` is omitted, the notebook-level ``tfidf`` is used. It is
    looked up when the function is called, not when it is defined, so the matrix
    is always the current one and the definition does not require ``tfidf`` to
    exist yet.

    NOTE: this redefines the ``sim_matrix`` of the "Only using TfidfVectorizer" section.
    """
    if feat_matrix is None:
        feat_matrix = tfidf
    return cosine_similarity(feat_matrix)


# Feature matrix of the lemmatized training corpus and the similarity between every pair of its movies.
tfidf = feat_matrix(df_train["corpus"])
similarity_matrix = sim_matrix(tfidf)

### Entire dataset

In [None]:


def get_recommendations(title, tfidf, similarity_matrix, vectorizer):
    """Recommend movies of ``df_train`` for any movie of ``df_movies``, using lemmatization.

    Parameters
    ----------
    title : str
        Exact title of the movie, as stored in ``df_movies["title"]``.
    tfidf
        Feature matrix of the lemmatized ``df_train`` corpus, one row per movie.
    similarity_matrix
        Not used; kept so existing calls keep working.
    vectorizer
        Vectorizer that was fitted to produce ``tfidf``.

    Returns
    -------
    dict or str
        Mapping ``{title: similarity score}`` with the 10 most similar movies,
        best first, or the string "Entered value is not valid." when ``title``
        is not found in ``df_movies``.

    Raises
    ------
    ValueError
        If ``vectorizer`` produces a different number of features than ``tfidf``
        has, which means ``tfidf`` was not built with this vectorizer.
    """
    # Look the movie up in the full dataset; the first match is used.
    is_requested = df_movies["title"] == title
    if not is_requested.any():
        return "Entered value is not valid."
    requested = df_movies.loc[is_requested].iloc[0]

    # Lemmatize the corpus of the requested movie, token by token.
    lemmatizer = WordNetLemmatizer()
    new_movie_corpus_lemmatized = ' '.join(
        lemmatizer.lemmatize(word) for word in requested["corpus"].split()
    )

    new_movie_tfidf = vectorizer.transform([new_movie_corpus_lemmatized])

    # Padding a narrower matrix with zeros would only align the shapes, not the
    # features behind each column, so a mismatch is reported instead of patched.
    if new_movie_tfidf.shape[1] != tfidf.shape[1]:
        raise ValueError(
            "vectorizer and tfidf have a different number of features: "
            f"{new_movie_tfidf.shape[1]} != {tfidf.shape[1]}"
        )

    # Cosine similarity between the requested movie and every training movie.
    similarity_scores = cosine_similarity(new_movie_tfidf, tfidf)[0]

    # Best scores first (stable, so ties keep their original order). The requested
    # movie is dropped by id only if it is part of df_train; slicing off rank 0
    # would discard the best recommendation for movies outside the training subset.
    ranked = np.argsort(-similarity_scores, kind="stable")
    is_same_movie = (df_train["id"] == requested["id"]).to_numpy()
    top_indices = ranked[~is_same_movie[ranked]][:10]

    # Dictionary of recommended movies.
    return {df_train["title"].iloc[i]: similarity_scores[i] for i in top_indices}

get_recommendations("Dirty Dancing", tfidf, similarity_matrix, vectorizer)

In [10]:
## FOR EVERY MOVIE

def get_recommendations(n_recommendations=19):
    """Compute the most similar movies for every movie of ``df_train``.

    Parameters
    ----------
    n_recommendations : int, default 19
        Number of similar movies kept for each movie.

    Returns
    -------
    dict
        Mapping ``{title: array of titles}``, each array ordered from the most to
        the least similar movie. Movies that share a title share a single key, so
        the later one overwrites the earlier one.
    """
    # Bag of words.
    vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words="english")
    tfidf = vectorizer.fit_transform(df_train['corpus'])

    # Cosine similarity between every pair of movies.
    similarity_matrix = cosine_similarity(tfidf)

    titles = df_train['title'].to_numpy()

    # Dictionary of recommended movies.
    recommendations = {}
    for i, scores in enumerate(similarity_matrix):
        # Best scores first (stable, so ties keep their original order); the movie
        # itself is removed explicitly instead of assuming that it sits at rank 0.
        ranked = np.argsort(-scores, kind="stable")
        ranked = ranked[ranked != i][:n_recommendations]
        recommendations[titles[i]] = titles[ranked]

    return recommendations

get_recommendations()


{'Toy Story': array(['Toy Story 2', 'Toy Story 3', 'Toy Story of Terror!',
        'Toy Story That Time Forgot', 'Small Fry',
        'The 40 Year Old Virgin', 'Rebel Without a Cause',
        'Everything You Always Wanted to Know About Sex *But Were Afraid to Ask',
        "A Bug's Life", 'Toys', 'Match Point', 'The Wild', 'Free Birds',
        'Small Soldiers', 'Radio Days', "Child's Play 3", 'Scoop',
        'Malice', 'Take the Money and Run'], dtype=object),
 'Jumanji': array(['Nine Lives', 'Wreck-It Ralph', "Geri's Game",
        'Night of the Living Dead', 'The Bar', 'Peter Pan',
        'Guardians of the Galaxy', 'Zathura: A Space Adventure',
        'Indie Game: The Movie', 'Dungeons & Dragons',
        'Ouija: Origin of Evil', 'Stay Alive', 'Panic Room',
        'Spy Kids 3-D: Game Over', 'eXistenZ', 'Pixels', 'Gamer',
        'Jurassic Park III', 'Hidalgo'], dtype=object),
 'Father of the Bride Part II': array(['Father of the Bride', 'George of the Jungle',
        "It's a Wo

## Including entire dataset

In [41]:
# Movies left out of the training subset (fewer than 100 votes) whose popularity is still above 10.
df_movies[(df_movies["vote_count"] < 100) & (df_movies["popularity"] > 10)]

Unnamed: 0,budget,id,original_language,overview,popularity,release_date,revenue,runtime,title,vote_average,...,genres_list,spoken_languages_list,production_companies_list,production_countries_list,release_year,return,release_month,release_day,directors,corpus
2,0.0,15602,en,A family wedding reignites the ancient feud be...,11.712900,1995-12-22,0.0,101.0,Grumpier Old Men,6.5,...,"Romance, Comedy",en,"Warner Bros., Lancaster Gate",US,1995,0.00,diciembre,viernes,Howard Deutch,"Grumpier Old Men, Romance, Comedy, A family we..."
42,19000000.0,35196,en,"An aspiring young physician, Robert Merivel fo...",10.979269,1995-12-29,0.0,117.0,Restoration,6.3,...,"Drama, Romance","en, es",Miramax Films,US,1995,0.00,diciembre,viernes,Michael Hoffman,"Restoration, Drama, Romance, An aspiring young..."
73,0.0,20927,en,"The seeds of love are planted when Lisa, a hig...",10.412194,1996-01-26,19030691.0,87.0,Bed of Roses,5.1,...,"Drama, Romance",en,"New Line Cinema, Juno Pix",US,1996,0.00,enero,viernes,Michael Goldenberg,"Bed of Roses, Drama, Romance, The seeds of lov..."
90,47000000.0,9095,en,A housemaid falls in love with Dr. Jekyll and ...,12.866139,1996-02-23,12379402.0,104.0,Mary Reilly,5.7,...,"Drama, Horror, Thriller, Romance",en,"TriStar Pictures, NFH Productions",US,1996,0.26,febrero,viernes,Stephen Frears,"Mary Reilly, Drama, Horror, Thriller, Romance,..."
130,50000000.0,11863,en,Someone does a nasty hatchet job on a San Fran...,11.418917,1995-10-13,9851610.0,95.0,Jade,5.1,...,"Action, Thriller, Mystery, Romance",en,Paramount Pictures,US,1995,0.20,octubre,viernes,William Friedkin,"Jade, Action, Thriller, Mystery, Romance, Some..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44922,0.0,404733,en,Master car thieves square off against French g...,24.124945,2017-06-29,0.0,93.0,Overdrive,6.2,...,"Thriller, Action","en, fr, de","Kinology, Umedia, Setient Pictures",FR,2017,0.00,junio,jueves,Antonio Negret,"Overdrive, Thriller, Action, Master car thieve..."
44950,0.0,444902,en,Spending the summer at a Catskills resort with...,11.145052,2017-05-24,0.0,130.0,Dirty Dancing,5.3,...,"Music, Drama, Romance, TV Movie",en,Lionsgate Television,US,2017,0.00,mayo,miércoles,Wayne Blair,"Dirty Dancing, Music, Drama, Romance, TV Movie..."
45026,0.0,407531,en,A group of friends download a Siri-like App wh...,16.227875,2017-06-28,0.0,91.0,Bedeviled,4.9,...,"Horror, Thriller",en,"Conduit, Circle 18, STANDOFF Pictures",US,2017,0.00,junio,miércoles,"Abel Vang, Burlee Vang","Bedeviled, Horror, Thriller, A group of friend..."
45030,0.0,463906,en,"International master thief, Simon Templar, als...",28.235930,2017-07-11,0.0,92.0,The Saint,5.8,...,"Action, Adventure, Crime","en, pt",,US,2017,0.00,julio,martes,Ernie Barbarash,"The Saint, Action, Adventure, Crime, Internati..."
