In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
import joblib

%matplotlib inline

On this particular case, I am using Content-Based Filtering rather than Collaborative Learning to build a recommendation system for this dataset.

#### Loading the dataset

In [2]:
df = pd.read_csv('movies_cleaned.csv', encoding = 'latin1')
df.head()

Unnamed: 0,budget,company,country,director,genre,gross,name,rating,released,runtime,score,star,votes,writer,year
0,8000000,Columbia Pictures Corporation,USA,Rob Reiner,Adventure,52287414,Stand by Me,R,1986-08-22,89,8.1,Wil Wheaton,299174,Stephen King,1986
1,6000000,Paramount Pictures,USA,John Hughes,Comedy,70136369,Ferris Bueller's Day Off,PG-13,1986-06-11,103,7.8,Matthew Broderick,264740,John Hughes,1986
2,15000000,Paramount Pictures,USA,Tony Scott,Action,179800601,Top Gun,PG,1986-05-16,110,6.9,Tom Cruise,236909,Jim Cash,1986
3,18500000,Twentieth Century Fox Film Corporation,USA,James Cameron,Action,85160248,Aliens,R,1986-07-18,137,8.4,Sigourney Weaver,540152,James Cameron,1986
4,9000000,Walt Disney Pictures,USA,Randal Kleiser,Adventure,18564613,Flight of the Navigator,PG,1986-08-01,90,6.9,Joey Cramer,36636,Mark H. Baker,1986


#### Creating new dataset, only contains features that I think has impact on the recommendation system

***Selecting A Great Movie To Watch***
- Select by themes
One can choose a film by deciding on what kind of theme they want to see.

- Choose by genre
One can also consider selecting a movie according to its genre.

- Find one with your favorite actors
Another great way that can guide you in selecting a movie is to pick a film that has your favorite actors and actresses.

- Select by audience
Kids Playing Computer GameRatings for the audience is another way that one can use to select a great movie to watch. 

- Select by the director
Lastly one can look at the director and producer of the film.

taken from http://www.gacds.org/entertainment/tips-to-selecting-a-great-movie-to-watch/

- I will also use the 'year' feature as it could make a better model.

taken from https://medium.com/fnplus/content-based-recommendations-ffb221931485

In [3]:
data = df.copy()[['name','company','director','genre','star','rating', 'score', 'year']]
data.head()

Unnamed: 0,name,company,director,genre,star,rating,score,year
0,Stand by Me,Columbia Pictures Corporation,Rob Reiner,Adventure,Wil Wheaton,R,8.1,1986
1,Ferris Bueller's Day Off,Paramount Pictures,John Hughes,Comedy,Matthew Broderick,PG-13,7.8,1986
2,Top Gun,Paramount Pictures,Tony Scott,Action,Tom Cruise,PG,6.9,1986
3,Aliens,Twentieth Century Fox Film Corporation,James Cameron,Action,Sigourney Weaver,R,8.4,1986
4,Flight of the Navigator,Walt Disney Pictures,Randal Kleiser,Adventure,Joey Cramer,PG,6.9,1986


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6820 entries, 0 to 6819
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   name      6820 non-null   object 
 1   company   6820 non-null   object 
 2   director  6820 non-null   object 
 3   genre     6820 non-null   object 
 4   star      6820 non-null   object 
 5   rating    6820 non-null   object 
 6   score     6820 non-null   float64
 7   year      6820 non-null   int64  
dtypes: float64(1), int64(1), object(6)
memory usage: 426.4+ KB


In [5]:
data.describe()

Unnamed: 0,score,year
count,6820.0,6820.0
mean,6.374897,2001.276393
std,1.003142,8.986115
min,1.5,1986.0
25%,5.8,1994.0
50%,6.4,2001.0
75%,7.1,2009.0
max,9.3,2017.0


In [6]:
len(data['name'])

6820

There are 6820 movie titles that are available in this dataset.

In [7]:
data['genre'].unique()

array(['Adventure', 'Comedy', 'Action', 'Drama', 'Crime', 'Thriller',
       'Horror', 'Animation', 'Biography', 'Sci-Fi', 'Musical', 'Family',
       'Fantasy', 'Mystery', 'War', 'Romance', 'Western'], dtype=object)

There are about 17 different movie genres in this dataset

#### Changing 'score' feature's dtype from float to string/object to able to use Content-Based Filtering

In [8]:
print(data['score'].dtype)

float64


In [9]:
data['score'] = data['score'].astype('str') 
data['score'].unique()

array(['8.1', '7.8', '6.9', '8.4', '7.4', '6.8', '7.5', '6.5', '7.2',
       '7.3', '5.9', '5.4', '4.6', '6.0', '6.6', '5.6', '5.3', '7.1',
       '6.4', '5.7', '7.0', '6.3', '3.8', '6.7', '4.3', '8.0', '5.5',
       '7.6', '6.1', '6.2', '5.2', '5.8', '4.8', '4.4', '5.0', '4.9',
       '4.5', '3.1', '3.3', '4.0', '2.9', '4.7', '4.1', '5.1', '3.5',
       '4.2', '8.3', '7.9', '7.7', '3.6', '3.2', '2.7', '2.2', '3.7',
       '8.2', '8.5', '3.4', '2.8', '3.9', '8.7', '2.3', '1.6', '8.6',
       '8.9', '9.3', '8.8', '2.6', '2.4', '2.5', '3.0', '2.0', '2.1',
       '9.0', '1.9', '1.5'], dtype=object)

#### Changing 'score' feature's dtype from float to string/object to able to use Content-Based Filtering


In [10]:
print(data['year'].dtype)

int64


In [11]:
data['year'] = data['year'].astype('str') 
data['year'].unique()

array(['1986', '1987', '1990', '1989', '1988', '1992', '2004', '1991',
       '2008', '1999', '1993', '1995', '2016', '1997', '1994', '1996',
       '2001', '1998', '2000', '2014', '2002', '2003', '2007', '2005',
       '2006', '2012', '2010', '2009', '2011', '2013', '2015', '2017'],
      dtype=object)

In [12]:
data.head()

Unnamed: 0,name,company,director,genre,star,rating,score,year
0,Stand by Me,Columbia Pictures Corporation,Rob Reiner,Adventure,Wil Wheaton,R,8.1,1986
1,Ferris Bueller's Day Off,Paramount Pictures,John Hughes,Comedy,Matthew Broderick,PG-13,7.8,1986
2,Top Gun,Paramount Pictures,Tony Scott,Action,Tom Cruise,PG,6.9,1986
3,Aliens,Twentieth Century Fox Film Corporation,James Cameron,Action,Sigourney Weaver,R,8.4,1986
4,Flight of the Navigator,Walt Disney Pictures,Randal Kleiser,Adventure,Joey Cramer,PG,6.9,1986


#### Replacing ' ' (white space) with '' (empty string) to make a new feature called 'Bag of Words' to make it easier for the system to find similarities between each data.

``The bag-of-words model is a popular and simple feature extraction technique used when we work with text. It describes the occurrence of each word within a document``

Machine learning algorithms cannot work with raw text directly, we need to convert the text into vectors of numbers. This is called feature extraction.

The bag-of-words model is a popular and simple feature extraction technique used when we work with text. It describes the occurrence of each word within a document.

To use this model, we need to:

1.    Design a vocabulary of known words (also called tokens)
2.    Choose a measure of the presence of known words

Any information about the order or structure of words is discarded. That’s why it’s called a bag of words. This model is trying to understand whether a known word occurs in a document, but don’t know where is that word in the document.

The intuition is that similar documents have similar contents. Also, from a content, we can learn something about the meaning of the document.

taken from https://towardsdatascience.com/your-guide-to-natural-language-processing-nlp-48ea2511f6e1

In [13]:
data['company'] = data['company'].apply(lambda x: x.replace(' ',''))
data['director'] = data['director'].apply(lambda x: x.replace(' ',''))
data['genre'] = data['genre'].apply(lambda x: x.replace(' ',''))
data['star'] = data['star'].apply(lambda x: x.replace(' ',''))
data['rating'] = data['rating'].apply(lambda x: x.replace(' ',''))

data.head()

Unnamed: 0,name,company,director,genre,star,rating,score,year
0,Stand by Me,ColumbiaPicturesCorporation,RobReiner,Adventure,WilWheaton,R,8.1,1986
1,Ferris Bueller's Day Off,ParamountPictures,JohnHughes,Comedy,MatthewBroderick,PG-13,7.8,1986
2,Top Gun,ParamountPictures,TonyScott,Action,TomCruise,PG,6.9,1986
3,Aliens,TwentiethCenturyFoxFilmCorporation,JamesCameron,Action,SigourneyWeaver,R,8.4,1986
4,Flight of the Navigator,WaltDisneyPictures,RandalKleiser,Adventure,JoeyCramer,PG,6.9,1986


#### Lowering the case so it will make the user easier to input the movie of their choice

In [14]:
data['company'] = data['company'].apply(lambda x: x.lower())
data['director'] = data['director'].apply(lambda x: x.lower())
data['genre'] = data['genre'].apply(lambda x: x.lower())
data['star'] = data['star'].apply(lambda x: x.lower())
data['rating'] = data['rating'].apply(lambda x: x.lower())
movies = pd.Series(data['name'].str.lower())

data.head()

Unnamed: 0,name,company,director,genre,star,rating,score,year
0,Stand by Me,columbiapicturescorporation,robreiner,adventure,wilwheaton,r,8.1,1986
1,Ferris Bueller's Day Off,paramountpictures,johnhughes,comedy,matthewbroderick,pg-13,7.8,1986
2,Top Gun,paramountpictures,tonyscott,action,tomcruise,pg,6.9,1986
3,Aliens,twentiethcenturyfoxfilmcorporation,jamescameron,action,sigourneyweaver,r,8.4,1986
4,Flight of the Navigator,waltdisneypictures,randalkleiser,adventure,joeycramer,pg,6.9,1986


#### The Bag of Words feature will include 5 different feature, which are ***director***, ***genre***, ***star***, ***rating***, and ***score***.

In [15]:
data['bagofwords'] = data['company'] + ' ' + data['director'] + ' ' + data['genre'] + ' ' + data['star'] + ' ' + data['rating'] + ' ' + data['score'] + ' ' + data['year'] 
data.head()

Unnamed: 0,name,company,director,genre,star,rating,score,year,bagofwords
0,Stand by Me,columbiapicturescorporation,robreiner,adventure,wilwheaton,r,8.1,1986,columbiapicturescorporation robreiner adventur...
1,Ferris Bueller's Day Off,paramountpictures,johnhughes,comedy,matthewbroderick,pg-13,7.8,1986,paramountpictures johnhughes comedy matthewbro...
2,Top Gun,paramountpictures,tonyscott,action,tomcruise,pg,6.9,1986,paramountpictures tonyscott action tomcruise p...
3,Aliens,twentiethcenturyfoxfilmcorporation,jamescameron,action,sigourneyweaver,r,8.4,1986,twentiethcenturyfoxfilmcorporation jamescamero...
4,Flight of the Navigator,waltdisneypictures,randalkleiser,adventure,joeycramer,pg,6.9,1986,waltdisneypictures randalkleiser adventure joe...


#### New dataframe to see the name of the movie and its bag of words.

In [16]:
df_bow = data[['name','bagofwords']]
df_bow.head()

Unnamed: 0,name,bagofwords
0,Stand by Me,columbiapicturescorporation robreiner adventur...
1,Ferris Bueller's Day Off,paramountpictures johnhughes comedy matthewbro...
2,Top Gun,paramountpictures tonyscott action tomcruise p...
3,Aliens,twentiethcenturyfoxfilmcorporation jamescamero...
4,Flight of the Navigator,waltdisneypictures randalkleiser adventure joe...


#### In this model, I am using CountVectorizer rather than TfidfVectorizer because II need a simple frequency counter for each word in my bag_of_words column. Tf-Idf tends to give less importance to the words that are more present in the entire corpus (our whole column, in this case) which is not what we want for this application, because every word is important to detect similarity 
(taken from https://towardsdatascience.com/how-to-build-from-scratch-a-content-based-movie-recommender-with-natural-language-processing-25ad400eb243)

#### CountVectorizer:  to convert a collection of text documents to a matrix of token counts



In [17]:
ext = CountVectorizer()
matrix = ext.fit_transform(data['bagofwords'])
print(len(ext.get_feature_names()))
print(ext.get_feature_names())

7834
['101ststreetfilms', '10thholeproductions', '120films', '13', '13productions', '14', '1492pictures', '17', '1821pictures', '1984privatedefensecontractors', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1992numberfourlimitedpartnership', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '19entertainment', '2000', '2001', '2002', '2003', '2003productions', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '20thcenturyfox', '21lapsentertainment', '21stcenturyfilmcorporation', '25thhourproductions', '26films', '2929productions', '2loopfilms', '2pictures', '2seisakuiinkai', '3311productions', '360pictures', '38productions', '391productions', '3artsentertainment', '3bproductions', '3markentertainment', '3milesapartproductionsltd', '40acres', '42', '4kfilms', '4kidsentertainment', '50cent', '60', '7artsinternational', '7filmscinã', '80productions', '8383productions', '85', '888productions', '88productions', '9

In [18]:
matrix.toarray()

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

#### Building a model using cosine similarity
- Cosine similarity is a metric used to measure how similar the documents are irrespective of their size. Mathematically, it measures the cosine of the angle between two vectors projected in a multi-dimensional space

In [19]:
cosScore = cosine_similarity(matrix)
cosScore

array([[1.        , 0.16903085, 0.18257419, ..., 0.        , 0.        ,
        0.        ],
       [0.16903085, 1.        , 0.46291005, ..., 0.        , 0.        ,
        0.        ],
       [0.18257419, 0.46291005, 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.14433757,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.14433757, 1.        ,
        0.16666667],
       [0.        , 0.        , 0.        , ..., 0.        , 0.16666667,
        1.        ]])

In [20]:
cosScore.shape

(6820, 6820)

In [21]:
movies

0                          stand by me
1             ferris bueller's day off
2                              top gun
3                               aliens
4              flight of the navigator
                     ...              
6815    absolutely fabulous: the movie
6816             mothers and daughters
6817          batman: the killing joke
6818             the eyes of my mother
6819         from the land of the moon
Name: name, Length: 6820, dtype: object

The cosine similarity is basically like this, 0 to 1 with 1 is the highest based on similarity score.

<img src="https://miro.medium.com/max/852/1*CQ-1nBzzcGJ9ncEMgj1_lA.png" width=400> 

#### Defining a recommender system

In [22]:
def imdb(name, cosScore = cosScore):
    idx = movies[movies == name].index[0]
    similar = list(enumerate(cosScore[idx]))  
# manually ranked
    similar = sorted(similar, key=lambda x: x[1], reverse=True)
# filtering to only movies which have more than 50% cosine similarity score
#     similar = list(filter(lambda x: x[1] > 0.5, similar))
# 1 because the inputted movie will not show on the recommendation, 11 because I will only show 10 movies
    similar = similar[1:11]
    recommendation = [i[0] for i in similar]
    return df.iloc[recommendation]

#### Testing the recommender system

#### #1

In [23]:
movie_name = "Stand by Me"
movie_recommendation = imdb(movie_name.lower())
movie_recommendation

Unnamed: 0,budget,company,country,director,genre,gross,name,rating,released,runtime,score,star,votes,writer,year
59,0,Columbia Pictures Corporation,USA,Walter Hill,Drama,5839031,Crossroads,R,1986-03-14,99,7.1,Ralph Macchio,11415,John Fusco,1986
67,15000000,Warner Bros.,USA,Michael Chapman,Adventure,1953732,The Clan of the Cave Bear,R,1986-01-17,98,5.3,Daryl Hannah,5340,Jean M. Auel,1986
143,0,Columbia Pictures Corporation,USA,Richard Tuggle,Action,5099316,Out of Bounds,R,1986-07-25,93,5.4,Anthony Michael Hall,1016,Tony Kayden,1986
815,34000000,Columbia Pictures Corporation,USA,Luis Puenzo,Adventure,3574256,Old Gringo,R,1989-10-06,100,5.8,Jane Fonda,1556,Carlos Fuentes,1989
1328,40000000,Columbia Pictures Corporation,USA,Rob Reiner,Drama,141340178,A Few Good Men,R,1992-12-11,138,7.7,Tom Cruise,196002,Aaron Sorkin,1992
4,9000000,Walt Disney Pictures,USA,Randal Kleiser,Adventure,18564613,Flight of the Navigator,PG,1986-08-01,90,6.9,Joey Cramer,36636,Mark H. Baker,1986
24,25000000,Paramount Pictures,USA,Leonard Nimoy,Adventure,109713132,Star Trek IV: The Voyage Home,PG,1986-11-26,119,7.3,William Shatner,66366,Gene Roddenberry,1986
33,24500000,Warner Bros.,UK,Roland JoffÃ©,Adventure,17218023,The Mission,PG,1986-10-31,125,7.5,Robert De Niro,47497,Robert Bolt,1986
40,25000000,The Saul Zaentz Company,USA,Peter Weir,Adventure,14302779,The Mosquito Coast,PG,1986-11-26,117,6.6,Harrison Ford,23076,Paul Theroux,1986
51,18000000,ABC Motion Pictures,USA,Harry Winer,Adventure,9697739,SpaceCamp,PG,1986-06-06,107,5.6,Kate Capshaw,11484,Clifford Green,1986


#### #2

In [24]:
movie_name = "Transformers"
movie_recommendation = imdb(movie_name.lower())
movie_recommendation

Unnamed: 0,budget,company,country,director,genre,gross,name,rating,released,runtime,score,star,votes,writer,year
5082,200000000,DreamWorks,USA,Michael Bay,Action,402111870,Transformers: Revenge of the Fallen,PG-13,2009-06-24,150,6.0,Shia LaBeouf,343549,Ehren Kruger,2009
4211,126000000,DreamWorks,USA,Michael Bay,Action,35818913,The Island,PG-13,2005-07-22,136,6.9,Scarlett Johansson,275721,Caspian Tredwell-Owen,2005
4664,20000000,DreamWorks,USA,D.J. Caruso,Drama,80209692,Disturbia,PG-13,2007-04-13,105,6.9,Shia LaBeouf,196330,Christopher Landon,2007
4925,80000000,DreamWorks,USA,D.J. Caruso,Action,101111837,Eagle Eye,PG-13,2008-09-26,118,6.6,Shia LaBeouf,158402,John Glenn,2008
5521,195000000,Paramount Pictures,USA,Michael Bay,Action,352390543,Transformers: Dark of the Moon,PG-13,2011-06-29,154,6.3,Shia LaBeouf,345669,Ehren Kruger,2011
2644,140000000,Touchstone Pictures,USA,Michael Bay,Action,201573391,Armageddon,PG-13,1998-07-01,151,6.6,Bruce Willis,344203,Jonathan Hensleigh,1998
3321,140000000,Touchstone Pictures,USA,Michael Bay,Action,198542554,Pearl Harbor,PG-13,2001-05-25,183,6.1,Ben Affleck,272170,Randall Wallace,2001
3902,0,DreamWorks,USA,Reggie Rock Bythewood,Action,21701045,Biker Boyz,PG-13,2003-01-31,110,4.6,Laurence Fishburne,11532,Michael Gougis,2003
4509,21000000,Constantin Film,USA,Corey Yuen,Action,480314,DOA: Dead or Alive,PG-13,2007-06-15,87,4.8,Jaime Pressly,39969,J.F. Lawton,2007
4631,300000000,Walt Disney Pictures,USA,Gore Verbinski,Action,309420425,Pirates of the Caribbean: At World's End,PG-13,2007-05-25,169,7.1,Johnny Depp,514191,Ted Elliott,2007


#### #3

In [25]:
movie_name = "Ferris Bueller's Day Off"
movie_recommendation = imdb(movie_name.lower())
movie_recommendation

Unnamed: 0,budget,company,country,director,genre,gross,name,rating,released,runtime,score,star,votes,writer,year
8,9000000,Paramount Pictures,USA,Howard Deutch,Comedy,40471663,Pretty in Pink,PG-13,1986-02-28,96,6.8,Molly Ringwald,60565,John Hughes,1986
74,18000000,Paramount Pictures,USA,Ron Howard,Comedy,36611610,Gung Ho,PG-13,1986-03-14,112,6.2,Michael Keaton,9965,Edwin Blum,1986
522,0,Paramount Pictures,USA,John Hughes,Comedy,16031707,She's Having a Baby,PG-13,1988-02-05,106,5.8,Kevin Bacon,9676,John Hughes,1988
12,6000000,Twentieth Century Fox Film Corporation,USA,David Seltzer,Comedy,8200000,Lucas,PG-13,1986-03-28,100,6.8,Corey Haim,12228,David Seltzer,1986
21,11000000,Orion Pictures,USA,Alan Metter,Comedy,91258000,Back to School,PG-13,1986-06-13,96,6.6,Rodney Dangerfield,23120,Rodney Dangerfield,1986
28,25000000,Paramount Pictures,USA,Michael Ritchie,Action,79817937,The Golden Child,PG-13,1986-12-12,94,5.9,Eddie Murphy,42997,Dennis Feldman,1986
45,18000000,TriStar Pictures,USA,Francis Ford Coppola,Comedy,41382841,Peggy Sue Got Married,PG-13,1986-10-10,103,6.3,Kathleen Turner,28529,Jerry Leichtling,1986
47,1100000,Empire Pictures,USA,John Carl Buechler,Comedy,5450815,Troll,PG-13,1986-01-17,82,4.3,Michael Moriarty,7569,Ed Naha,1986
52,6400000,Orion Pictures,USA,Woody Allen,Comedy,40084041,Hannah and Her Sisters,PG-13,1986-03-14,107,8.0,Mia Farrow,56988,Woody Allen,1986
71,0,Balcor Film Investors,USA,Steve Miner,Comedy,27820000,Soul Man,PG-13,1986-10-24,104,5.2,C. Thomas Howell,6440,Carol Black,1986


#### #4

In [26]:
movie_name = "Top Gun"
movie_recommendation = imdb(movie_name.lower())
movie_recommendation

Unnamed: 0,budget,company,country,director,genre,gross,name,rating,released,runtime,score,star,votes,writer,year
28,25000000,Paramount Pictures,USA,Michael Ritchie,Action,79817937,The Golden Child,PG-13,1986-12-12,94,5.9,Eddie Murphy,42997,Dennis Feldman,1986
2214,80000000,Paramount Pictures,USA,Brian De Palma,Action,180981856,Mission: Impossible,PG-13,1996-05-22,110,7.1,Tom Cruise,323291,Bruce Geller,1996
3105,125000000,Paramount Pictures,USA,John Woo,Action,215409889,Mission: Impossible II,PG-13,2000-05-24,123,6.1,Tom Cruise,260493,Bruce Geller,2000
4446,150000000,Paramount Pictures,USA,J.J. Abrams,Action,134029801,Mission: Impossible III,PG-13,2006-05-05,126,6.9,Tom Cruise,275630,Alex Kurtzman,2006
5526,145000000,Paramount Pictures,USA,Brad Bird,Action,209397903,Mission: Impossible - Ghost Protocol,PG-13,2011-12-21,132,7.4,Tom Cruise,388730,Bruce Geller,2011
5727,60000000,Paramount Pictures,USA,Christopher McQuarrie,Action,80070736,Jack Reacher,PG-13,2012-12-21,130,7.0,Tom Cruise,257474,Lee Child,2012
6407,150000000,Paramount Pictures,China,Christopher McQuarrie,Action,195042377,MisiÃ³n: imposible 5 - NaciÃ³n secreta,PG-13,2015-07-31,131,7.4,Tom Cruise,265527,Christopher McQuarrie,2015
6661,60000000,Paramount Pictures,China,Edward Zwick,Action,58399160,Jack Reacher: Sin regreso,PG-13,2016-10-21,118,6.1,Tom Cruise,91071,Richard Wenk,2016
905,60000000,Don Simpson/Jerry Bruckheimer Films,USA,Tony Scott,Action,82670733,Days of Thunder,PG-13,1990-06-27,107,5.9,Tom Cruise,65262,Robert Towne,1990
169,10000000,Paramount Pictures,USA,Michelle Manning,Action,6947787,Blue City,R,1986-05-02,83,4.4,Judd Nelson,792,Ross Macdonald,1986


#### #5

In [27]:
movie_name = "The Shawshank Redemption"
movie_recommendation = imdb(movie_name.lower())
movie_recommendation

Unnamed: 0,budget,company,country,director,genre,gross,name,rating,released,runtime,score,star,votes,writer,year
2863,60000000,Castle Rock Entertainment,USA,Frank Darabont,Crime,136801374,The Green Mile,R,1999-12-10,189,8.5,Tom Hanks,879924,Stephen King,1999
883,20000000,Castle Rock Entertainment,USA,Rob Reiner,Crime,61276872,Misery,R,1990-11-30,107,7.8,James Caan,147278,Stephen King,1990
1629,20000000,Castle Rock Entertainment,Canada,Harold Becker,Crime,46044636,Malicia,R,1993-10-01,107,6.4,Alec Baldwin,19410,Aaron Sorkin,1993
1670,0,Channel Four Films,USA,Allison Anders,Crime,3269420,Mi vida loca,R,1994-07-15,92,6.5,Angel Aviles,1614,Allison Anders,1994
1698,11000000,New Line Cinema,USA,Michael Apted,Crime,16696219,Blink,R,1994-01-26,106,6.2,Madeleine Stowe,6770,Dana Stevens,1994
1761,8000000,Miramax,USA,Quentin Tarantino,Crime,107928762,Pulp Fiction,R,1994-10-14,154,8.9,John Travolta,1456787,Quentin Tarantino,1994
1763,0,Gaumont,France,Luc Besson,Crime,19501238,LÃ©on: The Professional,R,1994-11-18,110,8.6,Jean Reno,806235,Luc Besson,1994
1771,34000000,Warner Bros.,USA,Oliver Stone,Crime,50282766,Natural Born Killers,R,1994-08-26,118,7.3,Woody Harrelson,185627,Quentin Tarantino,1994
1833,3500000,New Line Cinema,USA,Jeff Pollack,Crime,16036534,Above the Rim,R,1994-03-23,96,6.6,Duane Martin,11508,Jeff Pollack,1994
1930,15000000,Tig Productions,USA,John Bailey,Crime,3038499,China Moon,R,1994-03-04,99,6.2,Ed Harris,4508,Roy Carlson,1994


#### #6

In [28]:
movie_name = "Saving Christmas"
movie_recommendation = imdb(movie_name.lower())
movie_recommendation

Unnamed: 0,budget,company,country,director,genre,gross,name,rating,released,runtime,score,star,votes,writer,year
6269,22000000,Amblin Entertainment,USA,Lasse HallstrÃ¶m,Comedy,54240821,The Hundred-Foot Journey,PG,2014-08-08,122,7.3,Helen Mirren,61319,Steven Knight,2014
6318,28000000,Walt Disney Pictures,USA,Miguel Arteta,Comedy,66954149,"Alexander and the Terrible, Horrible, No Good,...",PG,2014-10-10,81,6.2,Steve Carell,33226,Rob Lieber,2014
6356,5000000,Affirm Films,USA,Andrew Erwin,Comedy,10429707,Moms' Night Out,PG,2014-05-09,98,5.4,Sarah Drew,11445,Jon Erwin,2014
5921,0,Razor Film Produktion GmbH,Saudi Arabia,Haifaa Al-Mansour,Comedy,1347578,La bicicleta verde,PG,2014-04-25,98,7.6,Waad Mohammed,14903,Haifaa Al-Mansour,2014
6019,11000000,No Trace Camping,Ireland,Michael Dowse,Comedy,3452117,What If,PG-13,2014-08-15,98,6.8,Daniel Radcliffe,57422,Elan Mastai,2014
6207,40000000,Twentieth Century Fox Film Corporation,USA,Nick Cassavetes,Comedy,83911193,The Other Woman,PG-13,2014-04-25,109,6.0,Cameron Diaz,113167,Melissa Stack,2014
6220,40000000,Gulfstream Pictures,USA,Frank Coraci,Comedy,46294610,Blended,PG-13,2014-05-23,117,6.5,Adam Sandler,96218,Ivan Menchell,2014
6223,65000000,Marcy Media,USA,Will Gluck,Comedy,85911262,Annie,PG,2014-12-19,118,5.3,QuvenzhanÃ© Wallis,28204,Will Gluck,2014
6246,13000000,Chernin Entertainment,USA,Theodore Melfi,Comedy,44134898,St. Vincent,PG-13,2014-10-24,102,7.3,Bill Murray,82653,Theodore Melfi,2014
6266,16800000,Gravier Productions,USA,Woody Allen,Comedy,10506939,Magic in the Moonlight,PG-13,2014-08-15,97,6.6,Colin Firth,53136,Woody Allen,2014


In [29]:
joblib.dump(matrix, 'modelJoblib')

['modelJoblib']