# Movie Recommendation System

In [71]:
# import major libraries
import os
from gdown import download
import numpy as np
import pandas as pd
from urllib import request
import re  # python regular expression
import difflib # compare sequences for strings and list


import ipywidgets as widgets
from IPython.display import display

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

## Based on ratings

### Download data

In [None]:
# Download movie data
mv_file = "https://raw.githubusercontent.com/htetaunglynn94/portfolio_projects/refs/heads/main/data/mv.csv"
root = os.getcwd()
# path = os.path.join(root, "movies.csv")
# request.urlretrieve(mv_file, path)

# # Download class file
# class_file = "https://drive.google.com/uc?export=download&id=1aeS4F5QWJhmGWFhqNGId2XUuboG5NFF_"
# root = os.getcwd()
# path = os.path.join(root, "Uinterface.py")
# download(class_file, path, quiet=False)

# File size is very large and cannot read due to google virus scanning method
# need '!pip install gdown'
rating = "https://drive.google.com/uc?export=download&id=12SjCQWIAmb1TxZ1OLt5Cp7gcXffs9bt1"
path = os.path.join(root, "ratings.csv")
download(rating, path, quiet=False)

Downloading...
From (original): https://drive.google.com/uc?export=download&id=12SjCQWIAmb1TxZ1OLt5Cp7gcXffs9bt1
From (redirected): https://drive.google.com/uc?export=download&id=12SjCQWIAmb1TxZ1OLt5Cp7gcXffs9bt1&confirm=t&uuid=084260f3-91a7-4203-9c14-6f7f37c75035
To: /content/ratings.csv
100%|██████████| 678M/678M [00:08<00:00, 83.3MB/s]


'/content/ratings.csv'

### Defined functions

In [None]:
def clean_title(title):
    return re.sub("[^a-zA-Z0-9 ]","",title)

def find_similar_movies(movie_id):
    cols = ['movieId', 'score', 'title', 'genres']

    # Extract userId of only users who rated higher than 4
    similar_users = ratings.query("movieId == @movie_id and rating > 4")["userId"].unique()
    # Extract movieId from those similar users, keeping rating above 4
    similar_user_recs = ratings.query("userId in @similar_users and rating > 4")["movieId"]


    # Compute percentages of similar users who liked each movie
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
    # Filter only movies that more than 10% of similar users liked (removed weak recommendation)
    similar_user_recs = similar_user_recs[similar_user_recs > .10]


    # Filter all users who liked recommended movieId, keeping rating above 4
    all_users = ratings.query("movieId in @similar_user_recs.index & rating > 4")
    # Compute percentage of all users who liked each candidate movie
    all_users_recs = all_users['movieId'].value_counts() / len(all_users['userId'].unique())

    # movieId for "Similar users like percentage" and "all users like percentage"
    rec_percentages = pd.concat([similar_user_recs, all_users_recs], axis=1)
    rec_percentages.columns = ["similar", "all"]

    # Compute recommendation socre
    rec_percentages['score'] = rec_percentages['similar'] / rec_percentages['all']
    rec_percentages = rec_percentages.sort_values("score", ascending=False)

    # Return largest 20 rows merging with movie data
    return rec_percentages.nlargest(n=20, columns="score").merge(df, on='movieId')[cols]


def search(title, n_mov):
    title = clean_title(title) # clean the title
    query_vec =vectorizer.transform([title]) # transform title into number (coordinates & values)
    similarity = cosine_similarity(query_vec, tfidf).flatten() # 2-D to 1-D (shape: (62423,)) --> btw 0 and 1
    indices = np.argsort(similarity)[-n_mov:][::-1] # sort the similarity in reverse order
    results = df.iloc[indices] # start from the end and go backward one step at a time
    return results


# def user_interface(df, vectorizer, tfidf):
def user_interface():

    def search_operation():
        """
        Handle search operation when button is clicked or Enter is pressed
        """
        title = movie_input.value.strip()
        # print(title)

        # can use due to global variables
        with movie_list:  # setup and clean up operations automatically
            movie_list.clear_output()
            if len(title) > 2:
                try:
                    display(search(title, range_slider.value))
                except Exception as e:
                    print(f"Search error: {e}")
            else:
                print("Please enter at least 3 characters")

    # Text input widgets
    movie_input = widgets.Text(value = '',                          # empty initial value
                            placeholder = 'Type a movie title...',  # place holder message
                            description = 'Movie Title:',           # description
                            style = {'description_width': '100px'}, # description width
                            layout = widgets.Layout(width='300px')) # layout for text box

    # Label
    n_movies = widgets.Label("No. of recommended movies:")    # label before the range

    # Create search button
    search_button = widgets.Button(description='Search',      # search button
                                button_style='primary')


    # Slider widget
    range_slider = widgets.IntSlider(min=0, max=20, step=1, value=2)
    min_label = widgets.Label("min")
    max_label = widgets.Label("max")


    # Create horizontal layout
    # Put input text box and button at the same row
    search_box = widgets.HBox([movie_input, search_button])
    range = widgets.HBox([min_label, range_slider, max_label])

    # Output area for results
    movie_list = widgets.Output()  # declare as global variable

    # Connect ONLY button click and Enter key
    search_button.on_click(lambda c: search_operation())
    movie_input.on_submit(lambda s: search_operation())

    display(search_box, n_movies, range, movie_list)

### Data loading

In [None]:
# Load movie file
df = pd.read_csv(mv_file)
df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


`re.sub(pattern, replacement, string)`

* `[^...]` means "NOT any of these characters"
* `a-zA-Z` means all lowercase and uppercase letters
* `0-9`**bold text** means all digits

In [None]:
df['clean_title'] = df['title'].apply(clean_title)
df.sample(5)

Unnamed: 0,movieId,title,genres,clean_title
50989,182185,Elmer Elephant (1936),Animation,Elmer Elephant 1936
61739,206775,Girl From Nowhere (2017),Thriller,Girl From Nowhere 2017
24594,121857,Too Tough to Die: A Tribute to Johnny Ramone (...,Documentary,Too Tough to Die A Tribute to Johnny Ramone 2006
54655,190081,Lucia (1968),Drama,Lucia 1968
15632,82283,Secret Ceremony (1968),Drama|Thriller,Secret Ceremony 1968


`TfidfVectorizer` converts a collection of text documents into a matrix of `TF-IDF` features, where:

- `TF` (Term Frequency) → How often a word appears in a document
- `IDF` (Inverse Document Frequency) → How unique that word is across all documents

It’s basically a way to transform text into numerical vectors while down-weighting common words like “the” and “is”.

In [None]:
# Consider for unigrams and bigrams
vectorizer = TfidfVectorizer(ngram_range=(1,2)) #(unigram, bigram)
tfidf = vectorizer.fit_transform(df['clean_title']) # output is sparse matrix
tfidf

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 446566 stored elements and shape (62423, 170073)>

In [None]:
ratings = pd.read_csv("ratings.csv")
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510


In [None]:
user_interface()

Unnamed: 0,movieId,title,genres
14813,78499,Toy Story 3 (2010),Adventure|Animation|Children|Comedy|Fantasy|IMAX
3021,3114,Toy Story 2 (1999),Adventure|Animation|Children|Comedy|Fantasy


## Based on

In [72]:
url = "https://raw.githubusercontent.com/htetaunglynn94/portfolio_projects/refs/heads/main/data/movies.csv"
mvd = pd.read_csv(url)
mvd.sample(5)

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
2225,2225,20000000,Comedy Romance Crime Drama Mystery,http://inherentvicemovie.com/,171274,based on novel 1970s private investigator smok...,en,Inherent Vice,"In Los Angeles at the turn of the 1970s, drug-...",45.742217,...,148.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Under the paving stones, the beach!",Inherent Vice,6.5,833,Joaquin Phoenix Josh Brolin Owen Wilson Kather...,"[{'name': 'Robert Elswit', 'gender': 2, 'depar...",Paul Thomas Anderson
706,706,60000000,Adventure,,2119,stock-car-race daytona car crash,en,Days of Thunder,Talented but unproven stock car driver Cole Tr...,16.278022,...,107.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,You can't stop the thunder.,Days of Thunder,5.9,353,Tom Cruise Robert Duvall Nicole Kidman Randy Q...,"[{'name': 'Jerry Bruckheimer', 'gender': 2, 'd...",Tony Scott
867,867,54000000,Crime Drama Thriller,http://www.imdb.com/title/tt0099674/,242,italy christianity new york assassination ital...,en,The Godfather: Part III,In the midst of trying to legitimize his busin...,59.194915,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,All the power on earth can't change destiny.,The Godfather: Part III,7.1,1546,Al Pacino Diane Keaton Andy Garc\u00eda Talia ...,"[{'name': 'Walter Murch', 'gender': 2, 'depart...",Francis Ford Coppola
2099,2099,35000000,Comedy Music,http://www.schoolofrockmovie.com/,1584,rock and roll rock high school music band,en,School of Rock,"Fired from his band and hard up for cash, guit...",43.057323,...,108.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,He just landed the gig of his life: 5th grade.,School of Rock,6.7,1414,Jack Black Joan Cusack Mike White Sarah Silver...,"[{'name': 'Richard Linklater', 'gender': 2, 'd...",Richard Linklater
4236,4236,0,Drama,http://moozlumthemovie.com,66025,independent film,en,Mooz-lum,Thanks to a strict Muslim upbringing that larg...,0.414754,...,95.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,,Mooz-lum,4.5,3,Nia Long Danny Glover Evan Ross Summer Bishil,"[{'name': 'Qasim Basir', 'gender': 0, 'departm...",Qasim Basir


In [73]:
mvd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   index                 4803 non-null   int64  
 1   budget                4803 non-null   int64  
 2   genres                4775 non-null   object 
 3   homepage              1712 non-null   object 
 4   id                    4803 non-null   int64  
 5   keywords              4391 non-null   object 
 6   original_language     4803 non-null   object 
 7   original_title        4803 non-null   object 
 8   overview              4800 non-null   object 
 9   popularity            4803 non-null   float64
 10  production_companies  4803 non-null   object 
 11  production_countries  4803 non-null   object 
 12  release_date          4802 non-null   object 
 13  revenue               4803 non-null   int64  
 14  runtime               4801 non-null   float64
 15  spoken_languages     

In [74]:
mvd.shape

(4803, 24)

In [75]:
selected_features = ['genres','keywords','tagline','cast','director', 'overview']

In [76]:
mvd[selected_features].isna().sum()

Unnamed: 0,0
genres,28
keywords,412
tagline,844
cast,43
director,30
overview,3


In [77]:
na_idx = mvd[mvd.isna().any(axis=1)].index
na_idx

Index([  10,   15,   24,   27,   33,   34,   47,   49,   56,   58,
       ...
       4792, 4793, 4794, 4795, 4797, 4798, 4799, 4800, 4801, 4802],
      dtype='int64', length=3371)

In [78]:
mvd.loc[na_idx]

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
10,10,270000000,Adventure Fantasy Action Science Fiction,http://www.superman.com,1452,saving the world dc comics invulnerability seq...,en,Superman Returns,Superman returns to discover his 5-year absenc...,57.925623,...,154.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,,Superman Returns,5.4,1400,Brandon Routh Kevin Spacey Kate Bosworth James...,"[{'name': 'Roger Mussenden', 'gender': 2, 'dep...",Bryan Singer
15,15,225000000,Adventure Family Fantasy,,2454,based on novel fictional place brother sister ...,en,The Chronicles of Narnia: Prince Caspian,One year after their incredible adventures in ...,53.978602,...,150.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,Hope has a new face.,The Chronicles of Narnia: Prince Caspian,6.3,1630,Ben Barnes William Moseley Anna Popplewell Ska...,"[{'name': 'Liz Mullane', 'gender': 1, 'departm...",Andrew Adamson
24,24,207000000,Adventure Drama Action,,254,film business screenplay show business film ma...,en,King Kong,"In 1933 New York, an overly ambitious movie pr...",61.226010,...,187.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The eighth wonder of the world.,King Kong,6.6,2337,Naomi Watts Jack Black Adrien Brody Thomas Kre...,"[{'name': 'James Newton Howard', 'gender': 2, ...",Peter Jackson
27,27,209000000,Thriller Action Adventure Science Fiction,,44833,fight u.s. navy mind reading hong kong soccer,en,Battleship,"When mankind beams a radio signal into space, ...",64.928382,...,131.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,The Battle for Earth Begins at Sea,Battleship,5.5,2114,Taylor Kitsch Alexander Skarsg\u00e5rd Rihanna...,"[{'name': 'Paul Rubell', 'gender': 2, 'departm...",Peter Berg
33,33,210000000,Adventure Action Science Fiction Thriller,,36668,mutant marvel comic based on comic book superh...,en,X-Men: The Last Stand,"When a cure is found to treat mutations, lines...",3.857526,...,104.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,Take a Stand,X-Men: The Last Stand,6.3,3525,Hugh Jackman Halle Berry Ian McKellen Patrick ...,"[{'name': 'Mark Goldblatt', 'gender': 2, 'depa...",Brett Ratner
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4798,4798,220000,Action Crime Thriller,,9367,united states\u2013mexico barrier legs arms pa...,es,El Mariachi,El Mariachi just wants to play his guitar and ...,14.269792,...,81.0,"[{""iso_639_1"": ""es"", ""name"": ""Espa\u00f1ol""}]",Released,"He didn't come looking for trouble, but troubl...",El Mariachi,6.6,238,Carlos Gallardo Jaime de Hoyos Peter Marquardt...,"[{'name': 'Robert Rodriguez', 'gender': 0, 'de...",Robert Rodriguez
4799,4799,9000,Comedy Romance,,72766,,en,Newlyweds,A newlywed couple's honeymoon is upended by th...,0.642552,...,85.0,[],Released,A newlywed couple's honeymoon is upended by th...,Newlyweds,5.9,5,Edward Burns Kerry Bish\u00e9 Marsha Dietlein ...,"[{'name': 'Edward Burns', 'gender': 2, 'depart...",Edward Burns
4800,4800,0,Comedy Drama Romance TV Movie,http://www.hallmarkchannel.com/signedsealeddel...,231617,date love at first sight narration investigati...,en,"Signed, Sealed, Delivered","""Signed, Sealed, Delivered"" introduces a dedic...",1.444476,...,120.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,,"Signed, Sealed, Delivered",7.0,6,Eric Mabius Kristin Booth Crystal Lowe Geoff G...,"[{'name': 'Carla Hetland', 'gender': 0, 'depar...",Scott Smith
4801,4801,0,,http://shanghaicalling.com/,126186,,en,Shanghai Calling,When ambitious New York attorney Sam is sent t...,0.857008,...,98.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,A New Yorker in Shanghai,Shanghai Calling,5.7,7,Daniel Henney Eliza Coupe Bill Paxton Alan Ruc...,"[{'name': 'Daniel Hsia', 'gender': 2, 'departm...",Daniel Hsia


In [79]:
# Imputation
for feature in selected_features:
    mvd[feature] = mvd[feature].fillna('')
mvd.iloc[na_idx].head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
10,10,270000000,Adventure Fantasy Action Science Fiction,http://www.superman.com,1452,saving the world dc comics invulnerability seq...,en,Superman Returns,Superman returns to discover his 5-year absenc...,57.925623,...,154.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,,Superman Returns,5.4,1400,Brandon Routh Kevin Spacey Kate Bosworth James...,"[{'name': 'Roger Mussenden', 'gender': 2, 'dep...",Bryan Singer
15,15,225000000,Adventure Family Fantasy,,2454,based on novel fictional place brother sister ...,en,The Chronicles of Narnia: Prince Caspian,One year after their incredible adventures in ...,53.978602,...,150.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,Hope has a new face.,The Chronicles of Narnia: Prince Caspian,6.3,1630,Ben Barnes William Moseley Anna Popplewell Ska...,"[{'name': 'Liz Mullane', 'gender': 1, 'departm...",Andrew Adamson
24,24,207000000,Adventure Drama Action,,254,film business screenplay show business film ma...,en,King Kong,"In 1933 New York, an overly ambitious movie pr...",61.22601,...,187.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The eighth wonder of the world.,King Kong,6.6,2337,Naomi Watts Jack Black Adrien Brody Thomas Kre...,"[{'name': 'James Newton Howard', 'gender': 2, ...",Peter Jackson
27,27,209000000,Thriller Action Adventure Science Fiction,,44833,fight u.s. navy mind reading hong kong soccer,en,Battleship,"When mankind beams a radio signal into space, ...",64.928382,...,131.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,The Battle for Earth Begins at Sea,Battleship,5.5,2114,Taylor Kitsch Alexander Skarsg\u00e5rd Rihanna...,"[{'name': 'Paul Rubell', 'gender': 2, 'departm...",Peter Berg
33,33,210000000,Adventure Action Science Fiction Thriller,,36668,mutant marvel comic based on comic book superh...,en,X-Men: The Last Stand,"When a cure is found to treat mutations, lines...",3.857526,...,104.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,Take a Stand,X-Men: The Last Stand,6.3,3525,Hugh Jackman Halle Berry Ian McKellen Patrick ...,"[{'name': 'Mark Goldblatt', 'gender': 2, 'depa...",Brett Ratner


In [80]:
mvd[selected_features].isna().sum()

Unnamed: 0,0
genres,0
keywords,0
tagline,0
cast,0
director,0
overview,0


In [81]:
mvd.query("genres == ''").shape[0]

28

In [83]:
# main procedure
combined_features = mvd[selected_features].apply(lambda x: '__'.join(x), axis=1)
combined_features

Unnamed: 0,0
0,Action Adventure Fantasy Science Fiction__cult...
1,Adventure Fantasy Action__ocean drug abuse exo...
2,Action Adventure Crime__spy based on novel sec...
3,Action Crime Drama Thriller__dc comics crime f...
4,Action Adventure Science Fiction__based on nov...
...,...
4798,Action Crime Thriller__united states\u2013mexi...
4799,Comedy Romance____A newlywed couple's honeymoo...
4800,Comedy Drama Romance TV Movie__date love at fi...
4801,____A New Yorker in Shanghai__Daniel Henney El...


In [89]:
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)
print(feature_vectors[4])

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 66 stored elements and shape (1, 45460)>
  Coords	Values
  (0, 1462)	0.04837561565607358
  (0, 1743)	0.05593312100515051
  (0, 35993)	0.06203846480278026
  (0, 37946)	0.09636393172081796
  (0, 43581)	0.0699217792894617
  (0, 40204)	0.06591025110954918
  (0, 44806)	0.11413289949467749
  (0, 29629)	0.07451325915017669
  (0, 21880)	0.03250772839584474
  (0, 40992)	0.02419750879373696
  (0, 29779)	0.07729913248365236
  (0, 4522)	0.07641842636038708
  (0, 2604)	0.09960750642262593
  (0, 2567)	0.04291332114170886
  (0, 14681)	0.1230283828047165
  (0, 21941)	0.05135074917737557
  (0, 29378)	0.07940420756451147
  (0, 28468)	0.08378422293944751
  (0, 44258)	0.04413527145881766
  (0, 15756)	0.14448532312731607
  (0, 26186)	0.2700057897045326
  (0, 26845)	0.15010402694498695
  (0, 41393)	0.09765215341040448
  (0, 32143)	0.17156096726758654
  (0, 21250)	0.11758280008083831
  :	:
  (0, 43882)	0.14448532312731607
  (0, 16645)	0.0843375446

In [90]:
# Apply consine_similarity
similarity = cosine_similarity(feature_vectors)
similarity

array([[1.        , 0.0455758 , 0.02085448, ..., 0.02498382, 0.01138446,
        0.00663412],
       [0.0455758 , 1.        , 0.03905452, ..., 0.04548676, 0.03122382,
        0.01623675],
       [0.02085448, 0.03905452, 1.        , ..., 0.02536949, 0.01180806,
        0.01466201],
       ...,
       [0.02498382, 0.04548676, 0.02536949, ..., 1.        , 0.03796027,
        0.02027747],
       [0.01138446, 0.03122382, 0.01180806, ..., 0.03796027, 1.        ,
        0.03352437],
       [0.00663412, 0.01623675, 0.01466201, ..., 0.02027747, 0.03352437,
        1.        ]])

In [92]:
similarity.shape

(4803, 4803)

In [99]:
duplicated_titles = mvd[mvd['title'].duplicated()]['title'].values
duplicated_titles

array(['The Host', 'Out of the Blue', 'Batman'], dtype=object)

In [100]:
for t in duplicated_titles:
    display(mvd[mvd['title'] == t]['title'])

Unnamed: 0,title
972,The Host
2877,The Host


Unnamed: 0,title
3647,Out of the Blue
3693,Out of the Blue


Unnamed: 0,title
1359,Batman
4267,Batman


In [108]:
# [The Host', 'Out of the Blue', 'Batman'] are duplicated
movie_titles = mvd['title'].to_list()
print(movie_titles)

['Avatar', "Pirates of the Caribbean: At World's End", 'Spectre', 'The Dark Knight Rises', 'John Carter', 'Spider-Man 3', 'Tangled', 'Avengers: Age of Ultron', 'Harry Potter and the Half-Blood Prince', 'Batman v Superman: Dawn of Justice', 'Superman Returns', 'Quantum of Solace', "Pirates of the Caribbean: Dead Man's Chest", 'The Lone Ranger', 'Man of Steel', 'The Chronicles of Narnia: Prince Caspian', 'The Avengers', 'Pirates of the Caribbean: On Stranger Tides', 'Men in Black 3', 'The Hobbit: The Battle of the Five Armies', 'The Amazing Spider-Man', 'Robin Hood', 'The Hobbit: The Desolation of Smaug', 'The Golden Compass', 'King Kong', 'Titanic', 'Captain America: Civil War', 'Battleship', 'Jurassic World', 'Skyfall', 'Spider-Man 2', 'Iron Man 3', 'Alice in Wonderland', 'X-Men: The Last Stand', 'Monsters University', 'Transformers: Revenge of the Fallen', 'Transformers: Age of Extinction', 'Oz: The Great and Powerful', 'The Amazing Spider-Man 2', 'TRON: Legacy', 'Cars 2', 'Green Lant

In [111]:
mv_name = 'superman'
matched_movies = difflib.get_close_matches(mv_name, movie_titles)
matched_movies

['Superman', 'Superman II', 'Superman III']