In [9]:
# Basic libraries
import pandas as pd
import numpy as np
import ast

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Preprocessing utilities
from itertools import combinations
from difflib import SequenceMatcher
import datetime

In [13]:
# Column names for the metadata TSV, which is read without a header row.
movie_meta_columns = [
    'movie_id',
    'freebase_movie_id',
    'movie_name',
    'release_date',
    'box_office_revenue',
    'runtime',
    'languages',
    'countries',
    'genres',
]

# Load the tab-separated file and label its columns with the names above.
data = pd.read_csv(
    'data/movie.metadata.tsv',
    sep='\t',
    header=None,
    names=movie_meta_columns,
)
data

Unnamed: 0,movie_id,freebase_movie_id,movie_name,release_date,box_office_revenue,runtime,languages,countries,genres
0,975900,/m/03vyhn,Ghosts of Mars,2001-08-24,14010832.0,98.0,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/01jfsb"": ""Thriller"", ""/m/06n90"": ""Science..."
1,3196793,/m/08yl5d,Getting Away with Murder: The JonBenét Ramsey ...,2000-02-16,,95.0,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/02n4kr"": ""Mystery"", ""/m/03bxz7"": ""Biograp..."
2,28463795,/m/0crgdbh,Brun bitter,1988,,83.0,"{""/m/05f_3"": ""Norwegian Language""}","{""/m/05b4w"": ""Norway""}","{""/m/0lsxr"": ""Crime Fiction"", ""/m/07s9rl0"": ""D..."
3,9363483,/m/0285_cd,White Of The Eye,1987,,110.0,"{""/m/02h40lc"": ""English Language""}","{""/m/07ssc"": ""United Kingdom""}","{""/m/01jfsb"": ""Thriller"", ""/m/0glj9q"": ""Erotic..."
4,261236,/m/01mrr1,A Woman in Flames,1983,,106.0,"{""/m/04306rv"": ""German Language""}","{""/m/0345h"": ""Germany""}","{""/m/07s9rl0"": ""Drama""}"
...,...,...,...,...,...,...,...,...,...
81736,35228177,/m/0j7hxnt,Mermaids: The Body Found,2011-03-19,,120.0,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/07s9rl0"": ""Drama""}"
81737,34980460,/m/0g4pl34,Knuckle,2011-01-21,,96.0,"{""/m/02h40lc"": ""English Language""}","{""/m/03rt9"": ""Ireland"", ""/m/07ssc"": ""United Ki...","{""/m/03bxz7"": ""Biographical film"", ""/m/07s9rl0..."
81738,9971909,/m/02pygw1,Another Nice Mess,1972-09-22,,66.0,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/06nbt"": ""Satire"", ""/m/01z4y"": ""Comedy""}"
81739,913762,/m/03pcrp,The Super Dimension Fortress Macross II: Lover...,1992-05-21,,150.0,"{""/m/03_9r"": ""Japanese Language""}","{""/m/03_3d"": ""Japan""}","{""/m/06n90"": ""Science Fiction"", ""/m/0gw5n2f"": ..."


In [22]:
# movie_id values to look up for topic 1.
topic1 = [5045106, 77653, 12091274, 2014153, 7342730]
# Keep only the metadata rows whose movie_id appears in the list.
data[data['movie_id'].isin(topic1)]

Unnamed: 0,movie_id,freebase_movie_id,movie_name,release_date,box_office_revenue,runtime,languages,countries,genres
453,12091274,/m/02vpjr8,Puccini for Beginners,2007-02-02,110864.0,81.0,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/06cvj"": ""Romantic comedy"", ""/m/0hn10"": ""L..."
40742,7342730,/m/0kv5fy,Waiter,2006,,97.0,"{""/m/02bv9"": ""Dutch Language"", ""/m/02h40lc"": ""...","{""/m/0154j"": ""Belgium"", ""/m/059j2"": ""Netherlan...","{""/m/0vgkd"": ""Black comedy""}"
53953,77653,/m/0kb7w,Heaven Can Wait,1943-08-11,,112.0,"{""/m/064_8sq"": ""French Language"", ""/m/02h40lc""...","{""/m/09c7w0"": ""United States of America""}","{""/m/06cvj"": ""Romantic comedy"", ""/m/07s9rl0"": ..."
57549,5045106,/m/0d09h0,Ullam Ketkumae,2005,,156.0,"{""/m/07c9s"": ""Tamil Language""}","{""/m/03rk0"": ""India""}","{""/m/02l7c8"": ""Romance Film""}"
67482,2014153,/m/06f2zd,2 Young,2005-04-15,,108.0,"{""/m/0459q4"": ""Standard Cantonese"", ""/m/012w70...","{""/m/03h64"": ""Hong Kong""}","{""/m/03q4nz"": ""World cinema"", ""/m/07s9rl0"": ""D..."


Seems to be romantic comedies:

https://en.wikipedia.org/wiki/Puccini_for_Beginners

https://en.wikipedia.org/wiki/Waiter_(film)

https://en.wikipedia.org/wiki/Heaven_Can_Wait_%281943_film%29

https://en.wikipedia.org/wiki/Ullam_Ketkumae

https://en.wikipedia.org/wiki/2_Young

In [25]:
# movie_id values to look up for topic 2.
topic2 = [5826081, 33113634, 29677455, 28869936, 240483]
# Keep only the metadata rows whose movie_id appears in the list.
data[data['movie_id'].isin(topic2)]

Unnamed: 0,movie_id,freebase_movie_id,movie_name,release_date,box_office_revenue,runtime,languages,countries,genres
6531,29677455,/m/0fphr20,Stalingrad,1990-02,,,"{""/m/06b_j"": ""Russian Language""}","{""/m/09c7w0"": ""United States of America"", ""/m/...","{""/m/082gq"": ""War film""}"
17562,5826081,/m/0f7l0p,Mivtsa Yonatan,1977,,54.0,"{""/m/0jzc"": ""Arabic Language"", ""/m/03hkp"": ""He...","{""/m/03spz"": ""Israel""}","{""/m/01jfsb"": ""Thriller"", ""/m/03q4nz"": ""World ..."
17679,33113634,/m/0h67m0d,The Unforgettable Year 1919,1952-05-03,,108.0,"{""/m/06b_j"": ""Russian Language""}",{},"{""/m/02p0szs"": ""Historical fiction"", ""/m/07s9r..."
23852,240483,/m/01jwxx,Battle of Britain,1969-09-15,,126.0,"{""/m/064_8sq"": ""French Language"", ""/m/05qqm"": ...","{""/m/07ssc"": ""United Kingdom""}","{""/m/03g3w"": ""History"", ""/m/06l3bl"": ""Epic"", ""..."
54279,28869936,/m/0ddfv67,The Sino-Dutch War 1661,2000,,101.0,"{""/m/0653m"": ""Standard Mandarin"", ""/m/012w70"":...","{""/m/03h64"": ""Hong Kong"", ""/m/0d05w3"": ""China""}","{""/m/02kdv5l"": ""Action"", ""/m/07s9rl0"": ""Drama""..."


These appear to be historical films about periods of upheaval and adversity:


https://en.wikipedia.org/wiki/Stalingrad_(1990_film)

https://en.wikipedia.org/wiki/Operation_Thunderbolt_(film)

https://en.wikipedia.org/wiki/The_Unforgettable_Year_1919

https://en.wikipedia.org/wiki/Battle_of_Britain_(film)

https://en.wikipedia.org/wiki/The_Sino-Dutch_War_1661

In [26]:
# movie_id values to look up for topic 3.
topic3 = [24121382, 24190384, 32852772, 25432586, 32173262]
# Keep only the metadata rows whose movie_id appears in the list.
data[data['movie_id'].isin(topic3)]

Unnamed: 0,movie_id,freebase_movie_id,movie_name,release_date,box_office_revenue,runtime,languages,countries,genres
25962,24121382,/m/07kg8yh,Smart Alecks,1942-08-07,,88.0,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/0lsxr"": ""Crime Fiction"", ""/m/02l7c8"": ""Ro..."
44143,24190384,/m/07kbrnh,The Crime Patrol,1936-05-13,,59.0,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/0lsxr"": ""Crime Fiction"", ""/m/02n4kr"": ""My..."
66550,25432586,/m/09k70sr,The Casino Job,2009,,81.0,{},"{""/m/09c7w0"": ""United States of America""}","{""/m/01jfsb"": ""Thriller"", ""/m/02wtdps"": ""Crime..."
69371,32852772,/m/04j10dv,Roarin' Lead,1936,,52.0,{},{},"{""/m/03btsm8"": ""Action/Adventure"", ""/m/0hfjk"":..."
75584,32173262,/m/0gx0cvh,Mr. Muggs Rides Again,1945-07-15,,63.0,{},{},"{""/m/05p553"": ""Comedy film""}"


https://en.wikipedia.org/wiki/Smart_Alecks

https://en.wikipedia.org/wiki/The_Crime_Patrol

https://en.wikipedia.org/wiki/The_Casino_Job

https://en.wikipedia.org/wiki/Roarin%27_Lead

https://en.wikipedia.org/wiki/Mr._Muggs_Rides_Again

In [28]:
# movie_id values to look up for topic 4.
topic4 = [10131383, 9930507, 10131333, 23617806, 14205601]
# Keep only the metadata rows whose movie_id appears in the list.
data[data['movie_id'].isin(topic4)]

Unnamed: 0,movie_id,freebase_movie_id,movie_name,release_date,box_office_revenue,runtime,languages,countries,genres
12759,10131333,/m/02q2t86,Zoom and Bored,1957-09-14,,6.0,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/02hmvc"": ""Short Film""}"
14305,14205601,/m/03cxrqj,"Ready, Woolen and Able",1960-07-30,,,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/02hmvc"": ""Short Film"", ""/m/0hqxf"": ""Famil..."
35760,23617806,/m/06zmcll,Hare-Breadth Hurry,1963,,7.0,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/02hmvc"": ""Short Film"", ""/m/0hqxf"": ""Famil..."
47525,9930507,/m/02pxb75,Lickety-Splat,1961-06-03,,,"{""/m/02h40lc"": ""English Language""}","{""/m/09c7w0"": ""United States of America""}","{""/m/02hmvc"": ""Short Film"", ""/m/0hqxf"": ""Famil..."
48432,10131383,/m/02q2tb8,"Whoa, Be-Gone!",1958-04-12,,,"{""/m/02h40lc"": ""English Language""}",{},"{""/m/02hmvc"": ""Short Film"", ""/m/0hqxf"": ""Famil..."


https://en.wikipedia.org/wiki/Zoom_and_Bored

https://en.wikipedia.org/wiki/Ready,_Woolen_and_Able

https://en.wikipedia.org/wiki/Hare-Breadth_Hurry

https://en.wikipedia.org/wiki/Lickety-Splat

