In [5]:
import pandas as pd

from imdb import Cinemagoer

### relevant fields:

**Keys**
- 'imdbID',
- 'title',

**Meta Data**
- 'original title',
- 'rating',
- 'director',
- 'runtimes',
- 'genres',
- 'box office',
- 'cover url',
- 'year', 
- 'top 250 rank',
- 'awards',
- 'akas',
- 'cast',

**Geo**
- 'country codes'
- 'countries',
- 'locations',
- 'language codes',


**Text**
- 'plot',
- 'plot outline',
- 'critic reviews',
- 'reviews',
- 'quotes'

In [None]:
'awards'  ???

# Get movie Meta Data

In [14]:
def get_all_movie_data(movie_name):
    ia = Cinemagoer()
    movie_search = ia.search_movie(movie_name)
    movie_id = movie_search[0].movieID
    movie_data = ia.get_movie(movie_id)
    ia.update(movie_data,'quotes')
    ia.update(movie_data,'reviews')
    ia.update(movie_data,'locations')
    
    
    return movie_data

In [8]:
def get_meta_data(movie_name):
    # instantiate movie request
    movie_data = get_all_movie_data(movie_name)

    # preprocess directors in case of list
    directors = []
    directors_obj = movie_data['director']
    for director in directors_obj:
        directors.append(director['name'])

    # preprocess cast list only keep top 5 listed
    cast = []
    cast_obj = movie_data['cast']
    for actor in cast_obj:
        cast.append(actor['name'])

    cast = cast[:5]

    # collect all data points to dict
    movie_meta = { "imdb_id": movie_data['imdbID'],
                   "title": movie_data['title'],
                   "rating": movie_data['rating'],
                   "director": directors,
                   "runtime" : '{0} Min'.format(movie_data['runtimes'][0]),
                   "genres" : movie_data['genres'],
                   "year" : movie_data['year'],
                   "box_office" : movie_data['box office']['Cumulative Worldwide Gross'],
                   "top_250_rank" : movie_data['top 250 rank'],
                   "cast" : cast, 
                   "cover_url" : movie_data['cover url'],
                   "akas" : movie_data['akas'],
                   "countries" : movie_data['countries']
                 }
    return movie_meta

In [9]:
def get_movie_quotes_df(movie_name):
    #get data
    movie_data = get_all_movie_data(movie_name)
    #create quotes dict
    quotes_dict = {"imdb_id": movie_data['imdbID'],
                   "title": movie_data['title'],
                   "quotes": movie_data['quotes']}
    #create quotes df
    quotes_df = pd.DataFrame(quotes_dict)
    #turn quotes from lists to strings
    quotes_df['quotes'] = quotes_df.apply(lambda x : ", ".join(x['quotes']), axis=1)
    
    return quotes_df

In [28]:
def get_movie_reviews_df(movie_name):
    #get data
    movie_data = get_all_movie_data(movie_name)
    
    #create quotes df
    reviews_df = pd.DataFrame(movie_data['reviews'])
    reviews_df['imdb_id'] = movie_data['imdbID']
    reviews_df['title'] = movie_data['title']
      
    return reviews_df

In [32]:
def get_movie_locations_df(movie_name):
    #get data
    movie_data = get_all_movie_data(movie_name)
    
    #create quotes df
    locations_df = pd.DataFrame(movie_data['locations'])
    locations_df['imdb_id'] = movie_data['imdbID']
    locations_df['title'] = movie_data['title']
      
    return locations_df

In [30]:
get_meta_data('matrix')

{'akas': ['Matrix (Japan, English title)',
  'Matrix (France)',
  'Matrix (Germany)',
  'Matrix (Spain)',
  '黑客帝国 (China, Mandarin title)'],
 'box_office': '$463,517,383',
 'cast': ['Keanu Reeves',
  'Laurence Fishburne',
  'Carrie-Anne Moss',
  'Hugo Weaving',
  'Gloria Foster'],
 'countries': ['United States', 'Australia'],
 'cover_url': 'https://m.media-amazon.com/images/M/MV5BNzQzOTk3OTAtNDQ0Zi00ZTVkLWI0MTEtMDllZjNkYzNjNTc4L2ltYWdlXkEyXkFqcGdeQXVyNjU0OTQ0OTY@._V1_SX101_CR0,0,101,150_.jpg',
 'director': ['Lana Wachowski', 'Lilly Wachowski'],
 'genres': ['Action', 'Sci-Fi'],
 'imdb_id': '0133093',
 'rating': 8.7,
 'runtime': '136 Min',
 'title': 'The Matrix',
 'top_250_rank': 16,
 'year': 1999}

In [31]:
get_movie_quotes_df('matrix')

Unnamed: 0,imdb_id,quotes,title
0,0133093,Morpheus: This is your last chance. After this...,The Matrix
1,0133093,Agent Smith: I'd like to share a revelation th...,The Matrix
2,0133093,Spoon boy: Do not try and bend the spoon. That...,The Matrix
3,0133093,Neo: What are you trying to tell me? That I ca...,The Matrix
4,0133093,"Morpheus: Neo, sooner or later you're going to...",The Matrix
...,...,...,...
110,0133093,"Agent Smith: Dammit., Agent Brown: The trace w...",The Matrix
111,0133093,"Mouse: [to Neo] So I understand that, uh, you'...",The Matrix
112,0133093,Morpheus: What are you waiting for? You're fas...,The Matrix
113,0133093,[Neo has just been revived after escaping from...,The Matrix


In [29]:
get_movie_reviews_df('matrix')

Unnamed: 0,author,content,date,helpful,not_helpful,rating,title,imdb_id
0,ur2467618,There are currently almost 2800 reviews on IMD...,22 February 2018,39,178,7.0,The Matrix,133093
1,ur20552756,'The Matrix' may not be one of the very greate...,20 February 2017,38,10,9.0,The Matrix,133093
2,ur0482513,"What is THE MATRIX? Well, let me tell you: it'...",26 August 2016,14,7,9.0,The Matrix,133093
3,ur2898520,The Wachowski brothers have created one of the...,9 November 2013,13,6,9.0,The Matrix,133093
4,ur0278527,"This is one of those movies that ""everyone"" ha...",20 August 2019,17,6,9.0,The Matrix,133093
5,ur61197531,and this is all. because each explanation soun...,8 January 2017,28,5,,The Matrix,133093
6,ur3914439,"15 years ago, back in the 20th century, the fi...",29 November 2014,17,90,6.0,The Matrix,133093
7,ur4569900,What made The Matrix special was the groundbre...,28 March 2016,14,21,7.0,The Matrix,133093
8,ur1002035,Neo is a computer programmer by day and a inte...,20 June 2002,13,13,,The Matrix,133093
9,ur5876717,"The Wachowsky Brothers took many elements, fro...",21 July 2009,3,5,9.0,The Matrix,133093


In [33]:
get_movie_locations_df('matrix')

Unnamed: 0,0,imdb_id,title
0,"Nashville, Tennessee, USA::(exterior scenes: s...",133093,The Matrix
1,"Sydney, New South Wales, Australia",133093,The Matrix
2,"Redfern, Sydney, New South Wales, Australia::(...",133093,The Matrix
3,"San Francisco, California, USA::(exterior scen...",133093,The Matrix
4,"Sydney Central Business District, Sydney, New ...",133093,The Matrix
5,"Colonial State Bank Centre, Martin Place, Sydn...",133093,The Matrix
6,"Westin Hotel, Martin Place, Sydney, New South ...",133093,The Matrix
7,"Fox Studios, Moore Park, Sydney, New South Wal...",133093,The Matrix
8,"Forty One Restaurant, Chifley Tower, Sydney, N...",133093,The Matrix
9,"AON Tower, Kent Street, Sydney, New South Wale...",133093,The Matrix
