In [1]:
from collections import Counter
from collections import defaultdict
from collections import namedtuple

import numpy as np
import pandas as pd

In [2]:
# Location of the movie metadata CSV (hosted in the pybites challenges repo).
data = (
    'https://raw.githubusercontent.com/pybites/challenges/solutions/13/movie_metadata.csv'
)
# Read the whole CSV into a DataFrame.
movies = pd.read_csv(filepath_or_buffer=data)

In [3]:
# Keep only the four columns used below and give three of them shorter names.
# `rename` hands back a new frame, so `movies` itself is not modified.
director_movies = (
    movies[['director_name', 'movie_title', 'title_year', 'imdb_score']]
    .rename(columns={
        'movie_title': 'title',
        'title_year': 'year',
        'imdb_score': 'score',
    })
)

In [4]:
# Print per-column dtypes and non-null counts; columns whose non-null count is
# below the number of entries contain missing values.
director_movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5043 entries, 0 to 5042
Data columns (total 4 columns):
director_name    4939 non-null object
title            5043 non-null object
year             4935 non-null float64
score            5043 non-null float64
dtypes: float64(2), object(2)
memory usage: 157.7+ KB


In [5]:
# Discard every row that has a missing value in any of the selected columns.
director_movies = director_movies.dropna()

In [6]:
# Record type for one film; its fields are every column after the first
# (the first column holds the director, which is used as the mapping key).
movie_fields = list(director_movies.columns[1:])
Movie = namedtuple('Movie', movie_fields)

# Maps a director's name to the list of Movie records attributed to them.
directors = defaultdict(list)

In [7]:
# Fill `directors` with one Movie record per row of `director_movies`.
#
# `directors` is created in an earlier cell, so start from an empty mapping
# here; otherwise re-running this cell would append every movie a second time.
directors.clear()

# index=False / name=None yields plain (director_name, title, year, score)
# tuples, which lets the columns be unpacked by name instead of by position.
for director_name, raw_title, raw_year, score in director_movies.itertuples(
        index=False, name=None):

    # Remove non-breaking spaces (\xa0) from the title.
    title = raw_title.replace('\xa0', '')

    # The year column is float64; store it as an int. int() raises ValueError
    # for NaN, in which case the year is kept as NaN.
    try:
        year = int(raw_year)
    except ValueError:
        year = np.nan

    directors[director_name].append(Movie(title=title, year=year, score=score))

In [8]:
# Christopher Nolan movies
film_director = 'Christopher Nolan'
# Use .get() rather than directors[...]: indexing a defaultdict with an unknown
# (e.g. misspelled) name would silently insert an empty entry for it.
directors.get(film_director, [])

[Movie(title='The Dark Knight Rises', year=2012, score=8.5),
 Movie(title='The Dark Knight', year=2008, score=9.0),
 Movie(title='Interstellar', year=2014, score=8.6),
 Movie(title='Inception', year=2010, score=8.8),
 Movie(title='Batman Begins', year=2005, score=8.3),
 Movie(title='Insomnia', year=2002, score=7.2),
 Movie(title='The Prestige', year=2006, score=8.5),
 Movie(title='Memento', year=2000, score=8.5)]

In [9]:
# Directors with the most movies
# The per-director list is deliberately not named `movies`: that name holds the
# DataFrame loaded at the top of the notebook and must not be overwritten.
counter = Counter({director: len(films) for director, films in directors.items()})

counter.most_common(5)

[('Steven Spielberg', 26),
 ('Woody Allen', 22),
 ('Martin Scorsese', 20),
 ('Clint Eastwood', 20),
 ('Ridley Scott', 17)]