In [12]:
import csv
from collections import defaultdict, namedtuple, Counter, deque
from operator import itemgetter

In [13]:
MOVIE_DATA = 'movie_metadata.csv'
NUM_TOP_DIRECTORS = 20
MIN_MOVIES = 4
MIN_YEAR = 1960

In [14]:
Movie = namedtuple('Movie', 'title year score')

In [15]:
def get_movies_by_director():
    movies_by_director = defaultdict(list)

    with open(MOVIE_DATA, encoding='UTF-8') as fin:
        reader = csv.DictReader(fin)
        for movie in reader:
            movies_by_director[movie['director_name']].append(
                Movie(movie['movie_title'].replace('\xa0', '').strip(), movie['title_year'], movie['imdb_score']))
        return movies_by_director

In [17]:
def get_average_scores(directors):
    '''Filter out directors with < MIN_MOVIES and calculate averge score'''
    directors = {key: {'score': 
                       _calc_mean(val),
                       'movies':
                       [movie for movie in val if int(movie.year) >= MIN_YEAR]}
                 for key, val in directors.items() 
                 if (len(val) >= MIN_MOVIES) & (key != '')}
    return directors

In [49]:
def _calc_mean(movies):
    return round(sum(
        [float(movie.score) 
         for movie in movies 
         if int(movie.year) >= MIN_YEAR]
        ) / len(movies), 1)

In [50]:
def print_results(directors):
    scores = {key: val['score'] for key, val in directors.items()}
    sorted_directors = sorted(scores, key=scores.__getitem__, reverse=True)
       
    for counter, director in enumerate(sorted_directors):     
        if counter == 20:
            break
        vals = directors[director]
        avg = round(vals['score'],1)
        print(f'{counter + 1:02}. {director:<52} {avg}')
        
        for movie in vals['movies']:
            print(f'{movie.year}] {movie.title:<50} {movie.score}')
        print('-' * 60)

In [51]:
directors = get_movies_by_director()

In [52]:
_calc_mean(directors['Sergio Leone'])
# directors['Sergio Leone']

8.5

In [53]:
directors = get_average_scores(directors)

In [54]:
print_results(directors)

01. Sergio Leone                                         8.5
1984] Once Upon a Time in America                        8.4
1968] Once Upon a Time in the West                       8.6
1966] The Good, the Bad and the Ugly                     8.9
1964] A Fistful of Dollars                               8.0
------------------------------------------------------------
02. Christopher Nolan                                    8.4
2012] The Dark Knight Rises                              8.5
2008] The Dark Knight                                    9.0
2014] Interstellar                                       8.6
2010] Inception                                          8.8
2005] Batman Begins                                      8.3
2002] Insomnia                                           7.2
2006] The Prestige                                       8.5
2000] Memento                                            8.5
------------------------------------------------------------
03. Quentin Tarantino   

In [55]:
def test():
    directors = get_movies_by_director()

    assert 'Sergio Leone' in directors
    assert 'Andrew Stanton' in directors  # has 3 movies, but not yet filtered
    assert len(directors['Sergio Leone']) == 4
    assert len(directors['Peter Jackson']) == 12

    movies_sergio = directors['Sergio Leone']
    movies_nolan = directors['Christopher Nolan']
    assert _calc_mean(movies_sergio) == 8.5
    assert _calc_mean(movies_nolan) == 8.4

    directors = get_average_scores(directors)
    assert 'Andrew Stanton' not in directors  # director 3 movies now filtered out

    expected_directors = ['Sergio Leone', 'Christopher Nolan', 'Quentin Tarantino',
                          'Hayao Miyazaki', 'Frank Darabont', 'Stanley Kubrick']
    expected_avg_scores = [8.5, 8.4, 8.2, 8.2, 8.0, 8.0]
    expected_num_movies = [4, 8, 8, 4, 4, 7]
    report = sorted(directors.items(), key=lambda x: float(x[0][1]), reverse=True)
    for counter, (i, j, k) in enumerate(
                            zip(expected_directors,
                                expected_avg_scores, expected_num_movies)):
        assert report[counter][0] == (i, j)
        assert len(report[counter][1]) == k
        assert  _calc_mean(report[counter][1]) == j

    return "tests pass"

In [56]:
test()

ValueError: could not convert string to float: 'a'

In [58]:
directors.items()

dict_items([('James Cameron', {'score': 7.9, 'movies': [Movie(title='Avatar', year='2009', score='7.9'), Movie(title='Titanic', year='1997', score='7.7'), Movie(title='Terminator 2: Judgment Day', year='1991', score='8.5'), Movie(title='True Lies', year='1994', score='7.2'), Movie(title='The Abyss', year='1989', score='7.6'), Movie(title='Aliens', year='1986', score='8.4'), Movie(title='The Terminator', year='1984', score='8.1')]}), ('Gore Verbinski', {'score': 7.0, 'movies': [Movie(title="Pirates of the Caribbean: At World's End", year='2007', score='7.1'), Movie(title="Pirates of the Caribbean: Dead Man's Chest", year='2006', score='7.3'), Movie(title='The Lone Ranger', year='2013', score='6.5'), Movie(title='Rango', year='2011', score='7.2'), Movie(title='Pirates of the Caribbean: The Curse of the Black Pearl', year='2003', score='8.1'), Movie(title='The Mexican', year='2001', score='6.1'), Movie(title='The Weather Man', year='2005', score='6.6')]}), ('Sam Mendes', {'score': 7.5, 'm