In [1]:
import csv
from collections import defaultdict, namedtuple, Counter, deque
from operator import itemgetter

In [2]:
# CSV file opened by get_movies_by_director().
MOVIE_DATA = 'movie_metadata.csv'
# Intended length of the printed director ranking.
NUM_TOP_DIRECTORS = 20
# A director needs at least this many movies to be kept by get_average_scores().
MIN_MOVIES = 4
# Movies released before this year are left out of scores and listings.
MIN_YEAR = 1960

In [3]:
# Record for a single movie: title, release year and IMDb score.
Movie = namedtuple('Movie', ['title', 'year', 'score'])

In [4]:
def get_movies_by_director():
    '''Read MOVIE_DATA and group its rows by director.

    Returns a defaultdict(list) mapping each `director_name` value to a list
    of Movie tuples. `year` and `score` are kept as the raw CSV strings;
    titles have non-breaking spaces removed and surrounding whitespace
    stripped.
    '''
    movies_by_director = defaultdict(list)

    # newline='' leaves line-ending handling to the csv module, as the csv
    # documentation requires for file objects handed to a reader.
    with open(MOVIE_DATA, encoding='UTF-8', newline='') as fin:
        for row in csv.DictReader(fin):
            title = row['movie_title'].replace('\xa0', '').strip()
            movies_by_director[row['director_name']].append(
                Movie(title, row['title_year'], row['imdb_score']))

    return movies_by_director

In [5]:
def get_average_scores(directors):
    '''Filter out directors with < MIN_MOVIES and calculate average score.

    directors -- mapping of director name to a list of Movie tuples, as
                 returned by get_movies_by_director()

    Returns a new dict mapping each remaining director to
    {'score': <value from _calc_mean>, 'movies': [<movies with year >= MIN_YEAR>]}.
    Entries with an empty director name are dropped. The input mapping is
    not modified. Raises ValueError if a kept movie's year is not an integer.
    '''
    return {
        name: {
            'score': _calc_mean(movies),
            'movies': [movie for movie in movies
                       if int(movie.year) >= MIN_YEAR],
        }
        for name, movies in directors.items()
        # Logical `and` rather than bitwise `&`: same result for these
        # booleans, but it short-circuits and states the intent.
        if name != '' and len(movies) >= MIN_MOVIES
    }

In [6]:
def _calc_mean(movies):
    return round(sum(
        [float(movie.score) 
         for movie in movies 
         if int(movie.year) >= MIN_YEAR]
        ) / len(movies), 1)

In [7]:
def print_results(directors, num_top=None):
    '''Print the highest-scoring directors, each followed by their movies.

    directors -- mapping of director name to
                 {'score': <number>, 'movies': [<Movie-like>, ...]}
    num_top   -- how many directors to print; defaults to the module-level
                 NUM_TOP_DIRECTORS when omitted

    Directors are ordered by score, highest first; equal scores keep the
    mapping's iteration order because the sort is stable.
    '''
    if num_top is None:
        num_top = NUM_TOP_DIRECTORS

    ranked = sorted(directors.items(),
                    key=lambda item: item[1]['score'],
                    reverse=True)

    for rank, (director, vals) in enumerate(ranked[:num_top], start=1):
        avg = round(vals['score'], 1)
        print(f'{rank:02}. {director:<52} {avg}')

        for movie in vals['movies']:
            print(f'{movie.year}] {movie.title:<50} {movie.score}')
        print('-' * 60)

In [8]:
# Raw mapping: director name -> list of Movie tuples read from MOVIE_DATA.
directors = get_movies_by_director()

In [9]:
# Spot check: mean score computed from one director's raw movie list.
_calc_mean(directors['Sergio Leone'])

8.5

In [10]:
# NOTE(review): this rebinds `directors` from the raw name -> [Movie] mapping
# to the scored mapping, so the cell is not idempotent; re-run the loading
# cell before running this one again.
directors = get_average_scores(directors)

In [11]:
# Print the ranking built from the scored mapping.
print_results(directors)

01. Sergio Leone                                         8.5
1984] Once Upon a Time in America                        8.4
1968] Once Upon a Time in the West                       8.6
1966] The Good, the Bad and the Ugly                     8.9
1964] A Fistful of Dollars                               8.0
------------------------------------------------------------
02. Christopher Nolan                                    8.4
2012] The Dark Knight Rises                              8.5
2008] The Dark Knight                                    9.0
2014] Interstellar                                       8.6
2010] Inception                                          8.8
2005] Batman Begins                                      8.3
2002] Insomnia                                           7.2
2006] The Prestige                                       8.5
2000] Memento                                            8.5
------------------------------------------------------------
03. Quentin Tarantino   

In [41]:
def test():
    '''Check the pipeline against known values from the movie dataset.

    Reads MOVIE_DATA via get_movies_by_director(), so the CSV must be
    present. Returns "tests pass" when every assertion holds; otherwise an
    AssertionError propagates.
    '''
    raw = get_movies_by_director()

    assert 'Sergio Leone' in raw
    assert 'Andrew Stanton' in raw  # has 3 movies, but not yet filtered
    assert len(raw['Sergio Leone']) == 4
    assert len(raw['Peter Jackson']) == 12

    movies_sergio = raw['Sergio Leone']
    movies_nolan = raw['Christopher Nolan']
    assert _calc_mean(movies_sergio) == 8.5
    assert _calc_mean(movies_nolan) == 8.4

    scored = get_average_scores(raw)
    assert 'Andrew Stanton' not in scored  # director with 3 movies now filtered out

    # Ranked (name, stats) pairs, best score first. Built here so the test
    # does not rely on a `report` variable left behind in the kernel; the
    # stable sort keeps equal scores in the mapping's iteration order.
    report = sorted(scored.items(),
                    key=lambda item: item[1]['score'],
                    reverse=True)

    expected_directors = ['Sergio Leone', 'Christopher Nolan', 'Quentin Tarantino',
                          'Hayao Miyazaki', 'Frank Darabont', 'Stanley Kubrick']
    expected_avg_scores = [8.5, 8.4, 8.2, 8.2, 8.0, 8.0]
    expected_num_movies = [4, 8, 8, 4, 4, 7]

    expected = zip(expected_directors, expected_avg_scores, expected_num_movies)
    for counter, (name, avg, num_movies) in enumerate(expected):
        director, stats = report[counter]
        assert (director, stats['score']) == (name, avg)
        assert len(stats['movies']) == num_movies
        assert _calc_mean(stats['movies']) == avg

    return "tests pass"

In [43]:
# Run the checks; the returned string is shown as the cell output.
test()

'tests pass'