# TDD with Assert
A quick introduction for how to use assert in Jupyter Notebooks to take advantage of Test Driven Development.

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

The assert() function tests whether a condition is true. If not, it will raise an AssertionError. This makes it a beneficial tool for debugging code:

In [15]:
assert(5 < 6)
assert(10 > 20)

AssertionError: 

Importing and preparing the example dataset.

This dataset contains imdb data from 2006 to 2016 and has been downloaded from https://www.kaggle.com/PromptCloudHQ/imdb-data.

In [16]:
# import data
imdb_df = pd.read_csv("imdb_movie_dataset.csv")
display(imdb_df.head())


Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0


In [17]:
# create seaparate row for each title and actor
imdb_actors_df = imdb_df[['Title', 'Actors']]
imdb_actors_df['Actors'] = imdb_actors_df['Actors'].apply(lambda x: x.split(','))
imdb_actors_df = imdb_actors_df.set_index('Title').Actors.apply(pd.Series).stack().reset_index(level=0).rename(columns={0:'Actors'})

imdb_movies_df = imdb_df[['Title', 'Director', 'Year', 'Runtime (Minutes)', 'Rating', 'Revenue (Millions)']]

# merge the movie information such as revenue, runtime etc. back to the table
imdb_actors_df = imdb_actors_df.merge(imdb_movies_df, on='Title', how='inner')
imdb_actors_df = imdb_actors_df.rename(columns={'Runtime (Minutes)':'Runtime', 'Revenue (Millions)':'Revenue'})
imdb_actors_df.columns = map(str.lower, imdb_actors_df.columns)

display(imdb_actors_df.sample(5).head())


Unnamed: 0,title,actors,director,year,runtime,rating,revenue
1166,Masterminds,Owen Wilson,Jared Hess,2016,95,5.8,17.36
2550,Knocked Up,Leslie Mann,Judd Apatow,2007,129,7.0,148.73
3818,Sex and the City 2,Cynthia Nixon,Michael Patrick King,2010,146,4.3,95.33
2794,10 Years,Jenna Dewan Tatum,Jamie Linden,2011,100,6.1,0.2
1088,Satanic,Sarah Hyland,Jeffrey G. Hunt,2016,85,3.7,


The task: Get a table with each actors last movie.

In [18]:
# check values for one actor to define a testcase.
# Brad Pitt's last movie was 'Allied' which can be used for the testcase.
display(imdb_actors_df[imdb_actors_df['actors'] == 'Brad Pitt'].sort_values('year'))

Unnamed: 0,title,actors,director,year,runtime,rating,revenue
3547,Babel,Brad Pitt,Alejandro González Iñárritu,2006,143,7.5,34.3
2464,The Assassination of Jesse James by the Coward...,Brad Pitt,Andrew Dominik,2007,160,7.5,3.9
1712,The Curious Case of Benjamin Button,Brad Pitt,David Fincher,2008,166,7.8,127.49
308,Inglourious Basterds,Brad Pitt,Quentin Tarantino,2009,153,8.3,120.52
2348,Moneyball,Brad Pitt,Bennett Miller,2011,133,7.6,75.61
3551,The Tree of Life,Brad Pitt,Terrence Malick,2011,139,6.8,13.3
1740,World War Z,Brad Pitt,Marc Forster,2013,116,7.0,202.35
972,Fury,Brad Pitt,David Ayer,2014,134,7.6,85.71
284,Allied,Brad Pitt,Robert Zemeckis,2016,124,7.1,40.07


In [19]:
# create table with last movies
imdb_actors_df['movie_number'] = imdb_actors_df.groupby("actors")["year"].rank("dense")
last_movies_df = imdb_actors_df[imdb_actors_df['movie_number'] == 1]

display(last_movies_df.head())

Unnamed: 0,title,actors,director,year,runtime,rating,revenue,movie_number
0,Guardians of the Galaxy,Chris Pratt,James Gunn,2014,121,8.1,333.13,1.0
1,Guardians of the Galaxy,Vin Diesel,James Gunn,2014,121,8.1,333.13,1.0
4,Prometheus,Noomi Rapace,Ridley Scott,2012,124,7.0,126.46,1.0
5,Prometheus,Logan Marshall-Green,Ridley Scott,2012,124,7.0,126.46,1.0
9,Split,Anya Taylor-Joy,M. Night Shyamalan,2016,117,7.3,138.12,1.0


In [20]:
assert(len(last_movies[(last_movies['actors'] == 'Brad Pitt') & (last_movies['title'] == 'Allied')]) == 1)

NameError: name 'last_movies' is not defined

In [21]:
# create table with last movies
imdb_actors_df['movie_number_desc'] = imdb_actors_df.groupby("actors")["year"].rank("dense", ascending=False)
last_movies_desc_df = imdb_actors_df[imdb_actors_df['movie_number_desc'] == 1]

display(last_movies_desc_df.head())

Unnamed: 0,title,actors,director,year,runtime,rating,revenue,movie_number,movie_number_desc
1,Guardians of the Galaxy,Vin Diesel,James Gunn,2014,121,8.1,333.13,1.0,1.0
5,Prometheus,Logan Marshall-Green,Ridley Scott,2012,124,7.0,126.46,1.0,1.0
8,Split,James McAvoy,M. Night Shyamalan,2016,117,7.3,138.12,3.0,1.0
9,Split,Anya Taylor-Joy,M. Night Shyamalan,2016,117,7.3,138.12,1.0,1.0
10,Split,Haley Lu Richardson,M. Night Shyamalan,2016,117,7.3,138.12,1.0,1.0


In [22]:
assert(len(last_movies_desc_df[(last_movies_desc_df['actors'] == 'Brad Pitt') \
                               & (last_movies_desc_df['title'] == 'Allied')]) == 1)