# Profiling Python code

### Example: Find duplicate movie titles

- Read 5000 movie titles
- Return a list of movie titles that occur twice
- Search is case insensitive

In [39]:
def read_movies():
    """ Read movies name from movies_name.txt file"""
    
    path = "movies_name.txt"
    with open(path) as fd:
        return fd.read().splitlines()

In [40]:
def is_duplicate(name , movie_list):
    
    for movie_name in movie_list:
        if movie_name.lower() == name.lower():
            return True
        
    return False

In [41]:
def find_duplicate_movies():
    
    movies_data = read_movies()
    duplicates = []
    while movies_data:
        movie = movies_data.pop()
        if is_duplicate(movie , movies_data):
            duplicates.append(movie)
    
    return duplicates

In [42]:
%time find_duplicate_movies()

CPU times: user 2.05 s, sys: 4.69 ms, total: 2.05 s
Wall time: 2.05 s


['Sabotage',
 'The Last House on the Left',
 'Night of the Living Dead',
 "A Dog's Breakfast",
 'Cat People',
 'The Texas Chain Saw Massacre',
 'The Calling',
 'Side Effects',
 '20,000 Leagues Under the Sea',
 'The Love Letter',
 'Across the Universe',
 'Halloween',
 'Oz the Great and Powerful',
 'Crossroads',
 'Home',
 'House of Wax',
 'King Kong',
 'The Unborn',
 'Snitch',
 'Trance',
 'The Lovely Bones',
 'History of the World: Part I',
 'Goosebumps',
 'The Full Monty',
 'Juno',
 'The Fog',
 'Lucky Number Slevin',
 'The Day the Earth Stood Still',
 'Mercury Rising',
 'Jack Reacher',
 'A Woman, a Gun and a Noodle Shop',
 'Dawn of the Dead',
 'A Nightmare on Elm Street',
 'Carrie',
 'Stealing Harvard',
 'Hamlet',
 'Lolita',
 'The French Connection',
 'Dodgeball: A True Underdog Story',
 'Halloween II',
 "The Astronaut's Wife",
 'The Omen',
 'Cinderella',
 'Alice in Wonderland',
 'The Gambler',
 'The Watch',
 'Day of the Dead',
 'Unknown',
 'Ben-Hur',
 'My Soul to Take',
 'Planet of the

## A profiling decorator

- Apply to a function with @profile
- Profiles the function using cProfile, and prints out a report
- Adapted from the Python 3.6 docs:
- https://docs.python.org/3/library/profile.html#profile.Profile

In [43]:
import cProfile, pstats, io


def profile(func):
    """A decorator that uses cProfile to profile a function"""
    
    def inner(*args, **kwargs):
        
        pr = cProfile.Profile()
        pr.enable()
        retval = func(*args, **kwargs)
        pr.disable()
        s = io.StringIO()
        sortby = 'cumulative'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())
        return retval

    return inner

In [44]:
@profile
def find_duplicate_movies():
    
    movies_data = read_movies()
    duplicates = []
    while movies_data:
        movie = movies_data.pop()
        if is_duplicate(movie , movies_data):
            duplicates.append(movie)
    
    return duplicates

In [47]:
find_duplicate_movies()

         24980393 function calls in 4.660 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.002    0.002    4.660    4.660 <ipython-input-44-89e89d34e6a9>:1(find_duplicate_movies)
     5043    2.702    0.001    4.655    0.001 <ipython-input-40-0c606ccca343>:1(is_duplicate)
 24970168    1.953    0.000    1.953    0.000 {method 'lower' of 'str' objects}
        1    0.000    0.000    0.002    0.002 <ipython-input-39-c7689e016564>:1(read_movies)
     5043    0.001    0.000    0.001    0.000 {method 'pop' of 'list' objects}
        1    0.001    0.001    0.001    0.001 {method 'splitlines' of 'str' objects}
        1    0.001    0.001    0.001    0.001 {built-in method io.open}
        1    0.000    0.000    0.000    0.000 {method 'read' of '_io.TextIOWrapper' objects}
        1    0.000    0.000    0.000    0.000 /Users/ikram/.pyenv/versions/3.6.6/lib/python3.6/codecs.py:318(decode)
        1    0.000    0.000  

['Sabotage',
 'The Last House on the Left',
 'Night of the Living Dead',
 "A Dog's Breakfast",
 'Cat People',
 'The Texas Chain Saw Massacre',
 'The Calling',
 'Side Effects',
 '20,000 Leagues Under the Sea',
 'The Love Letter',
 'Across the Universe',
 'Halloween',
 'Oz the Great and Powerful',
 'Crossroads',
 'Home',
 'House of Wax',
 'King Kong',
 'The Unborn',
 'Snitch',
 'Trance',
 'The Lovely Bones',
 'History of the World: Part I',
 'Goosebumps',
 'The Full Monty',
 'Juno',
 'The Fog',
 'Lucky Number Slevin',
 'The Day the Earth Stood Still',
 'Mercury Rising',
 'Jack Reacher',
 'A Woman, a Gun and a Noodle Shop',
 'Dawn of the Dead',
 'A Nightmare on Elm Street',
 'Carrie',
 'Stealing Harvard',
 'Hamlet',
 'Lolita',
 'The French Connection',
 'Dodgeball: A True Underdog Story',
 'Halloween II',
 "The Astronaut's Wife",
 'The Omen',
 'Cinderella',
 'Alice in Wonderland',
 'The Gambler',
 'The Watch',
 'Day of the Dead',
 'Unknown',
 'Ben-Hur',
 'My Soul to Take',
 'Planet of the

In [48]:

def is_duplicate(name , movie_list):
    
    for movie_name in movie_list:
        if movie_name == name:
            return True
        
    return False


@profile
def find_duplicate_movies():
    
    movies_data = read_movies()
    movies_data = [movie.lower() for movie in movies_data]
    duplicates = []
    while movies_data:
        movie = movies_data.pop()
        if is_duplicate(movie , movies_data):
            duplicates.append(movie)
    
    return duplicates

In [49]:
find_duplicate_movies()

         15269 function calls in 0.303 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.003    0.003    0.303    0.303 <ipython-input-48-277b10f9d8dc>:11(find_duplicate_movies)
     5043    0.296    0.000    0.296    0.000 <ipython-input-48-277b10f9d8dc>:2(is_duplicate)
        1    0.001    0.001    0.002    0.002 <ipython-input-48-277b10f9d8dc>:15(<listcomp>)
        1    0.000    0.000    0.002    0.002 <ipython-input-39-c7689e016564>:1(read_movies)
        1    0.001    0.001    0.001    0.001 {built-in method io.open}
     5043    0.001    0.000    0.001    0.000 {method 'lower' of 'str' objects}
        1    0.001    0.001    0.001    0.001 {method 'splitlines' of 'str' objects}
     5043    0.001    0.000    0.001    0.000 {method 'pop' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {method 'read' of '_io.TextIOWrapper' objects}
        1    0.000    0.000    0.000    0.000 /Users/ik

['sabotage',
 'the last house on the left',
 'night of the living dead',
 "a dog's breakfast",
 'cat people',
 'the texas chain saw massacre',
 'the calling',
 'side effects',
 '20,000 leagues under the sea',
 'the love letter',
 'across the universe',
 'halloween',
 'oz the great and powerful',
 'crossroads',
 'home',
 'house of wax',
 'king kong',
 'the unborn',
 'snitch',
 'trance',
 'the lovely bones',
 'history of the world: part i',
 'goosebumps',
 'the full monty',
 'juno',
 'the fog',
 'lucky number slevin',
 'the day the earth stood still',
 'mercury rising',
 'jack reacher',
 'a woman, a gun and a noodle shop',
 'dawn of the dead',
 'a nightmare on elm street',
 'carrie',
 'stealing harvard',
 'hamlet',
 'lolita',
 'the french connection',
 'dodgeball: a true underdog story',
 'halloween ii',
 "the astronaut's wife",
 'the omen',
 'cinderella',
 'alice in wonderland',
 'the gambler',
 'the watch',
 'day of the dead',
 'unknown',
 'ben-hur',
 'my soul to take',
 'planet of the

In [50]:
@profile
def find_duplicate_movies():
    
    movies_data = read_movies()
    movies_data = [movie.lower() for movie in movies_data]
    duplicates = []
    while movies_data:
        movie = movies_data.pop()
        if movie in movies:
            duplicates.append(movie)
    
    return duplicates

In [51]:
find_duplicate_movies()

         10099 function calls in 0.004 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.001    0.001    0.004    0.004 <ipython-input-50-493a5f5a5ec4>:1(find_duplicate_movies)
        1    0.000    0.000    0.001    0.001 <ipython-input-39-c7689e016564>:1(read_movies)
        1    0.001    0.001    0.001    0.001 <ipython-input-50-493a5f5a5ec4>:5(<listcomp>)
     5043    0.001    0.000    0.001    0.000 {method 'lower' of 'str' objects}
        1    0.001    0.001    0.001    0.001 {method 'splitlines' of 'str' objects}
        1    0.000    0.000    0.000    0.000 {method 'read' of '_io.TextIOWrapper' objects}
     5043    0.000    0.000    0.000    0.000 {method 'pop' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {built-in method io.open}
        1    0.000    0.000    0.000    0.000 /Users/ikram/.pyenv/versions/3.6.6/lib/python3.6/codecs.py:318(decode)
        1    0.000    0.000    0.0

[]