#### Imports

In [None]:
import pandas as pd
import numpy as np

#### Generators

Generator functions are a special kind of function that returns a lazy iterator. Lazy iterators are objects that you can loop over like a list. However, unlike lists, they do not store their contents in memory.

In [None]:
def infinite_sequence():
    """Yield the integers 0, 1, 2, ... one at a time, without ever stopping.

    Each value is produced only when the caller asks for it, so the
    sequence is never built up as a whole.
    """
    current = 0
    while True:
        yield current
        current = current + 1

In [None]:
# Calling a generator function does not run its body; it only creates the generator object.
gen = infinite_sequence()

In [None]:
# Run the generator up to its next `yield` and return the yielded value.
next(gen)

In [None]:
# Execution resumes right after the previous `yield`, so this call returns the following number.
next(gen)

#### Decorators

For a very nice resource, see: https://realpython.com/primer-on-python-decorators/



In [None]:
def my_decorator(func):
    """Wrap ``func`` so that a message is printed before and after each call.

    Args:
        func: The callable to decorate.

    Returns:
        A wrapper that prints a message, calls ``func`` with whatever
        arguments it received, prints a second message, and returns
        ``func``'s result.
    """
    # Imported locally so this cell does not depend on any other cell.
    from functools import wraps

    # wraps() copies __name__, __doc__, etc. from func onto the wrapper, so
    # the decorated function still identifies itself as the original.
    @wraps(func)
    def wrapper(*args, **kwargs):
        print("Something is happening before the function is called.")
        result = func(*args, **kwargs)
        print("Something is happening after the function is called.")
        # Hand the result back so decorating a function does not discard it.
        return result
    return wrapper

def say_whee():
    """Print a short exclamation to standard output."""
    exclamation = "Whee!"
    print(exclamation)

# Rebind the name to the wrapper returned by my_decorator, so later calls to say_whee() go through it.
say_whee = my_decorator(say_whee)

In [None]:
# Calls the decorated version, so the wrapper's messages surround the original output.
say_whee()

Decorating the function by manually reassigning its name is a bit clunky.

Instead, Python allows you to use decorators in a simpler way with the `@` symbol:

In [None]:
def my_decorator(func):
    """Wrap ``func`` so that a message is printed before and after each call.

    Args:
        func: The callable to decorate.

    Returns:
        A wrapper that prints a message, calls ``func`` with whatever
        arguments it received, prints a second message, and returns
        ``func``'s result.
    """
    # Imported locally so this cell does not depend on any other cell.
    from functools import wraps

    # wraps() copies __name__, __doc__, etc. from func onto the wrapper, so
    # the decorated function still identifies itself as the original.
    @wraps(func)
    def wrapper(*args, **kwargs):
        print("Something is happening before the function is called.")
        result = func(*args, **kwargs)
        print("Something is happening after the function is called.")
        # Hand the result back so decorating a function does not discard it.
        return result
    return wrapper

@my_decorator
def say_whee():
    """Print a short exclamation to standard output."""
    exclamation = "Whee!"
    print(exclamation)

In [None]:
# The `@` syntax already applied the decorator, so this call runs the wrapped function.
say_whee()

#### What is a use for this?

Often when writing a pipeline it's handy to know the size of the dataframe at different stages. 
We can easily and neatly track this with a decorator:

In [None]:
# Decorator to print dataframe size
def give_me_the_size(method):
    """Decorate a function so the shape of its return value is printed.

    Args:
        method: The callable to wrap. Its return value must expose a
            ``.shape`` attribute (for example a pandas DataFrame).

    Returns:
        A wrapper that calls ``method`` with the given arguments, prints
        the function name together with the result's shape, and returns
        the result unchanged.
    """
    # Imported locally so this cell does not depend on any other cell.
    from functools import wraps

    # wraps() keeps the decorated function's own __name__ and docstring;
    # without it every pipeline step would present itself as "df_size".
    @wraps(method)
    def df_size(*args, **kw):
        df = method(*args, **kw)
        # The replacement field holds a tuple, so the output renders as
        # "('<function name>', <shape>)".
        print(f'Shape after function: {method.__name__, df.shape}')
        return df
    return df_size

In [None]:
def read_data(path):
    """Read the CSV at ``path`` with ``pd.read_csv`` and return the result."""
    # What is this function missing?
    loaded = pd.read_csv(path)
    return loaded

def copy_df(df):
    """Return ``df.copy()`` so later steps do not work on the caller's object."""
    # What is this function missing?
    duplicate = df.copy()
    return duplicate

In [None]:
# Load the CSV; the path is relative, so it is resolved against the current working directory.
df = read_data(path='../../data/UCI_Credit_Card.csv')

In [None]:
# Preview the first rows to check that the file loaded as expected.
df.head()

In [None]:
@give_me_the_size
def filter_on_sex(df, value=2):
    """Keep only the rows whose ``SEX`` column equals ``value``."""
    matches = df['SEX'] == value
    return df[matches]

@give_me_the_size
def filter_on_education(df, value=1):
    """Keep only the rows whose ``EDUCATION`` column equals ``value``."""
    matches = df['EDUCATION'] == value
    return df[matches]

@give_me_the_size
def filter_on_age(df, value=35):
    """Keep only the rows whose ``AGE`` column is strictly greater than ``value``."""
    older = df['AGE'] > value
    return df[older]

In [None]:
# Re-read the file so the pipeline starts from the full, unfiltered data.
df = read_data(path='../../data/UCI_Credit_Card.csv')

# Chain the filters with .pipe(): each step receives the previous step's
# result, and each decorated step prints the shape of what it returns.
df = (
    copy_df(df)  # start from a copy of the loaded frame
    .pipe(filter_on_sex)
    .pipe(filter_on_education)
    .pipe(filter_on_age)
)

#### Important package to know: scikit-lego

In [None]:
from sklego.pandas_utils import log_step

In [None]:
@log_step
def filter_on_sex(df, value=2):
    """Keep only the rows whose ``SEX`` column equals ``value``."""
    matches = df['SEX'] == value
    return df[matches]

@log_step
def filter_on_education(df, value=1):
    """Keep only the rows whose ``EDUCATION`` column equals ``value``."""
    matches = df['EDUCATION'] == value
    return df[matches]

@log_step
def filter_on_age(df, value=35):
    """Keep only the rows whose ``AGE`` column is strictly greater than ``value``."""
    older = df['AGE'] > value
    return df[older]

In [None]:
import logging

# Configure the root logger to emit everything from DEBUG upwards.
# NOTE(review): presumably needed so log_step's messages are shown; whether
# log_step logs or prints depends on the installed scikit-lego version — confirm.
logging.basicConfig(level=logging.DEBUG)

# Re-read the file so the pipeline starts from the full, unfiltered data.
df = read_data(path='../../data/UCI_Credit_Card.csv')

# Same pipeline as before, now using the @log_step-decorated filters.
df = (
    copy_df(df)  # start from a copy of the loaded frame
    .pipe(filter_on_sex)
    .pipe(filter_on_education)
    .pipe(filter_on_age)
)