In [1]:
import numpy as np 
import pandas as pd 

In [3]:
def get_data(size=10_000):
    """Build a random synthetic DataFrame with `size` rows.

    Parameters
    ----------
    size : int, default 10_000
        Number of rows to generate.

    Returns
    -------
    pandas.DataFrame
        Columns:
        - age: random integers in [0, 100)
        - time_in_bed: random integers in [0, 9)
        - pct_sleeping: random floats in [0, 1)
        - favorite_food: one of 'pizza', 'taco', 'ice_cream'
        - hate_food: one of 'broccoli', 'candy corn', 'eggs'

    Notes
    -----
    Uses NumPy's global random state without seeding, so every call
    returns different data.
    """
    # `size` is honoured as passed (it used to be overwritten with 10_000).
    return pd.DataFrame({
        'age': np.random.randint(0, 100, size),
        'time_in_bed': np.random.randint(0, 9, size),
        'pct_sleeping': np.random.rand(size),
        'favorite_food': np.random.choice(['pizza', 'taco', 'ice_cream'], size),
        # `size` must be the second argument, not a member of the choices list;
        # otherwise a single scalar is drawn and broadcast to every row.
        'hate_food': np.random.choice(['broccoli', 'candy corn', 'eggs'], size),
    })

## The Problem
Reward calculation:
- If they were in bed for more than 5 hours AND were sleeping more than 50% of the time, we give them their favorite food.
- Otherwise, we give them their hate food.
- If they are over 90 years old, we give them their favorite food regardless.

In [5]:
def reward_cal(row):
    """Return the food reward for a single row.

    Rules:
    - Anyone over 90 years old gets their favorite food regardless.
    - Anyone with more than 5 hours in bed AND pct_sleeping above 0.5
      gets their favorite food.
    - Everyone else gets their hate food.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series or dict)
        Must provide 'age', 'time_in_bed', 'pct_sleeping',
        'favorite_food' and 'hate_food'.

    Returns
    -------
    The value of row['favorite_food'] or row['hate_food'].
    """
    # Strictly "over 90": age == 90 does not qualify. This matches the
    # written rule and the `df['age'] > 90` mask in the vectorized cell.
    if row['age'] > 90:
        return row['favorite_food']
    if (row['time_in_bed'] > 5) and (row['pct_sleeping'] > 0.5):
        return row['favorite_food']
    return row['hate_food']

## Level 1 - Loop

In [7]:
%%timeit
df = get_data()
for index, row in df.iterrows():
    df.loc[index,'reward'] = reward_cal(row)

## Level 2 - Apply

In [8]:
%%timeit
df = get_data()
df['reward'] = df.apply(reward_cal, axis=1)

## Level 3 - Vectorized

In [13]:
%%timeit
df = get_data()
df['reward'] = df['hate_food']
df.loc[((df['pct_sleeping']>0.5) & \
         (df['time_in_bed']>5)) | \
         (df['age']>90), 'reward'] = df['favorite_food']

# Plot Differences

In [14]:
# Timings copied by hand from the %%timeit cells above, one row per approach.
# NOTE(review): units are not recorded here; presumably milliseconds — confirm.
results = pd.DataFrame(
    {
        'type': ['loop', 'apply', 'vectorized'],
        'mean': [3500, 192, 1.36],
        'std': [48.3, 6.34, 0.00896],
    }
)

In [15]:
# Bar chart of the mean runtime for each approach.
mean_by_type = results.set_index('type')['mean']
mean_by_type.plot.bar()