In [598]:
import pandas as pd
import numpy as np

In [599]:
def get_data(size = 10_000):
    df = pd.DataFrame()
    df['age'] = np.random.randint(0, 100, size)
    df['time_in_bed'] = np.random.randint(0, 9, size)
    df['pct_sleeping'] = np.random.rand(size)
    df['favorite_food'] = np.random.choice(['pizza', 'taco', 'ice_cream'], size)
    df['hate_food'] = np.random.choice(['broccoli', 'candycorn', 'eggs'], size)
    return df

In [600]:
# get_data()

## The Problem
Reward calculation:
- If they were in bed for more than 5 hours AND they were sleeping for more than 50% we give them their favorite food.
- Otherwise we give them their hate food.
If they are over 90 years old give their favorite food regardless.|

In [601]:
def reward_calc(row):
    if row['age'] >=90:
        return row['favorite_food']
    if (row['time_in_bed'] > 5) & (row['pct_sleeping'] > 0.5):
        return row['favorite_food']
    return row['hate_food']

## Level 1 - Loop 

In [602]:
# %%timeit
df = get_data()
for index, row in df.iterrows():
    df.loc[index, 'reward'] = reward_calc(row)

## level 2 - Apply

In [603]:
# %%timeit
df = get_data()
df["reward"] = df.apply(reward_calc, axis=1)

## Level 3 - Vectorized

In [604]:
# %%timeit
df = get_data()
df['reward'] = df["hate_food"]
df.loc[
    ((df['pct_sleeping'] > 0.5) & (df['time_in_bed'] > 5)) | 
    (df['age'] > 90), 'reward'] = df['favorite_food']

In [605]:
df['reward_outcome'] = ''
# df.shape

In [606]:
# df2 = df.copy()
# df2.shape

In [607]:
# df3 = df.copy()
# df3.shape

In [613]:
df2 = (df.query('favorite_food == reward'))
df2 = df.assign(reward_outcome = 'fav_food')
df2

Unnamed: 0,age,time_in_bed,pct_sleeping,favorite_food,hate_food,reward,reward_outcome
0,88,4,0.345000,pizza,broccoli,broccoli,fav_food
1,62,8,0.140565,ice_cream,eggs,eggs,fav_food
2,54,4,0.205837,ice_cream,candycorn,candycorn,fav_food
3,52,2,0.258523,ice_cream,candycorn,candycorn,fav_food
4,54,2,0.343497,ice_cream,broccoli,broccoli,fav_food
...,...,...,...,...,...,...,...
9991,16,8,0.110547,taco,broccoli,broccoli,fav_food
9993,49,1,0.061246,pizza,broccoli,broccoli,fav_food
9996,50,0,0.794728,ice_cream,candycorn,candycorn,fav_food
9998,76,0,0.859791,pizza,candycorn,candycorn,fav_food


In [615]:
df3 = (df.query('favorite_food != reward', inplace=True))
df3 = df.assign(reward_outcome = 'hate_food')
df3

Unnamed: 0,age,time_in_bed,pct_sleeping,favorite_food,hate_food,reward,reward_outcome
0,88,4,0.345000,pizza,broccoli,broccoli,hate_food
1,62,8,0.140565,ice_cream,eggs,eggs,hate_food
2,54,4,0.205837,ice_cream,candycorn,candycorn,hate_food
3,52,2,0.258523,ice_cream,candycorn,candycorn,hate_food
4,54,2,0.343497,ice_cream,broccoli,broccoli,hate_food
...,...,...,...,...,...,...,...
9991,16,8,0.110547,taco,broccoli,broccoli,hate_food
9993,49,1,0.061246,pizza,broccoli,broccoli,hate_food
9996,50,0,0.794728,ice_cream,candycorn,candycorn,hate_food
9998,76,0,0.859791,pizza,candycorn,candycorn,hate_food


In [616]:
reward = pd.concat([df2, df3], axis=0)
reward.head(10000)

Unnamed: 0,age,time_in_bed,pct_sleeping,favorite_food,hate_food,reward,reward_outcome
0,88,4,0.345000,pizza,broccoli,broccoli,fav_food
1,62,8,0.140565,ice_cream,eggs,eggs,fav_food
2,54,4,0.205837,ice_cream,candycorn,candycorn,fav_food
3,52,2,0.258523,ice_cream,candycorn,candycorn,fav_food
4,54,2,0.343497,ice_cream,broccoli,broccoli,fav_food
...,...,...,...,...,...,...,...
3137,35,7,0.244942,taco,broccoli,broccoli,hate_food
3138,45,6,0.326504,taco,broccoli,broccoli,hate_food
3139,69,8,0.151091,pizza,candycorn,candycorn,hate_food
3141,50,5,0.328732,ice_cream,eggs,eggs,hate_food
