# Vectorization
## Load some data

In [1]:
from trader.common.utils import *
symbols = ["SPY", "AAPL", "AMZN"]
df = load_minute_data(symbols, start_date="20150101", end_date="20200101")
close_prices = [f"{symbol}_close" for symbol in symbols]
df[close_prices]

Unnamed: 0_level_0,SPY_close,AAPL_close,AMZN_close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-01-02 09:30:00,206.39,111.24,312.58
2015-01-02 09:31:00,206.44,111.30,312.95
2015-01-02 09:32:00,206.66,111.30,313.53
2015-01-02 09:33:00,206.73,111.18,313.65
2015-01-02 09:34:00,206.62,111.04,313.29
...,...,...,...
2019-12-31 15:55:00,321.93,293.58,1848.43
2019-12-31 15:56:00,321.77,293.44,1846.96
2019-12-31 15:57:00,321.71,293.39,1846.56
2019-12-31 15:58:00,321.71,293.51,1847.41


## Compute if SPY and AAPL together cost more than AMZN in a simple loop

In [2]:
df["total_cost_slow"] = 0
for idx, data in df.iterrows():
  df.loc[idx, "total_cost_slow"] = (data.loc[idx, "SPY_close"] + data.loc[idx, "AAPL_close"]) > data.loc[idx, "AMZN_close"]
df["total_cost_slow"]

date
2015-01-02 09:30:00     True
2015-01-02 09:31:00     True
2015-01-02 09:32:00     True
2015-01-02 09:33:00     True
2015-01-02 09:34:00     True
                       ...  
2019-12-31 15:55:00    False
2019-12-31 15:56:00    False
2019-12-31 15:57:00    False
2019-12-31 15:58:00    False
2019-12-31 15:59:00    False
Name: total_cost_slow, Length: 486330, dtype: object

## Speed up code by vectorizing it

In [3]:
df["total_cost_fast"] =  (df["SPY_close"] + df["AAPL_close"]) > df["AMZN_close"]
df["total_cost_fast"]

date
2015-01-02 09:30:00     True
2015-01-02 09:31:00     True
2015-01-02 09:32:00     True
2015-01-02 09:33:00     True
2015-01-02 09:34:00     True
                       ...  
2019-12-31 15:55:00    False
2019-12-31 15:56:00    False
2019-12-31 15:57:00    False
2019-12-31 15:58:00    False
2019-12-31 15:59:00    False
Name: total_cost_fast, Length: 486330, dtype: bool

In [4]:
assert (df["total_cost_fast"] == df["total_cost_slow"]).all()