In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import datetime as dt
import pandas_datareader.data as web
import timeit

pd.set_option('display.max_columns', 15)
pd.set_option('display.max_rows', 50)
pd.options.display.float_format = '{:,.4f}'.format

In [4]:
def equal(df1, df2):
    """ Check if the corresponding values of two data frames or numpy arrays are the same. 
    
    df1, df2 : data frame or numpy array
    """
    
    df_diffs = (df1 - df2).dropna()
    if ~(abs(df_diffs) > 1e-8).sum().sum(): # 0 means the two methods give the same resutls.
        return True 
    else:
        return False

## Download data

In [7]:
# download stock prices
start = dt.date(2017, 1, 1)
end = dt.date(2022, 1, 1)
tickers = ['ABG', 'ASTI', 'DQ', 'FSLR', 'SPY']
daily_rets = web.get_data_yahoo(tickers, start, end)['Adj Close'].pct_change().dropna()
daily_rets.head()

Symbols,ABG,ASTI,DQ,FSLR,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-04,0.024,-0.1515,-0.0045,0.0188,0.0059
2017-01-05,-0.0188,-0.1107,0.0233,-0.0085,-0.0008
2017-01-06,-0.0167,-0.1566,0.0124,-0.0165,0.0036
2017-01-09,0.0081,-0.1476,0.0068,0.0075,-0.0033
2017-01-10,0.0193,-0.0503,0.0005,-0.0054,0.0


In [8]:
daily_rets.tail()

Symbols,ABG,ASTI,DQ,FSLR,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-12-27,0.0187,-0.1724,-0.0344,0.0056,0.0142
2021-12-28,0.0121,0.3333,0.0059,-0.0058,-0.0008
2021-12-29,-0.0031,-0.0312,0.0016,-0.0019,0.0013
2021-12-30,0.0064,-0.129,0.0811,0.0099,-0.0028
2021-12-31,0.0155,-0.0741,-0.0151,-0.01,-0.0025


## Input

In [12]:
ndays = 30
mvar = 'SPY'

## Calculate rolling z-scores

In [21]:
# fastest approach
zscores = ((daily_rets - daily_rets.rolling(ndays).mean()) / daily_rets.rolling(ndays).std(ddof=1))
zscores.iloc[25:,:].head(8)

Symbols,ABG,ASTI,DQ,FSLR,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-02-09,,,,,
2017-02-10,,,,,
2017-02-13,,,,,
2017-02-14,,,,,
2017-02-15,1.2381,-0.7776,1.9973,1.1676,1.0188
2017-02-16,-1.7967,-0.1119,-0.8171,-0.9303,-0.556
2017-02-17,1.2686,-0.3968,-0.1663,1.0334,0.086
2017-02-21,1.3899,-0.287,1.2054,2.2361,1.2405


In [22]:
%%timeit
((daily_rets - daily_rets.rolling(ndays).mean()) / daily_rets.rolling(ndays).std(ddof=1))

1.9 ms ± 40.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


**Remark: This approach is 100x faster than methods that involve `apply()`. The general lesson is that vectorization is much faster than `apply()`. Click [here](https://stackoverflow.com/questions/54432583/when-should-i-not-want-to-use-pandas-apply-in-my-code) for a discussion on `apply()`. When calculating rolling statistics, try to roll the data and perform separate aggregation on each rolled sub dataset and then use vectorized calculation to derive the final results.**

## Calculate rolling correlations

In [23]:
# calculate the rolling correlations between each stock's return series and the market's return series
pearson = daily_rets.rolling(ndays).corr(daily_rets[mvar])
kendall = daily_rets.rolling(ndays).corr(daily_rets[mvar], method='kendall')
spearman = daily_rets.rolling(ndays).corr(daily_rets[mvar], method='spearman')

In [24]:
pearson.iloc[25:,:].head(8)

Symbols,ABG,ASTI,DQ,FSLR,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-02-09,,,,,
2017-02-10,,,,,
2017-02-13,,,,,
2017-02-14,,,,,
2017-02-15,0.334,-0.2812,0.061,0.4459,1.0
2017-02-16,0.3117,-0.2557,0.0981,0.4337,1.0
2017-02-17,0.2932,-0.2702,0.1087,0.4238,1.0
2017-02-21,0.3698,-0.2624,0.1534,0.499,1.0


In [25]:
kendall.iloc[25:,:].head(8)

Symbols,ABG,ASTI,DQ,FSLR,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-02-09,,,,,
2017-02-10,,,,,
2017-02-13,,,,,
2017-02-14,,,,,
2017-02-15,0.334,-0.2812,0.061,0.4459,1.0
2017-02-16,0.3117,-0.2557,0.0981,0.4337,1.0
2017-02-17,0.2932,-0.2702,0.1087,0.4238,1.0
2017-02-21,0.3698,-0.2624,0.1534,0.499,1.0


In [26]:
spearman.iloc[25:,:].head(8)

Symbols,ABG,ASTI,DQ,FSLR,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-02-09,,,,,
2017-02-10,,,,,
2017-02-13,,,,,
2017-02-14,,,,,
2017-02-15,0.334,-0.2812,0.061,0.4459,1.0
2017-02-16,0.3117,-0.2557,0.0981,0.4337,1.0
2017-02-17,0.2932,-0.2702,0.1087,0.4238,1.0
2017-02-21,0.3698,-0.2624,0.1534,0.499,1.0


In [27]:
%%timeit
pearson = daily_rets.rolling(ndays).corr(daily_rets[mvar])

4.52 ms ± 34.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [31]:
%%timeit
pearson = daily_rets.rolling(ndays).apply(lambda x: x.corr(daily_rets[mvar]))

3.51 s ± 18.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


**Remark: calling `.corr()` directly is 1000x faster than calling it inside of `apply()`!**