In [1]:
import pandas as pd
import numpy as np
from pandarallel import pandarallel

pandarallel.initialize(progress_bar=True)

INFO: Pandarallel will run on 10 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [2]:
wrds_data = pd.read_csv('../data/wrds-data.csv')
wrds_data = wrds_data.reset_index()
wrds_data

Unnamed: 0,index,GVKEY,iid,datadate,tic,conm,divd,ajexdi,cshtrd,eps,prccd,prchd,prcld,prcod,busdesc,gind,gsector,gsubind,sic
0,0,1004,1,2013/10/24,AIR,AAR CORP,,1.0,351238.0,1.37,29.38,29.67,29.08,29.21,AAR Corp. provides products and services to co...,201010.0,20.0,20101010.0,5080
1,1,1004,1,2002/03/01,AIR,AAR CORP,,1.0,75300.0,-1.60,7.44,7.45,7.15,,AAR Corp. provides products and services to co...,201010.0,20.0,20101010.0,5080
2,2,1004,1,2007/11/30,AIR,AAR CORP,,1.0,354300.0,1.70,33.02,33.75,32.81,32.99,AAR Corp. provides products and services to co...,201010.0,20.0,20101010.0,5080
3,3,1004,1,2015/06/30,AIR,AAR CORP,,1.0,248059.0,1.23,31.87,32.17,31.73,32.10,AAR Corp. provides products and services to co...,201010.0,20.0,20101010.0,5080
4,4,1004,1,2019/11/08,AIR,AAR CORP,,1.0,106407.0,2.37,43.35,44.29,43.14,44.08,AAR Corp. provides products and services to co...,201010.0,20.0,20101010.0,5080
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32127389,32127389,353444,90,2022/07/28,HLN,HALEON PLC,,1.0,15271880.0,,7.00,7.24,6.97,7.20,Haleon plc engages in the research and develop...,303020.0,30.0,30302010.0,2834
32127390,32127390,353444,90,2022/07/27,HLN,HALEON PLC,,1.0,15006760.0,,7.32,7.56,7.23,7.51,Haleon plc engages in the research and develop...,303020.0,30.0,30302010.0,2834
32127391,32127391,353444,90,2022/07/26,HLN,HALEON PLC,,1.0,27235290.0,,7.35,7.41,7.29,7.31,Haleon plc engages in the research and develop...,303020.0,30.0,30302010.0,2834
32127392,32127392,353444,90,2022/07/25,HLN,HALEON PLC,,1.0,19349790.0,,7.30,7.45,7.16,7.41,Haleon plc engages in the research and develop...,303020.0,30.0,30302010.0,2834


In [4]:
relevant_data = wrds_data.loc[:, ['datadate', 'tic', 'gind', 'prccd', 'ajexdi']]
relevant_data.loc[:, 'prccd'] = relevant_data.loc[:, 'ajexdi'] * relevant_data.loc[:, 'prccd']
relevant_data = relevant_data.drop(columns='ajexdi')

# filter non alpha tickers
pattern = r'^[a-zA-Z]+$'
mask = relevant_data['tic'].str.match(pattern)
mask = mask.fillna(False)
relevant_data = relevant_data.loc[mask]
relevant_data

Unnamed: 0,datadate,tic,gind,prccd
0,2013/10/24,AIR,201010.0,29.38
1,2002/03/01,AIR,201010.0,7.44
2,2007/11/30,AIR,201010.0,33.02
3,2015/06/30,AIR,201010.0,31.87
4,2019/11/08,AIR,201010.0,43.35
...,...,...,...,...
32127389,2022/07/28,HLN,303020.0,7.00
32127390,2022/07/27,HLN,303020.0,7.32
32127391,2022/07/26,HLN,303020.0,7.35
32127392,2022/07/25,HLN,303020.0,7.30


In [7]:
tickers_unstacked = relevant_data.set_index(['datadate', 'tic'])
tickers_unstacked['gind'] = tickers_unstacked['gind'].astype(str)
tickers_unstacked = tickers_unstacked.unstack(level=1).swaplevel(0, 1, axis=1)
tickers_unstacked.index = pd.to_datetime(tickers_unstacked.index)
tickers_returns = tickers_unstacked.swaplevel(0, 1, axis=1)
tickers_returns['prccd'] = tickers_returns['prccd'].pct_change()
tickers_returns = tickers_unstacked.swaplevel(0, 1, axis=1)
short_window = 20
tickers_momentum_short = tickers_unstacked.rolling(short_window).sum()
tickers_momentum_short

tic,A,AA,AAAGY,AAAP,AABA,AABC,AAC,AACB,AACC,AACG,...,ZVUE,ZVXI,ZWRK,ZWS,ZXAIY,ZY,ZYME,ZYNE,ZYXI,ZZ
Unnamed: 0_level_1,gind,gind,gind,gind,gind,gind,gind,gind,gind,gind,...,prccd,prccd,prccd,prccd,prccd,prccd,prccd,prccd,prccd,prccd
datadate,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2002-01-02,,,,,,,,,,,...,,,,,,,,,,
2002-01-03,,,,,,,,,,,...,,,,,,,,,,
2002-01-04,,,,,,,,,,,...,,,,,,,,,,
2002-01-07,,,,,,,,,,,...,,,,,,,,,,
2002-01-08,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-27,,,,,,,,,,,...,,,,,,,,,,
2023-03-28,,,,,,,,,,,...,,,,,,,,,,
2023-03-29,,,,,,,,,,,...,,,,,,,,,,
2023-03-30,,,,,,,,,,,...,,,,,,,,,,



KeyboardInterrupt



In [2]:
industry_organized = tickers_momentum_short.stack(level=0).reset_index().set_index(['datadate','gind'])
industry_average_momentum = industry_organized.groupby('gind')['prccd'].rolling(short_window).mean()
industry_average_momentum = industry_average_momentum.reset_index(level=2)
industry_average_momentum = industry_average_momentum.drop(columns=['gind'])
industry_average_momentum = industry_average_momentum.rename(columns={'prccd': 'industry_means'})
industry_average_momentum

NameError: name 'tickers_momentum_short' is not defined

In [1]:
industry_organized = industry_organized.reset_index()
industry_relative_momentum = industry_organized.merge(industry_average_momentum, on=['gind', 'datadate'])
industry_relative_momentum['industry_relative'] = industry_relative_momentum['prccd'] - industry_relative_momentum['industry_means']
industry_relative_momentum

NameError: name 'industry_organized' is not defined