In [35]:
import pandas as pd
import polars as pl

from factorlib.utils.system import get_data_dir
from factorlib.utils.helpers import resample

In [36]:
# Locate the raw daily OHLCV file, scan it lazily (letting polars try to parse
# date columns), then materialise the result with the streaming engine.
raw_data_dir = get_data_dir() / 'raw'
ohclv_csv_path = raw_data_dir / 'ohclv_daily.csv'
ohclv_scan = pl.scan_csv(ohclv_csv_path, try_parse_dates=True)
ohclv_raw = ohclv_scan.collect(streaming=True)
ohclv_raw

GVKEY,iid,date_index,ticker,conm,divd,cshtrd,eps,busdesc,gind,gsector,gsubind,sic,prcod,prchd,prcld,prccd
i64,i64,date,str,str,str,f64,f64,str,i64,i64,i64,i64,str,f64,f64,f64
126554,1,2002-01-02,"""A""","""AGILENT TECHNO…",,2.1593e6,0.38,"""Agilent Techno…",352030,35,35203010,3826,,29.34,28.46,29.25
126554,1,2002-01-03,"""A""","""AGILENT TECHNO…",,3.2606e6,0.38,"""Agilent Techno…",352030,35,35203010,3826,,31.2,29.42,31.1
126554,1,2002-01-04,"""A""","""AGILENT TECHNO…",,5.1182e6,0.38,"""Agilent Techno…",352030,35,35203010,3826,,32.94,31.65,32.78
126554,1,2002-01-07,"""A""","""AGILENT TECHNO…",,3.8092e6,0.38,"""Agilent Techno…",352030,35,35203010,3826,,32.89,32.2,32.65
126554,1,2002-01-08,"""A""","""AGILENT TECHNO…",,2.4954e6,0.38,"""Agilent Techno…",352030,35,35203010,3826,,32.89,32.25,32.75
126554,1,2002-01-09,"""A""","""AGILENT TECHNO…",,2.1168e6,0.38,"""Agilent Techno…",352030,35,35203010,3826,,33.3,31.69,31.97
126554,1,2002-01-10,"""A""","""AGILENT TECHNO…",,1.2381e6,0.38,"""Agilent Techno…",352030,35,35203010,3826,,31.97,31.3,31.72
126554,1,2002-01-11,"""A""","""AGILENT TECHNO…",,1.4162e6,0.38,"""Agilent Techno…",352030,35,35203010,3826,,31.9,30.71,31.04
126554,1,2002-01-14,"""A""","""AGILENT TECHNO…",,2.2298e6,0.38,"""Agilent Techno…",352030,35,35203010,3826,,30.62,30.07,30.42
126554,1,2002-01-15,"""A""","""AGILENT TECHNO…",,1.86e6,0.38,"""Agilent Techno…",352030,35,35203010,3826,,31.15,30.18,30.45


In [37]:
# Keep only the date, ticker and 'prccd' columns, then order the panel by
# ticker and date so each ticker's rows are contiguous and chronological.
closes_raw = (
    ohclv_raw.lazy()
    .select(pl.col('date_index'), pl.col('ticker'), pl.col('prccd'))
    .collect(streaming=True)
    .sort(['ticker', 'date_index'])
)
closes_raw

date_index,ticker,prccd
date,str,f64
2002-01-02,"""A""",29.25
2002-01-03,"""A""",31.1
2002-01-04,"""A""",32.78
2002-01-07,"""A""",32.65
2002-01-08,"""A""",32.75
2002-01-09,"""A""",31.97
2002-01-10,"""A""",31.72
2002-01-11,"""A""",31.04
2002-01-14,"""A""",30.42
2002-01-15,"""A""",30.45


In [38]:
# Move to pandas, index by date, and compute the row-over-row percentage
# change of 'prccd' within each ticker (the first row of a ticker is NaN).
closes_pd = closes_raw.to_pandas()
returns = closes_pd.set_index('date_index')
ret_by_ticker = returns.groupby('ticker')['prccd'].pct_change()
returns['ret'] = ret_by_ticker
returns

Unnamed: 0_level_0,ticker,prccd,ret
date_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2002-01-02,A,29.2500,
2002-01-03,A,31.1000,0.063248
2002-01-04,A,32.7800,0.054019
2002-01-07,A,32.6500,-0.003966
2002-01-08,A,32.7500,0.003063
...,...,...,...
2023-02-27,ZYNE,0.5190,-0.001923
2023-02-28,ZYNE,0.4620,-0.109827
2023-03-01,ZYNE,0.4629,0.001948
2023-03-02,ZYNE,0.4890,0.056384


In [39]:
# Number of rows (per-ticker observations) to lag the return by.
# NOTE(review): this counts rows, not calendar days -- confirm 364 is the intended lag.
MOM_SEASON_LAG = 364

# Work on a copy so `returns` from the previous cell is left untouched and
# this cell gives the same result when re-run.
with_mom_season = returns.copy()
with_mom_season['ret'] = with_mom_season['ret'].fillna(0)

# Lag within each ticker. A plain .shift() over the whole ticker-sorted panel
# would fill a ticker's first rows with the tail of the previous ticker.
with_mom_season['MomSeasonShort'] = (
    with_mom_season.groupby('ticker')['ret'].shift(MOM_SEASON_LAG)
)

# Drop rows with any missing value; this removes each ticker's first
# MOM_SEASON_LAG rows, which have no lagged return.
with_mom_season = with_mom_season.dropna()
with_mom_season

Unnamed: 0_level_0,ticker,prccd,ret,MomSeasonShort
date_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-06-13,A,18.9200,0.004246,0.000000
2003-06-16,A,19.2400,0.016913,0.063248
2003-06-17,A,19.2400,0.000000,0.054019
2003-06-18,A,19.5600,0.016632,-0.003966
2003-06-19,A,19.2600,-0.015337,0.003063
...,...,...,...,...
2023-02-27,ZYNE,0.5190,-0.001923,0.009281
2023-02-28,ZYNE,0.4620,-0.109827,-0.006897
2023-03-01,ZYNE,0.4629,0.001948,-0.020833
2023-03-02,ZYNE,0.4890,0.056384,-0.014184


In [40]:
# Keep the ticker and the factor column, renaming the factor to its export name.
# rename() returns a new frame here; renaming in place on a column slice is
# what raises pandas' SettingWithCopyWarning.
mom_season_short_daily = with_mom_season[['ticker', 'MomSeasonShort']].rename(
    columns={'MomSeasonShort': 'mom_season_short_daily'}
)
mom_season_short_daily


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0_level_0,ticker,mom_season_short_daily
date_index,Unnamed: 1_level_1,Unnamed: 2_level_1
2003-06-13,A,0.000000
2003-06-16,A,0.063248
2003-06-17,A,0.054019
2003-06-18,A,-0.003966
2003-06-19,A,0.003063
...,...,...
2023-02-27,ZYNE,0.009281
2023-02-28,ZYNE,-0.006897
2023-03-01,ZYNE,-0.020833
2023-03-02,ZYNE,-0.014184


In [41]:
# Write the factor (with its date index) to the momentum data directory as CSV.
momentum_data_dir = get_data_dir() / 'momentum'
mom_season_output_path = momentum_data_dir / 'mom_season_short_daily.csv'
mom_season_short_daily.to_csv(mom_season_output_path)