In [1]:
# Importing all necessary libraries

import numpy as np

import pandas as pd

In [2]:
# Loading in the daily and monthly crsp datasets

crsp_daily = pd.read_feather('~/FIN_585/crsp_data/crsp_daily.ftr')

crsp_monthly = pd.read_feather('~/FIN_585/crsp_data/crsp_monthly.ftr')

In [3]:
# Cleaning the daily dataset

crsp_daily['prc'] = abs(crsp_daily['prc'])

crsp_daily['prc_lag'] = crsp_daily.groupby('permno')['prc'].shift(1)

crsp_daily = crsp_daily[crsp_daily['ret'] > -1]

crsp_daily = crsp_daily[crsp_daily['prc_lag'] > 5]

crsp_daily.sort_values(by = ['permno', 'caldt'], inplace = True)

crsp_daily.drop(columns = ['shrcd', 'excd', 'siccd', 'vol', 'shr', 'prc_lag'], inplace = True)

In [4]:
# Adding column for positive and negative returns

crsp_daily['ret_class'] = np.where(crsp_daily['ret'].shift(1) >= 0, '1', '0')

In [5]:
# Calculate rolling yearly number of positive and negative days for each stock

n = 252

crsp_daily['pos_days'] = crsp_daily.groupby('permno').rolling(window = n, min_periods = n)['ret_class'].sum().reset_index(level=0, drop=True)

crsp_daily['neg_days'] = n - crsp_daily['pos_days']

crsp_daily['%pos'] = crsp_daily['pos_days'] / n

crsp_daily['%neg'] = crsp_daily['neg_days'] / n

crsp_daily['%neg - %pos'] = crsp_daily['%neg'] - crsp_daily['%pos']

crsp_daily.drop(columns = ['ret_class', 'pos_days', 'neg_days', '%pos', '%neg'], inplace = True)

crsp_daily.dropna(inplace = True)

In [6]:
crsp_daily

Unnamed: 0,permno,caldt,prc,ret,%neg - %pos
614,10001,1987-01-08,7.00000,0.076923,-0.611111
615,10001,1987-01-09,7.00000,0.000000,-0.619048
616,10001,1987-01-12,7.00000,0.000000,-0.619048
617,10001,1987-01-13,6.75000,-0.035714,-0.619048
618,10001,1987-01-14,6.75000,0.000000,-0.611111
...,...,...,...,...,...
105258375,93436,2023-12-22,252.53999,-0.007701,-0.095238
105258376,93436,2023-12-26,256.60999,0.016116,-0.095238
105258377,93436,2023-12-27,261.44000,0.018822,-0.103175
105258378,93436,2023-12-28,253.17999,-0.031594,-0.111111
