In [142]:
import pandas as pd

# CRSP/COMPUSTAT Merged

In [162]:
# Read the CRSP/Compustat merged test extract from CSV into a DataFrame
CRSP_COMPUSTAT_MERGED = pd.read_csv(
    'Data/CRSP_COMPUSTAT_MERGED_TEST.csv',
)

# Bare last expression: render the loaded frame as the cell output
CRSP_COMPUSTAT_MERGED

Unnamed: 0,LPERMNO,datacqtr,atq,ceqq,cheq,ltq,niq
0,54594,2017Q1,1502.1,885.7,10.1,616.4,13.7
1,54594,2017Q2,1504.1,914.2,10.3,589.9,21.2
2,54594,2017Q3,1531.7,924.7,15.1,607.0,10.6
3,54594,2017Q4,1544.3,906.5,27.1,637.8,-22.5
4,54594,2018Q1,1512.2,915.2,34.6,597.0,15.5
5,54594,2018Q2,1524.7,936.3,41.6,588.4,12.0
6,54594,2018Q3,1537.8,929.1,44.8,608.7,15.1
7,54594,2018Q4,1604.2,935.6,40.7,668.6,7.0
8,1045,2017Q1,52627.0,3457.0,7219.0,49170.0,340.0
9,21020,2017Q2,53336.0,3715.0,7440.0,49621.0,864.0


In [166]:
# Split Up 'datacqtr' (strings such as '2017Q1') into calendar year and quarter
CRSP_COMPUSTAT_MERGED['CalendarYear'] = CRSP_COMPUSTAT_MERGED['datacqtr'].str.slice(0,4)
CRSP_COMPUSTAT_MERGED['Quarter'] = CRSP_COMPUSTAT_MERGED['datacqtr'].str.slice(4)

# Convert Quarters to Pandas Datetimes
# The 'YYYY-Qn' strings are parsed as quarterly periods; to_timestamp() then
# yields the first day of each quarter (e.g. 2017-Q1 -> 2017-01-01).
CRSP_COMPUSTAT_MERGED['datacqtr_formatted'] = CRSP_COMPUSTAT_MERGED["CalendarYear"] + "-" + CRSP_COMPUSTAT_MERGED["Quarter"]
CRSP_COMPUSTAT_MERGED['QuarterStart'] = pd.PeriodIndex(CRSP_COMPUSTAT_MERGED['datacqtr_formatted'], freq='Q').to_timestamp()

# Calculate Start of Quarter and End of Quarter Dates
# QuarterStart is the first day of a month, so adding MonthEnd(3) lands on the
# last day of the quarter's third month (e.g. 2017-01-01 -> 2017-03-31).
CRSP_COMPUSTAT_MERGED['QuarterEnd'] = CRSP_COMPUSTAT_MERGED['QuarterStart'] + pd.offsets.MonthEnd(3)
# 'M' is the documented monthly period alias (the lowercase 'm' spelling is not).
CRSP_COMPUSTAT_MERGED['QuarterStart_Month'] = CRSP_COMPUSTAT_MERGED['QuarterStart'].dt.to_period('M')
CRSP_COMPUSTAT_MERGED['QuarterEnd_Month'] = CRSP_COMPUSTAT_MERGED['QuarterEnd'].dt.to_period('M')

# Calculate Lagged Dates (Year-Month)
# Date_LagN is the quarter-end month shifted forward by N months
# (e.g. quarter ending 2017-03 -> Date_Lag2 = 2017-05).
CRSP_COMPUSTAT_MERGED['Date_Lag2'] = CRSP_COMPUSTAT_MERGED['QuarterEnd_Month'] + 2
CRSP_COMPUSTAT_MERGED['Date_Lag3'] = CRSP_COMPUSTAT_MERGED['QuarterEnd_Month'] + 3
CRSP_COMPUSTAT_MERGED['Date_Lag4'] = CRSP_COMPUSTAT_MERGED['QuarterEnd_Month'] + 4
CRSP_COMPUSTAT_MERGED.head()

Unnamed: 0,LPERMNO,datacqtr,atq,ceqq,cheq,ltq,niq,CalendarYear,Quarter,datacqtr_formatted,QuarterStart,QuarterEnd,QuarterStart_Month,QuarterEnd_Month,Date_Lag2,Date_Lag3,Date_Lag4
0,54594,2017Q1,1502.1,885.7,10.1,616.4,13.7,2017,Q1,2017-Q1,2017-01-01,2017-03-31,2017-01,2017-03,2017-05,2017-06,2017-07
1,54594,2017Q2,1504.1,914.2,10.3,589.9,21.2,2017,Q2,2017-Q2,2017-04-01,2017-06-30,2017-04,2017-06,2017-08,2017-09,2017-10
2,54594,2017Q3,1531.7,924.7,15.1,607.0,10.6,2017,Q3,2017-Q3,2017-07-01,2017-09-30,2017-07,2017-09,2017-11,2017-12,2018-01
3,54594,2017Q4,1544.3,906.5,27.1,637.8,-22.5,2017,Q4,2017-Q4,2017-10-01,2017-12-31,2017-10,2017-12,2018-02,2018-03,2018-04
4,54594,2018Q1,1512.2,915.2,34.6,597.0,15.5,2018,Q1,2018-Q1,2018-01-01,2018-03-31,2018-01,2018-03,2018-05,2018-06,2018-07


# CRSP (Monthly)

In [169]:
# Read in Dataframe
CRSP_MONTHLY = pd.read_csv('Data/CRSP_MONTHLY_TEST.csv')

# Convert to Datetime
CRSP_MONTHLY['date'] = pd.to_datetime(CRSP_MONTHLY['date'])

# Convert Date to Month Period
# 'M' is the documented monthly period alias (the lowercase 'm' spelling is not).
# The resulting year-month period is the key later matched against the
# Date_Lag* columns of the CRSP/Compustat frame.
CRSP_MONTHLY['date_month'] = CRSP_MONTHLY['date'].dt.to_period('M')

CRSP_MONTHLY.head()

Unnamed: 0,PERMNO,date,PRC,SHROUT,CFACPR,date_month
0,21020,2017-01-31,44.25,507294,1,2017-01
1,21020,2017-02-28,46.36,504154,1,2017-02
2,21020,2017-03-31,42.3,495750,1,2017-03
3,21020,2017-04-28,42.62,492589,1,2017-04
4,21020,2017-05-31,48.41,492589,1,2017-05


In [172]:
# Select Accounting Features to Merge into CRSP Monthly Dataframe
CRSP_COMPUSTAT_features = ['atq', 'ceqq', 'cheq', 'ltq', 'niq']

# Select Features to Keep after Merge
featuresToKeep = ['PERMNO', 'date_month', 'PRC', 'SHROUT', 'CFACPR']

# Add Accounting Features to Features to keep after merge
featuresToKeep.extend(CRSP_COMPUSTAT_features)

# Start from the monthly market data only. Rebuilding from CRSP_MONTHLY each
# time keeps this cell idempotent when it is re-run.
temp = CRSP_MONTHLY[['PERMNO', 'date_month', 'PRC', 'SHROUT', 'CFACPR']]

# Add Lagged Accounting Features
# For each lag (2, 3, 4 months after the quarter-end month), attach a quarter's
# accounting values to the monthly row whose (PERMNO, date_month) equals
# (LPERMNO, Date_Lag{lag}). how='right' keeps every monthly row, matched or not.
for lag in range(2,5):
    CRSP_COMPUSTAT_merge_features = ['LPERMNO', f'Date_Lag{lag}']
    CRSP_COMPUSTAT_merge_features.extend(CRSP_COMPUSTAT_features)

    # On the first pass `temp` has no accounting columns, so no suffix is
    # applied. On later passes the values carried over from earlier lags
    # arrive as '<feature>_y' next to the freshly matched '<feature>'.
    temp = pd.merge(CRSP_COMPUSTAT_MERGED[CRSP_COMPUSTAT_merge_features],
                    temp,
                    how='right',
                    left_on=['LPERMNO', f'Date_Lag{lag}'],
                    right_on=['PERMNO', 'date_month'],
                    suffixes=('', '_y')
                    )

    # Update Features
    # Values matched at this lag take precedence; values carried over from
    # earlier lags fill the remaining gaps.
    for feature in CRSP_COMPUSTAT_features:
        carried = f'{feature}_y'
        if carried in temp.columns:
            temp[feature] = temp[feature].fillna(temp[carried])
            # Keyword form: passing the axis positionally (`drop(labels, 1)`)
            # is rejected by pandas >= 2.0.
            temp = temp.drop(columns=[carried])

    # Drop the merge-key columns from the accounting side and fix column order
    temp = temp[featuresToKeep]

temp.head()

Unnamed: 0,PERMNO,date_month,PRC,SHROUT,CFACPR,atq,ceqq,cheq,ltq,niq
0,21020,2017-01,44.25,507294,1,,,,,
1,21020,2017-02,46.36,504154,1,,,,,
2,21020,2017-03,42.3,495750,1,,,,,
3,21020,2017-04,42.62,492589,1,,,,,
4,21020,2017-05,48.41,492589,1,,,,,
