In [25]:
import numpy as np
import pandas as pd
import glob

# Collect the permnos that occur in each TAQ extract.
# Every file contributes its distinct permnos, appended to permnos.txt
# as one integer per line.
for dta_path in glob.iglob("sp500_taq/*.dta"):
    taq = pd.read_stata(dta_path)
    permnos_in_file = taq['permno'].unique()
    with open("permnos.txt", "ab") as out:
        np.savetxt(out, permnos_in_file, fmt="%i")

# permnos.txt now holds the permnos of every file; a permno that occurs in
# several files is listed several times (and again on every re-run, because
# the file is opened in append mode).
# De-duplicate from a unix shell with:
#   sort -u permnos.txt >> output.txt

In [42]:
import numpy as np
import pandas as pd

bdf = pd.read_csv("beta.csv")
bdf['DATE'] = bdf['DATE'].apply(str) # Turn DATE into string (ex: '19940131')


def build_strategy_quintiles(betas, factor, first_year=1994, last_year=2018, n_bins=5):
    """Sort stocks into quantile portfolios on one factor beta, month by month.

    For every calendar month from January ``first_year`` to December
    ``last_year``, the rows of ``betas`` whose ``DATE`` string starts with
    that ``YYYYMM`` are ranked on ``factor`` with ``pd.qcut`` and labelled
    ``0 .. n_bins - 1`` (fewer labels when duplicate bin edges are dropped).
    The returned ``DATE`` is the *following* month as ``YYYYMM``
    (ex: 19940131 -> 199402, 19941231 -> 199501) -- presumably so that a
    ranking is applied to the next month's returns; confirm with the
    downstream merge.

    Parameters
    ----------
    betas : pandas.DataFrame
        Must contain ``DATE`` (strings beginning with ``YYYYMM``), ``PERMNO``
        and the ``factor`` column.
    factor : str
        Column to rank on; also written to the ``strategy`` column.
    first_year, last_year : int
        Inclusive range of years to process.
    n_bins : int
        Number of quantile buckets requested from ``pd.qcut``.

    Returns
    -------
    pandas.DataFrame
        Columns ``strategy``, ``DATE``, ``PERMNO``, ``quintile``, ordered by
        month and, within a month, in the row order of ``betas``. Months with
        no rows are skipped instead of being passed to ``pd.qcut``.
    """
    columns = ['strategy', 'DATE', 'PERMNO', 'quintile']

    # Split the frame by YYYYMM prefix once, instead of re-scanning every row
    # for each of the months.
    by_month = {key: rows for key, rows in betas.groupby(betas['DATE'].str[:6])}

    frames = []
    for year in range(first_year, last_year + 1):
        for month in range(1, 13):
            rows = by_month.get(str(year) + str(month).zfill(2))
            if rows is None:
                continue

            # Correct date (ex: 19940131 == 199402 & 19941231 == 199501)
            if month == 12:
                next_month = str(year + 1) + '01'
            else:
                next_month = str(year) + str(month + 1).zfill(2)

            frames.append(pd.DataFrame({
                'strategy': factor,
                'DATE': next_month,
                'PERMNO': rows['PERMNO'],
                'quintile': pd.qcut(rows[factor], n_bins, labels=False, duplicates='drop'),
            }, columns=columns))

    if not frames:
        return pd.DataFrame(columns=columns)
    # One concat at the end: DataFrame.append was removed in pandas 2.0 and
    # copies the whole accumulated frame on every call.
    return pd.concat(frames, ignore_index=True)[columns]


# One frame per strategy (names kept from the original cell)
sdf = build_strategy_quintiles(bdf, 'b_mkt')
tdf = build_strategy_quintiles(bdf, 'b_smb')
rdf = build_strategy_quintiles(bdf, 'b_hml')

# Merge them together into one master dataframe
mdf = pd.concat([sdf, tdf, rdf], ignore_index=True)
# Rename columns
mdf = mdf.rename(columns={'DATE': 'date', 'PERMNO': 'permno', 'quintile': 'portfolio'})
# Save as csv
mdf.to_csv('mdf.csv', encoding='utf-8', index=False)

In [3]:
import numpy as np
import pandas as pd
import glob

# Calculate intraday returns for every TAQ price file
import os


def compute_intraday_returns(prices):
    """Replace intraday price levels with returns within each (permno, day).

    ``iprice`` becomes the percentage change from the previous row of the same
    ``permno`` on the same ``td``; the first observation of each (permno, td)
    pair has no predecessor and is set to 0.

    Parameters
    ----------
    prices : pandas.DataFrame
        Needs the columns ``permno``, ``td`` and ``iprice``. Rows are assumed
        to be in time order within a day -- TODO confirm against the TAQ
        extracts. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Same columns as ``prices`` with ``permno`` cast to int32 and a fresh
        0..n-1 index. Rows are ordered by permno (order of first appearance),
        then by ``td`` (order of first appearance), then by their original
        position. Rows with a missing ``td`` are dropped, because they never
        matched the per-date equality filter this replaces.
    """
    out = prices.dropna(subset=['td']).reset_index(drop=True)
    out['permno'] = out['permno'].astype('int32')
    if out.empty:
        return out

    # A single grouped pass replaces one boolean scan of the whole frame per
    # (permno, date) pair plus a DataFrame.append for each of them.
    out['iprice'] = (
        out.groupby(['permno', 'td'], sort=False)['iprice']
        .pct_change()
        .fillna(0)  # Clean NaN to 0
    )

    # np.lexsort is stable and treats its LAST key as the primary one.
    permno_rank = pd.factorize(out['permno'])[0]
    date_rank = pd.factorize(out['td'])[0]
    order = np.lexsort((date_rank, permno_rank))
    return out.iloc[order].reset_index(drop=True)


for filename in glob.iglob("sp500_taq/*.dta"):
    # Read price data and turn it into returns
    df = pd.read_stata(filename)
    ret = compute_intraday_returns(df)

    # export as csv: sp500_taq/<name>.dta -> sp500_ret/<name>.csv
    # (built with os.path so it also works when glob yields backslashes)
    os.makedirs('sp500_ret', exist_ok=True)
    stem = os.path.splitext(os.path.basename(filename))[0]
    newf = os.path.join('sp500_ret', stem + '.csv')
    ret.to_csv(newf, encoding='utf-8', index=False)

In [43]:
import numpy as np
import pandas as pd

# Change mdf date format
# 199402 -> 1994-02

def convert_date(date_oldformat):
    """Convert a compact ``YYYYMM`` date into ``YYYY-MM`` (ex: 199402 -> '1994-02').

    Parameters
    ----------
    date_oldformat : int or str
        Date whose first four characters are the year; anything after them is
        kept unchanged behind the dash.

    Returns
    -------
    str
        The dashed date. A value that already has a dash right after the year
        is returned unchanged, so applying the conversion twice (for example
        by re-running a cell that rewrites its own input file) does not
        produce '1994--02'.
    """
    date_oldformat = str(date_oldformat)
    if date_oldformat[4:5] == '-':
        # Already in the new format
        return date_oldformat
    date_newformat = date_oldformat[:4] + '-' + date_oldformat[4:]
    return date_newformat

# Load the portfolio assignments, put the 'date' column through convert_date,
# and write the result back over the same file.
mdf = pd.read_csv('mdf.csv')
mdf = mdf.assign(date=mdf['date'].apply(convert_date))

mdf.to_csv('mdf.csv', index=False, encoding='utf-8')

In [4]:
import numpy as np
import pandas as pd
import glob

# Assign portfolios to intraday returns for each strategy
import os

mdf = pd.read_csv('mdf.csv')

# One table of portfolio assignments per strategy
bmkt_df = mdf.loc[mdf['strategy'] == 'b_mkt'].copy()
bsmb_df = mdf.loc[mdf['strategy'] == 'b_smb'].copy()
bhml_df = mdf.loc[mdf['strategy'] == 'b_hml'].copy()

# (output folder, assignments) pairs; the folder name equals the strategy name
strategy_tables = [('b_mkt', bmkt_df), ('b_smb', bsmb_df), ('b_hml', bhml_df)]

for filename in glob.iglob("sp500_ret/*.csv"):
    rdf = pd.read_csv(filename)
    rdf['permno'] = rdf['permno'].astype('int64')
    # Merge key: first 7 characters of td (ex: '1994-02'); assumes td is a
    # 'YYYY-MM-DD...' string and mdf.csv dates are 'YYYY-MM' -- TODO confirm.
    rdf['date'] = rdf['td'].str[:7]

    for strategy, assignments in strategy_tables:
        # Left merge keeps every return row; rows without an assignment get
        # NaN in the columns that come from mdf.csv.
        merged = (
            rdf.merge(assignments, how='left', on=['date', 'permno'])
            .drop(columns=['date'])
        )
        # Same file name under the strategy folder. Built with os.path rather
        # than str.replace('sp500_ret/', ...), which leaves the path unchanged
        # (and so overwrites the input) when glob yields backslashes.
        os.makedirs(strategy, exist_ok=True)
        newf = os.path.join(strategy, os.path.basename(filename))
        merged.to_csv(newf, encoding='utf-8', index=False)


  interactivity=interactivity, compiler=compiler, result=result)


In [23]:
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt

# Calculate portfolio returns
# For each unique date / strategy, there is one portfolio
# Short 0, Long 4
import os


def long_short_returns(df, strategy, short_portfolio=0, long_portfolio=4):
    """Equal-weighted long-short return series for one file of intraday returns.

    For each trading day ``td``, every stock in ``long_portfolio`` is held
    with weight ``+w`` and every stock in ``short_portfolio`` with weight
    ``-w``, where ``w = 1 / (distinct long permnos + distinct short permnos)``
    on that day. The weighted ``iprice`` values are then summed per
    ``(tr, td)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``permno``, ``td``, ``tr``, ``iprice`` and
        ``portfolio``.
    strategy : str
        Label written to the ``strategy`` column of the result.
    short_portfolio, long_portfolio : int
        Portfolio ids forming the short and the long leg.

    Returns
    -------
    pandas.DataFrame
        Columns ``strategy``, ``tr``, ``td``, ``iprice``. Days appear in
        order of first appearance in ``df``, sorted by ``tr`` within a day.
        Days on which neither leg holds any stock are skipped (the weight
        would be 1/0); if no day qualifies the frame is empty.
    """
    columns = ['strategy', 'tr', 'td', 'iprice']
    frames = []

    for _, day in df.groupby('td', sort=False):
        is_short = day['portfolio'] == short_portfolio
        is_long = day['portfolio'] == long_portfolio

        n_names = (day.loc[is_short, 'permno'].nunique()
                   + day.loc[is_long, 'permno'].nunique())
        if n_names == 0:
            # e.g. a day whose rows have no portfolio assignment at all
            continue
        weight = 1 / n_names

        # +1 for long rows, -1 for short rows
        in_legs = is_short | is_long
        sign = is_long.astype(float) - is_short.astype(float)
        legs = day.loc[in_legs, ['tr', 'td', 'iprice']]
        legs = legs.assign(iprice=legs['iprice'] * sign[in_legs] * weight)

        # Sum both legs to get long-short portfolio returns
        day_ret = legs.groupby(['tr', 'td'], as_index=False)['iprice'].sum()
        day_ret.insert(0, 'strategy', strategy)
        frames.append(day_ret[columns])

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)


strategies = ['b_mkt','b_smb','b_hml']
for strat in strategies:
    directory = strat + '/*.csv'
    for filename in glob.iglob(directory):
        df = pd.read_csv(filename)
        # Label rows with the strategy being processed, not a fixed 'b_mkt'
        mdf = long_short_returns(df, strat)

        # Save to folder: portfolio_ret/<strategy>/<file>.csv
        newf = os.path.join('portfolio_ret', filename)
        os.makedirs(os.path.dirname(newf), exist_ok=True)
        mdf.to_csv(newf, encoding='utf-8', index=False)


# zdf = df.loc[(df['td'] == '1994-02-04 00:00:00') & (df['portfolio'] == 0)].copy()
# fdf = df.loc[(df['td'] == '1994-02-04 00:00:00') & (df['portfolio'] == 4)].copy()
# # get weight
# weight = 1 / (len(zdf.permno.unique()) + len(fdf.permno.unique()))
# zdf['iprice'] *= (-1 * weight) # Short
# zsum = zdf.groupby(['tr', 'td'], as_index=False)['iprice'].sum()
# fdf['iprice'] *= weight # Long
# fsum = fdf.groupby(['tr', 'td'], as_index=False)['iprice'].sum()
# merged = pd.concat([zsum, fsum]).groupby(["tr", "td"], as_index=False)["iprice"].sum()
# merged.head()

  interactivity=interactivity, compiler=compiler, result=result)
