In [2]:
# packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

### Kenneth French 'Market Equity'-data. 
ME is market cap at the end of the previous month. 
PRIOR_RET is the prior return measured from month t-12 to month t-2.

$\textbf{Construction:}$ The portfolios, which are constructed monthly, are the intersections of 2 portfolios formed on size (market equity, ME) and 3 portfolios formed on prior (2-12) return. The monthly size breakpoint is the median NYSE market equity. The monthly prior (2-12) return breakpoints are the 30th and 70th NYSE percentiles.

$\textbf{Stocks:}$ The six portfolios constructed each month include NYSE, AMEX, and NASDAQ stocks with prior return data. To be included in a portfolio for month t (formed at the end of month t-1), a stock must have a price for the end of month t-13 and a good return for t-2. In addition, any missing returns from t-12 to t-3 must be -99.0, CRSP's code for a missing price. Each included stock also must have ME for the end of month t-1.

$\textbf{Column Names:}$
- SMALL LoPRIOR: 

SMALLer firms (lower market equity than NYSE market median) and lower returns (beneath 30th prior (2-12) percentile) in the PRIOR period.
- ME1 PRIOR2: 

SMALLer firms (lower market equity than NYSE market median) and medium returns (between 30th and 70th prior (2-12) percentile) in the PRIOR period.
- SMALL HiPRIOR: 

SMALLer firms (lower market equity than NYSE market median) and higher returns (above 70th prior (2-12) percentile) in the PRIOR period.
- BIG LoPRIOR: 

BIGger firms (higher market equity than NYSE market median) and lower returns (beneath 30th prior (2-12) percentile) in the PRIOR period.
- ME2 PRIOR2: 

BIGger firms (higher market equity than NYSE market median) and medium returns (between 30th and 70th prior (2-12) percentile) in the PRIOR period.
- BIG HiPRIOR: 

BIGger firms (higher market equity than NYSE market median) and higher returns (above 70th prior (2-12) percentile) in the PRIOR period.

### Average Value Weighted Returns -- Monthly:

In [3]:
# Value-weighted monthly returns: read the table that begins after the first
# 11 lines of the raw file (header row + 1171 data rows).
pf_6_vwr = (
    pd.read_csv("Data/6_Portfolios_ME_Prior_12_2.csv", skiprows=11, nrows=1171)
    .rename(columns={"Unnamed: 0": "Date"})
    # YYYYMM parses to the first day of the month; MonthEnd() shifts it to the last day.
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"], format="%Y%m") + pd.offsets.MonthEnd())
    # Keep the window 1927-01-01 .. 2023-12-31, both bounds inclusive.
    .loc[lambda frame: frame["Date"].between(pd.Timestamp('1927-01-01'), pd.Timestamp('2023-12-31'))]
    .set_index("Date")
    .sort_index(ascending=False)  # newest month first
)
# Optional column reordering, currently disabled:
#pf_6_vwr = pf_6_vwr[['SMALL LoPRIOR', 'SMALL HiPRIOR', 'BIG LoPRIOR', 'BIG HiPRIOR', 'ME1 PRIOR2', 'ME2 PRIOR2']]

In [4]:
# Count, per column, how often one of the sentinel codes appears in the
# value-weighted returns, and keep only the columns with at least one hit.
missing_values_vwr = [-99.99, -999]
missing_count_vwr = (
    pf_6_vwr.isin(missing_values_vwr)
    .sum()
    .loc[lambda hits: hits > 0]
)

# NaN and empty-string counts are not computed in this cell.
print(missing_count_vwr)

Series([], dtype: int64)


### Number of Firms in Portfolios - Monthly:

In [5]:
# Number of firms per portfolio: read the table that begins after the first
# 2563 lines of the raw file (header row + 1171 data rows).
pf_6_nf = (
    pd.read_csv("Data/6_Portfolios_ME_Prior_12_2.csv", skiprows=2563, nrows=1171)
    .rename(columns={"Unnamed: 0": "Date"})
    # YYYYMM parses to the first day of the month; MonthEnd() shifts it to the last day.
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"], format="%Y%m") + pd.offsets.MonthEnd())
    # Keep the window 1927-01-01 .. 2023-12-31, both bounds inclusive.
    .loc[lambda frame: frame["Date"].between(pd.Timestamp('1927-01-01'), pd.Timestamp('2023-12-31'))]
    .set_index("Date")
    .sort_index(ascending=False)  # newest month first
)

In [6]:
# Count, per column, how often one of the sentinel codes appears in the
# firm-count table, and keep only the columns with at least one hit.
missing_values_nf = [-99.99, -999]
missing_count_nf = (
    pf_6_nf.isin(missing_values_nf)
    .sum()
    .loc[lambda hits: hits > 0]
)

# NaN and empty-string counts are not computed in this cell.
print(missing_count_nf)

Series([], dtype: int64)


### Average Firm Size - Monthly:

In [7]:
# Average firm size per portfolio: read the table that begins after the first
# 3738 lines of the raw file (header row + 1171 data rows).
pf_6_afs = (
    pd.read_csv("Data/6_Portfolios_ME_Prior_12_2.csv", skiprows=3738, nrows=1171)
    .rename(columns={"Unnamed: 0": "Date"})
    # YYYYMM parses to the first day of the month; MonthEnd() shifts it to the last day.
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"], format="%Y%m") + pd.offsets.MonthEnd())
    # Keep the window 1927-01-01 .. 2023-12-31, both bounds inclusive.
    .loc[lambda frame: frame["Date"].between(pd.Timestamp('1927-01-01'), pd.Timestamp('2023-12-31'))]
    .set_index("Date")
    .sort_index(ascending=False)  # newest month first
)

In [8]:
# Count, per column, how often one of the sentinel codes appears in the
# average-firm-size table, and keep only the columns with at least one hit.
missing_values_afs = [-99.99, -999]
missing_count_afs = (
    pf_6_afs.isin(missing_values_afs)
    .sum()
    .loc[lambda hits: hits > 0]
)

# NaN and empty-string counts are not computed in this cell.
print(missing_count_afs)

Series([], dtype: int64)


Calculate the return of the entire stock market:

In [9]:
# Portfolio market cap = number of firms x average firm size (element-wise,
# aligned on date and portfolio column).
market_cap = pf_6_nf * pf_6_afs

# Each portfolio's share of the summed market cap for that month.
total_market_cap = market_cap.sum(axis="columns")
weights = market_cap.div(total_market_cap, axis="index")

# Cap-weighted average of the six portfolio returns, per month.
weighted_returns = pf_6_vwr * weights
total_market_return = weighted_returns.sum(axis="columns")

# Copy of the portfolio returns with the market return (rounded to 2 decimals)
# appended as the last column.
pf_6_returns = pf_6_vwr.assign(**{"Market Return": total_market_return.round(2)})

Save cleaned files:

In [17]:
# Put the rows back in chronological order (oldest month first) before export.
pf_6_returns = pf_6_returns.sort_index()

In [19]:
# Create the output folder if it is missing: to_csv does not create
# directories, so without this the export fails on a fresh checkout.
os.makedirs("Data_clean", exist_ok=True)

# Write the portfolio returns (including the 'Market Return' column) to disk.
pf_6_returns.to_csv("Data_clean/6_Portfolios_ME_Prior_12_2_returns.csv")

# Export of the firm-count and firm-size tables is left disabled:
#pf_6_nf.to_csv("Data_clean/6_Portfolios_ME_Prior_12_2_nf.csv")
#pf_6_afs.to_csv("Data_clean/6_Portfolios_ME_Prior_12_2_afs.csv")