### Emprical Asset Pricing - Problem Set 1

## 1. Time-Series Predictability of returns and dividend growth

### 1.1 Data Extraction and Cleaning

In [1]:
# Packages
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime as dt
import wrds
from datetime import datetime, timedelta
import warnings
from pandas.tseries.offsets import MonthEnd
warnings.simplefilter('ignore') # 

# Setups
pd.set_option("display.max_rows", 100)

In [2]:
# Set Up WRDS connection
db = wrds.Connection(wrds_username='vince_solis') # make sure to change the username. 

Loading library list...
Done


In [3]:
def get_libraries(db, printn = False):
    """
    get list of libraries
    """

    # List all libraries in WRDS
    libs = db.list_libraries()
    libs.__class__  # Notice that libs is a list.
    libs = pd.DataFrame({'libraries': libs})  # Transform libs to a Pandas data frame to have a better display.
    libs = libs.sort_values(['libraries']).reset_index(drop = True)
    
    # https://wrds-www.wharton.upenn.edu/pages/browse-data-concept/
    if printn:
        print(libs.to_string())  # tr_mutualfunds, tr_13f, tr_13f
    else:
        return libs

def get_tables_in_library(db, library):
    """
    List of tables in a library
    """

    return db.list_tables(library=library)

In [4]:
# Get specific columns within a particular range from table dsi (daily stock indices) from library crsp_q_stock
start_date, end_date = dt.date(1945, 1, 1), dt.date(2023, 12, 31)
start_date, end_date = start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')

df = db.raw_sql("SELECT date, vwretd, vwretx  FROM crsp_q_stock.dsi WHERE (date BETWEEN '{}' AND '{}')".format(start_date, end_date))
df.head()

AttributeError: 'Connection' object has no attribute 'cursor'

In [32]:
df = pd.read_csv('./data/crsp_1945-2023.csv', index_col=0)
df.index = pd.to_datetime(df.index).to_period('M')
df.head()

Unnamed: 0_level_0,vwretd,vwretx
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1945-01,0.020218,0.018951
1945-02,0.064477,0.059894
1945-03,-0.039177,-0.043164
1945-04,0.078232,0.076981
1945-05,0.018185,0.012439


In [33]:
ff = pd.read_csv('./data/F-F_Rf.csv', infer_datetime_format=True, index_col=0)
ff.index = pd.to_datetime(ff.index).to_period('M')
ff.head()

Unnamed: 0_level_0,rf
date,Unnamed: 1_level_1
1926-07,0.22
1926-08,0.25
1926-09,0.23
1926-10,0.32
1926-11,0.31


### 1.2 Computing monthly dividend

$$ 
\frac{D_t}{P_t} = \frac{P_{t+1} + D_{t+1}}{P_t} -  \frac{P_{t+1}}{P_t} \quad \Rightarrow \quad D_t = \frac{D_t}{P_t} \times P_t 
$$


In [34]:
df['dividend_ret'] = df['vwretd'] - df['vwretx']
df.head()

Unnamed: 0_level_0,vwretd,vwretx,dividend_ret
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1945-01,0.020218,0.018951,0.001267
1945-02,0.064477,0.059894,0.004583
1945-03,-0.039177,-0.043164,0.003987
1945-04,0.078232,0.076981,0.001251
1945-05,0.018185,0.012439,0.005746


Constructing a price index, here we set the intial price as 1

In [35]:
df['price_index'] = (1 + df['vwretd']).cumprod()
# df['price_index'] = df['price_index']*10
df.head()

Unnamed: 0_level_0,vwretd,vwretx,dividend_ret,price_index
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1945-01,0.020218,0.018951,0.001267,1.020218
1945-02,0.064477,0.059894,0.004583,1.085999
1945-03,-0.039177,-0.043164,0.003987,1.043452
1945-04,0.078232,0.076981,0.001251,1.125084
1945-05,0.018185,0.012439,0.005746,1.145543


In [36]:
df['dividend'] = df['dividend_ret'] * df['price_index'].shift(1)
df.fillna(method='bfill',inplace=True)

### 1.3 Aggregating dividends

We assume dividend being reinvested at risk-free rate and alternatively into aggregate stock market. 

In [53]:
df_temp = df.merge(ff, left_index=True, right_index=True)
df_temp.head()

Unnamed: 0_level_0,vwretd,vwretx,dividend_ret,price_index,dividend,rf
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1945-01,0.020218,0.018951,0.001267,1.020218,0.004676,0.03
1945-02,0.064477,0.059894,0.004583,1.085999,0.004676,0.02
1945-03,-0.039177,-0.043164,0.003987,1.043452,0.00433,0.02
1945-04,0.078232,0.076981,0.001251,1.125084,0.001305,0.03
1945-05,0.018185,0.012439,0.005746,1.145543,0.006465,0.03


In [54]:
# transfer the risk free rate to numarical and adjust the scale
df_temp['rf'] = pd.to_numeric(df_temp['rf'], errors='coerce')/100
df_temp['compound_month'] = 12 - df_temp.index.month

def compound_dividend(row, reinvestment_rate):
    """
    calculates the dividend compouned at reinvestment_rate(geometric return)
    """
    rate = row[reinvestment_rate] if reinvestment_rate == 'rf' else row['vwretd']
    months = row['compound_month']
    return row['dividend'] * ((1 + rate)** months )

In [55]:
df_temp['dividend_compounded_cash'] = df_temp.apply(compound_dividend, reinvestment_rate='monthly_interest_rate', axis=1)
df_temp['dividend_compounded_stock'] = df_temp.apply(compound_dividend, reinvestment_rate='vwretd', axis=1)

In [63]:
# Assuming 'df_temp' is your DataFrame
df_temp['year'] = df_temp.index.year

# Compute Compounded Annual Returns for vwretx
compounded_annual_returns_dv = df_temp.groupby('year')['vwretd'].apply(lambda x: (x + 1).prod() - 1)

# Compute Compounded Annual Returns for vwretx
compounded_annual_returns_xdv = df_temp.groupby('year')['vwretx'].apply(lambda x: (x + 1).prod() - 1)

# Adjusting calculations for annual dividends and their growth
annual_div_zero = df_temp.groupby('year')['dividend'].sum()
annual_div_cash = df_temp.groupby('year')['dividend_compounded_cash'].sum()
annual_div_stock = df_temp.groupby('year')['dividend_compounded_stock'].sum()

# Calculate annual dividend growth as the percentage difference (year-over-year change)
dg_zero = annual_div_zero.pct_change()
dg_cash = annual_div_cash.pct_change()
dg_stock = annual_div_stock.pct_change()

price_index_annual = df_temp.groupby('year')['price_index'].last()

log_price_dividend_ratio_zero = np.log(price_index_annual / annual_div_zero)
log_price_dividend_ratio_cash = np.log(price_index_annual / annual_div_cash)
log_price_dividend_ratio_stock = np.log(price_index_annual / annual_div_cash)

# Creating a new DataFrame to hold all calculated data
annual_df = pd.DataFrame({
    'ret_dv': compounded_annual_returns_dv,
    'ret_xdv': compounded_annual_returns_xdv,
    'g_Zero': dg_zero,
    'g_Cash': dg_cash,
    'g_Stock': dg_stock,
    'log_PD_Zero': log_price_dividend_ratio_zero,
    'log_PD_Cash': log_price_dividend_ratio_cash,
    'log_PD_Stock': log_price_dividend_ratio_stock
})

# Resetting index to have 'year' as a column
annual_df.reset_index(inplace=True)

In [64]:
annual_df.describe().round(2)

Unnamed: 0,year,ret_dv,ret_xdv,g_Zero,g_Cash,g_Stock,log_PD_Zero,log_PD_Cash,log_PD_Stock
count,79.0,79.0,79.0,78.0,78.0,78.0,79.0,79.0,79.0
mean,1984.0,0.13,0.09,0.1,0.1,0.1,3.57,3.5,3.5
std,22.95,0.17,0.17,0.08,0.15,0.15,0.43,0.43,0.43
min,1945.0,-0.38,-0.4,-0.14,-0.16,-0.16,2.77,2.68,2.68
25%,1964.5,0.0,-0.03,0.06,-0.0,-0.0,3.26,3.14,3.14
50%,1984.0,0.16,0.12,0.09,0.09,0.09,3.52,3.48,3.48
75%,2003.5,0.25,0.22,0.13,0.18,0.18,3.94,3.84,3.84
max,2023.0,0.5,0.43,0.32,0.63,0.63,4.49,4.42,4.42


In [None]:
# compute mean and volatility
mean_growth_cash = annual_df['g_Cash'].mean()
volatility_growth_cash = annual_df['g_Cassh'].std()
mean_growth_market = annual_df['g_Stock'].mean()
volatility_growth_market = annual_df['g_Stock'].std()

print(f"Dividend growth mean_reinvest in cash market:{mean_growth_cash}")
print(f"Dividend growth volatility_reinvest in cash market:{volatility_growth_cash}")

print("Dividend growth mean_reinvest in stock market:")
print(mean_growth_market)
print("Dividend growth volatility_reinvest in stock market:")
print(volatility_growth_market)