In [209]:
import pandas as pd
import numpy as np
import os
import re
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf

from linearmodels.system import SUR
from pathlib import Path
from IPython.display import display
from scipy.stats import pearsonr
from collections import OrderedDict
from sklearn.linear_model import LinearRegression
from linearmodels.system.results import WaldTestStatistic


import warnings
# Silence every warning for the rest of the session. Note that this also hides
# pandas deprecation / chained-assignment warnings raised by later cells.
warnings.filterwarnings('ignore')

In [210]:
# All data paths below are resolved relative to the current working directory.
root = Path.cwd()

In [211]:
def set_timeframe(data, start_date, end_date):
    """Return the rows of ``data`` dated between ``start_date`` and ``end_date``.

    The comparison is made on the index level named 'date' and both bounds are
    inclusive. ``data`` itself is not modified.
    """
    dates = data.index.get_level_values('date')
    in_window = (dates >= start_date) & (dates <= end_date)
    return data.loc[in_window]

In [212]:
# Load the predictor data set from <cwd>/data/predictors.parquet.brotli.
predictors = pd.read_parquet(root / 'data' / 'predictors.parquet.brotli')

In [213]:
# Keep only rows whose 'date' index level lies in 1925-01-01 .. 2022-01-01 (both ends inclusive).
predictors = set_timeframe(predictors, '1925-01-01', '2022-01-01')

# Handling Data

In [214]:
# Work on an independent deep copy so later column additions do not touch `predictors`.
regression = predictors.copy(deep=True)

In [215]:
# Drop every row that contains at least one missing value (pandas' default for dropna()).
regression = regression.dropna()
# Disabled alternative: keep all rows and replace missing values with 0.
# regression = regression.fillna(0)

In [216]:
# Bubble dates from paper
bubble_date = pd.read_csv(root / 'data' / 'bubbles.csv')
bubble_date = bubble_date.applymap(lambda s: s.lower() if type(s) == str else s)
bubble_date.date = pd.to_datetime(bubble_date.date)
bubble_date.date = bubble_date.date.dt.to_period('M').dt.to_timestamp('M')
bubble_date = bubble_date.set_index(['date', 'ind'])

In [217]:
# Add excess returns
regression['rf_two_vwr_ind'] = regression['two_vwr_ind'] - (regression['rf']/100)
regression['market_two_vwr_ind'] = regression['two_vwr_ind'] - regression['market_return']

In [218]:
# Display the distinct values of index level 1 (the 'ind' level, per the output below).
regression.index.get_level_values(1).unique()

Index(['autos', 'beer', 'boxes', 'chems', 'chips', 'coal', 'drugs', 'elceq',
       'fin', 'food', 'fun', 'hshld', 'mach', 'mines', 'oil', 'rtail', 'ships',
       'smoke', 'steel', 'telcm', 'trans', 'util', 'bldmt', 'labeq', 'rlest',
       'txtls', 'aero', 'hardw', 'bussv', 'clths', 'cnstr', 'medeq', 'agric',
       'meals', 'insur', 'banks', 'books', 'whlsl', 'toys', 'persv', 'other',
       'paper', 'rubbr', 'gold', 'guns', 'hlth', 'soda', 'softw', 'fabpr'],
      dtype='object', name='ind')

# Creating Table 4 (Bubbles for Fama)

In [219]:
# Rows of `regression` whose index entry also appears in `bubble_date`'s (date, ind) index.
all_bubble = regression[regression.index.isin(bubble_date.index)]

In [220]:
# Empty summary table: one row per characteristic, columns are filled in by the cells below.
table4 = pd.DataFrame(index=['Past 2-year Return', 'Excess Past 2-year Return', 'Volatility (VW)', 'Volatility (VW)- 1yr-Δ', 'Turnover (VW)',
                           'Turnover (VW)- 1yr-Δ', 'Firm Age (VW)', 'Age tilt', ' % Issuers', 'Book to Market (VW)', 'Sales Growth',
                           'CAPE', 'Acceleration'])
# Columns of `regression` that feed the rows above. The statistics are written
# into table4 positionally, so this list must stay in the same order (and have
# the same length) as the row labels.
column_order = ['two_vwr_ind', 'rf_two_vwr_ind', 'vol_vw', 'vol_vw_change', 'turnover_vw', 
                'turnover_vw_change', 'age_market_rank', 'age_tilt', 'issuance', 'btm_vwr', 
                'revenue_market_rank', 'cape', 'acc']

In [221]:
# Mean and standard deviation of every characteristic over all rows of `regression`.
all_ind_mean = regression[column_order].mean().to_numpy()
all_ind_std = regression[column_order].std().to_numpy()
# Plain arrays are assigned positionally, in `column_order` order.
table4['All industry-years Mean'] = all_ind_mean
table4['All industry-years STD'] = all_ind_std

In [222]:
# Same statistics restricted to the rows that appear in the bubble list.
run_up_mean = all_bubble[column_order].mean().to_numpy()
run_up_std = all_bubble[column_order].std().to_numpy()
table4['Run-ups Mean'] = run_up_mean
table4['Run-ups STD'] = run_up_std

### Detecting crashes

In [223]:
# Function to check if a crash occurred
def did_crash_occur(data, run_up_date, threshold=0.4, years=2):
    """Report whether a drawdown of at least ``threshold`` follows a run-up.

    Parameters
    ----------
    data : pandas.DataFrame
        Observations of a single industry, indexed by date, with a 'vwr_ind'
        column of simple returns.
    run_up_date : pandas.Timestamp
        Start of the observation window (the row at this date is included).
    threshold : float, default 0.4
        Minimum peak-to-trough loss, as a fraction of the peak, that counts as
        a crash.
    years : int, default 2
        Length of the observation window after ``run_up_date``.

    Returns
    -------
    int
        1 if a crash occurred inside the window, otherwise 0 (also 0 when the
        window contains no rows).
    """
    window_end = run_up_date + pd.DateOffset(years=years)
    # Select the return column directly; nothing is written back onto a slice of `data`.
    returns = data.loc[run_up_date:window_end, 'vwr_ind']

    # Value of one unit invested at the start of the window.
    wealth = (1 + returns).cumprod()
    # Drawdown is measured relative to the running peak of that value. Comparing
    # cumulative-return levels instead (cum <= cummax - 0.4) would measure the
    # loss in units of the starting value, not as a fraction of the peak.
    drawdown = wealth / wealth.cummax() - 1

    return int((drawdown <= -threshold).any())

In [224]:
# Run the crash detector once for every (date, industry) pair in the bubble list.
# Results are collected in a list and turned into a frame in one step; growing a
# DataFrame with pd.concat inside the loop copies it on every iteration.
crash_records = []
for (run_up_date, industry) in bubble_date.index:
    crash_records.append({
        'date': run_up_date,
        'ind': industry,
        'crash': did_crash_occur(regression.xs(industry, level=1), run_up_date),
    })

run_up_check_crash = pd.DataFrame(crash_records, columns=['date', 'ind', 'crash'])
run_up_check_crash = run_up_check_crash.set_index(['date', 'ind'])

# The split used by the following cells relies on the 'bubble' flag stored in
# bubbles.csv (1 -> crash, 0 -> no crash), not on the detector output above.
# Alternative split based on the detected crashes:
# no_crash = run_up_check_crash.loc[run_up_check_crash.crash==0]
# crash = run_up_check_crash.loc[run_up_check_crash.crash==1]
no_crash = bubble_date.loc[bubble_date.bubble == 0]
crash = bubble_date.loc[bubble_date.bubble == 1]

In [225]:
# Statistics for the rows of `regression` that match the entries in `crash`.
crash_bubble = regression.loc[regression.index.isin(crash.index)]
run_up_crash_mean = crash_bubble[column_order].mean().to_numpy()
run_up_crash_std = crash_bubble[column_order].std().to_numpy()
table4['Run-ups with Crash Mean'] = run_up_crash_mean
table4['Run-ups with Crash STD'] = run_up_crash_std

In [226]:
# Statistics for the rows of `regression` that match the entries in `no_crash`.
no_crash_bubble = regression.loc[regression.index.isin(no_crash.index)]
run_up_no_crash_mean = no_crash_bubble[column_order].mean().to_numpy()
run_up_no_crash_std = no_crash_bubble[column_order].std().to_numpy()
table4['Run-ups with no Crash Mean'] = run_up_no_crash_mean
table4['Run-ups with no Crash STD'] = run_up_no_crash_std

In [227]:
# Difference in means between the crash and no-crash groups, row by row.
table4['Crash minus no Crash Mean'] = table4['Run-ups with Crash Mean'].sub(
    table4['Run-ups with no Crash Mean']
)

### Regression

In [228]:
def t_stat(data, characteristics, y, cluster):
    """Regress ``y`` on each characteristic separately and tabulate the fits.

    For every column name in ``characteristics`` an OLS of ``data[y]`` on a
    constant plus that single column is estimated, with a cluster-robust
    covariance whose groups are ``data[cluster]``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``y``, ``cluster`` and every name in ``characteristics``.
    characteristics : list of str
        Regressor column names, one regression each.
    y : str
        Dependent-variable column.
    cluster : str
        Column holding the cluster labels.

    Returns
    -------
    pandas.DataFrame
        One row per characteristic with columns 'b' (slope), 't' (its
        t-value), 'r' (R-squared), 'f' (F statistic) and 'p' (F-test p-value).
        When ``characteristics`` has as many entries as the fixed label list
        below, rows carry those labels in order; otherwise rows are labelled
        with the column names themselves.
    """
    default_labels = ['Past 2-year Return', 'Excess Past 2-year Return', 'Volatility (VW)', 'Volatility (VW)- 1yr-Δ', 'Turnover (VW)',
                      'Turnover (VW)- 1yr-Δ', 'Firm Age (VW)', 'Age tilt', ' % Issuers', 'Book to Market (VW)', 'Sales Growth',
                      'CAPE', 'Acceleration']
    characteristics = list(characteristics)

    # The dependent variable and the cluster labels are the same for every fit.
    y_val = data[[y]]
    groups = data[cluster]

    results = []
    for X in characteristics:
        X_val = sm.add_constant(data[[X]])
        # Fit model with standard errors clustered on `groups`
        model_clustered = sm.OLS(y_val, X_val).fit(cov_type='cluster', cov_kwds={'groups': groups})

        # Metrics
        results.append({
            'b': model_clustered.params[X],
            't': model_clustered.tvalues[X],
            'r': model_clustered.rsquared,
            'f': model_clustered.fvalue,
            'p': model_clustered.f_pvalue
        })

    # A fixed-length index made the function fail for any list that did not
    # have exactly len(default_labels) entries; fall back to the column names.
    if len(characteristics) == len(default_labels):
        row_labels = default_labels
    else:
        row_labels = characteristics

    return pd.DataFrame(results, index=row_labels, columns=['b', 't', 'r', 'f', 'p'])

In [229]:
# Regressor columns, one univariate regression each. Same names and order as `column_order`,
# which is what lines the results up with the fixed row labels used in t_stat.
characteristics = ['two_vwr_ind', 'rf_two_vwr_ind', 'vol_vw', 'vol_vw_change', 'turnover_vw', 'turnover_vw_change', 'age_market_rank', 'age_tilt', 'issuance', 'btm_vwr', 'revenue_market_rank', 'cape', 'acc']

# Current VWR

In [230]:
# Copy of the run-up rows with the calendar year of each row's 'date' index
# level added as a 'Year' column (used as the clustering variable).
run_up_regression = all_bubble.assign(
    Year=all_bubble.index.get_level_values('date').year
)

In [231]:
# Left-join the columns of `bubble_date` (including the 'bubble' flag used as the
# dependent variable below) onto the run-up rows, matching on the shared index.
run_up_regression = run_up_regression.merge(bubble_date, left_index=True, right_index=True, how='left')

In [232]:
# Univariate regressions of the 'bubble' flag on each characteristic, clustered on 'Year'.
run_up_stats = t_stat(run_up_regression, characteristics, 'bubble', 'Year')

# Future VWR

In [233]:
def calc_cumulative(df, date, ind, months=24, return_col='vwr_ind'):
    """Compound one industry's returns over the ``months`` following ``date``.

    Rows are selected where the 'ind' index level equals ``ind`` and the 'date'
    index level lies in the window (date, date + months] -- the start date
    itself is excluded, the end date is included. The running compounded return
    of ``return_col`` is stored in the 'two_vwr_ind' column of the returned
    copy, replacing that column if it already exists. ``df`` is not modified.
    """
    window_start = pd.to_datetime(date)
    window_end = window_start + pd.DateOffset(months=months)

    row_dates = df.index.get_level_values('date')
    row_inds = df.index.get_level_values('ind')
    selected = (row_dates > window_start) & (row_dates <= window_end) & (row_inds == ind)

    window = df[selected].copy()
    # Running compounded return since the start of the window.
    window['two_vwr_ind'] = window[return_col].add(1).cumprod().sub(1)
    return window

In [234]:
# Stack the post-run-up windows of every (date, industry) pair in the bubble list.
two_regression = pd.concat(
    [
        calc_cumulative(regression, run_up_date, industry)
        for run_up_date, industry in bubble_date.index
    ]
)

In [235]:
# Calendar year of each row's 'date' index level, used as the clustering variable.
two_regression['Year'] = two_regression.index.get_level_values('date').year

In [236]:
# Univariate regressions of 'vwr_ind' on each characteristic, clustered on 'Year'.
# NOTE(review): in `two_regression` the 'two_vwr_ind' column has been overwritten by
# calc_cumulative with the forward compounded return, so the first regressor is no
# longer the past 2-year return, and the dependent variable is the one-period
# 'vwr_ind' rather than a 2-year return -- confirm this is the intended specification.
two_stats = t_stat(two_regression, characteristics, 'vwr_ind', 'Year')

# Check: t-statistics of both regressions side by side

In [237]:
# Put the t-values of the two sets of regressions next to each other, one column each.
combine = pd.concat(
    [run_up_stats['t'], two_stats['t']],
    axis=1,
    keys=['[t] for Bubble Indicators', '[t] for 2 years VWR'],
)

In [238]:
# Display the comparison table.
combine

Unnamed: 0,[t] for Bubble Indicators,[t] for 2 years VWR
Past 2-year Return,0.228368,3.340378
Excess Past 2-year Return,0.102515,-0.124599
Volatility (VW),2.730078,-1.337141
Volatility (VW)- 1yr-Δ,2.681506,-3.220187
Turnover (VW),2.243964,-0.408664
Turnover (VW)- 1yr-Δ,-0.111963,-0.659577
Firm Age (VW),0.506838,-1.114545
Age tilt,0.123037,-3.843774
% Issuers,-0.597544,-0.955762
Book to Market (VW),2.056386,-1.025622
