# Init

In [97]:
# library
library(feather)
library(repr)
library(jsonlite)
library(dominanceanalysis)
library(Matrix)
suppressMessages(library(glmnet))

# Project paths.
# DATA_DIR and WRDS_DOWNLOAD_DIR are defined only when the working directory
# ends in "onedrive/cc" (compared case-insensitively). Otherwise a reminder is
# printed and neither variable is set, so later cells that use them will fail.
if (!endsWith(tolower(getwd()), 'onedrive/cc')) {
    cat('Please set working dir to "~/onedrive/cc"')
} else {
    DATA_DIR = paste0(getwd(), '/data')
    WRDS_DOWNLOAD_DIR = paste0(DATA_DIR, '/WRDS-download')
    cat(paste0('Current working directory: ', getwd()))
}

# Default size and resolution for figures rendered through repr.
options(
    repr.plot.width = 7,
    repr.plot.height = 4,
    repr.plot.res = 300
)

Current working directory: C:/Users/rossz/Onedrive/CC

# ranking accuracy

In [70]:
# Read the ground-truth ranking and the two text-model rankings from feather
# files and convert each to a data.table. Warnings raised while reading are
# silenced.
suppressWarnings({
    car_ranking_truth = as.data.table(read_feather('data/car_ranking.feather'))
    car_ranking_3y_text = as.data.table(read_feather('data/car_ranking_3y_text.feather'))
    car_ranking_3y_text_fr = as.data.table(read_feather('data/car_ranking_3y_text_fr.feather'))
})
# `ld` is a helper defined outside this view; judging by the cell output it
# skips the load when the object already exists in the session.
ld(car_ranking_3y_ols)

# Stack the four sources into one long table with a `model` label per row.
# The truth table contributes `t_car`; every prediction table contributes
# `y_car`. Both are stored under the common column name `car`.
car_ranking = rbindlist(list(car_ranking_truth[, .(roll_type, window, docid, car=t_car, model='truth')],
                             car_ranking_3y_text[, .(roll_type, window, docid, car=y_car, model='tsfm-text')],
                             car_ranking_3y_text_fr[, .(roll_type, window, docid, car=y_car, model='tsfm-text-fr')],
                             car_ranking_3y_ols[, .(roll_type, window, docid, car=y_car, model='ols')]),
                        fill=T)
# NOTE(review): this line looks unfinished. The second argument is a whole
# data.table subset (no `docid` column is selected) and it filters on an empty
# roll_type string, so the intersection is unlikely to be the intended set of
# shared docids. Confirm which roll_type was meant and select `docid` there.
valid_docid = intersect(car_ranking[roll_type=='1y', docid], car_ranking[roll_type==''])

-car_ranking_3y_ols- already exists, will NOT load again!  (0 secs)


In [88]:
# Size of the "winner" (highest CAR) and "loser" (lowest CAR) sets compared
# within each (roll_type, window) group.
top_n = 10

# Row positions of the `n` largest values of `x`.
# head() is used instead of `[1:n]` so that a group with fewer than `n` rows
# yields a shorter vector rather than one padded with NA positions; intersect()
# would otherwise count the shared NA as a spurious hit.
top_idx = function(x, n) head(order(-x), n)

# Row positions of the `n` smallest values of `x` (same short-group handling).
btm_idx = function(x, n) head(order(x), n)

# Percentage of the positions in `truth_idx` that also appear in `pred_idx`.
hit_pct = function(truth_idx, pred_idx) {
    length(intersect(truth_idx, pred_idx)) / length(truth_idx) * 100
}

# Inner-join truth with each model's prediction on (roll_type, window, docid),
# then, per (roll_type, window), measure how much each model's top/bottom set
# overlaps with the true top/bottom set. Every ratio uses the size of the
# truth set as its denominator.
car_ranking = car_ranking_truth[, .(roll_type, window, docid, car=t_car)
    ][car_ranking_3y_ols[, .(roll_type, window, docid, car_ols=y_car)], on=.(roll_type, window, docid), nomatch=NULL
    ][car_ranking_3y_text[, .(roll_type, window, docid, car_text=y_car)], on=.(roll_type, window, docid), nomatch=NULL
    ][car_ranking_3y_text_fr[, .(roll_type, window, docid, car_text_fr=y_car)], on=.(roll_type, window, docid), nomatch=NULL
    ][, {
         top_truth = top_idx(car, top_n)
         btm_truth = btm_idx(car, top_n)

         list(ols_winner_acc = hit_pct(top_truth, top_idx(car_ols, top_n)),
              text_winner_acc = hit_pct(top_truth, top_idx(car_text, top_n)),
              text_fr_winner_acc = hit_pct(top_truth, top_idx(car_text_fr, top_n)),
              ols_loser_acc = hit_pct(btm_truth, btm_idx(car_ols, top_n)),
              text_loser_acc = hit_pct(btm_truth, btm_idx(car_text, top_n)),
              text_fr_loser_acc = hit_pct(btm_truth, btm_idx(car_text_fr, top_n)))
      },
      keyby=.(roll_type, window)]

# Average accuracy across all (roll_type, window) groups for the OLS and
# text-fr models.
car_ranking[, lapply(.SD, mean), .SDcols=c('ols_winner_acc', 'text_fr_winner_acc', 'ols_loser_acc', 'text_fr_loser_acc')]

ols_winner_acc,text_fr_winner_acc,ols_loser_acc,text_fr_loser_acc
<dbl>,<dbl>,<dbl>,<dbl>
30.625,30.9375,19.6875,20.625


# backtest

In [98]:
# Bring the cached objects into the session through the `ld` helper (defined
# outside this view); `comp_secd` is looked up under WRDS_DOWNLOAD_DIR.
ld(rolling_split_dates)
ld(gvkey_permno_link)
ld(comp_secd, path=WRDS_DOWNLOAD_DIR)

# Raw text columns that are not needed from here on.
text_cols = c('text_present', 'text_qa', 'text_ans', 'text_ques', 'text_all')

# Read the targets table, then keep every column that is not a text column.
targets_df = as.data.table(read_feather('data/f_sue_keydevid_car_finratio_vol_transcriptid_sim_text.feather'))
all_cols = names(targets_df)
non_text_cols = all_cols[!(all_cols %in% text_cols)]
targets_df = targets_df[, non_text_cols, with=FALSE]

-rolling_split_dates- already exists, will NOT load again!  (0 secs)
-gvkey_permno_link- already exists, will NOT load again!  (0 secs)
-comp_secd- loaded  (3.29 mins)


In [60]:
# Load the per-day event-window file and keep event rows with event time
# between 0 and 30 (inclusive on both ends).
# - `colClasses` is spelled out in full; the original `colClass` worked only
#   through R's partial argument matching.
# - Rows are filtered before the date conversion so that ymd() parses only
#   the rows that are kept; the resulting table is the same.
car = fread('./data/CAR/cars_30d_call.csv',
            colClasses=c('integer', 'double', rep('character', times=2),
                         'integer', rep('double', times=7))
    )[isevt==1 & (evttime %between% c(0,30))
    ][, `:=`(edate=ymd(edate), rdate=ymd(rdate))]

# Show the first row as a sanity check on column types.
car[1]

isevt,permno,edate,rdate,evttime,ret,abret,alpha,beta_mktrf,beta_smb,beta_hml,_nobs
<int>,<dbl>,<date>,<date>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,10078,2008-05-01,2008-05-01,0,0.04278417,0.03215787,-0.001651861,0.788845,-0.1782391,-0.3226036,125


In [58]:
# Backtest parameters. None of them is consumed in the cells visible here, so
# the descriptions below are inferred from the names only. TODO confirm.

# Calendar range of the backtest (presumably inclusive on both ends).
buy_start = ymd('2011-01-01')
buy_end = ymd('2018-12-31')

# Presumably a threshold that triggers a position; units not shown here.
buy_line = 10

# Presumably caps on the number of simultaneous long / short positions.
n_long_max = 50
n_short_max = 50

In [115]:
# NOTE(review): `backtest` is not assigned in any cell visible here, so this
# display depends on a cell outside this view or on leftover kernel state.
# Confirm it survives Restart & Run All.
backtest

gvkey,ciq_call_date,car_0_30,rdate,evttime,ret,abret
<chr>,<date>,<dbl>,<date>,<int>,<dbl>,<dbl>
004560,2008-01-08,19.71431,2008-01-08,0,-0.064161777,-0.0229198008
012689,2008-01-08,42.90410,2008-01-08,0,-0.091991283,-0.0426851784
001356,2008-01-09,24.40276,2008-01-09,0,0.008064516,-0.0107439921
004560,2008-01-08,19.71431,2008-01-09,1,-0.003088397,-0.0112835873
012689,2008-01-08,42.90410,2008-01-09,1,0.011322921,0.0136199084
162129,2008-01-09,17.41837,2008-01-09,0,0.007242270,-0.0273469257
001356,2008-01-09,24.40276,2008-01-10,1,0.006080017,-0.0050717858
004560,2008-01-08,19.71431,2008-01-10,2,0.000000000,-0.0205452098
012689,2008-01-08,42.90410,2008-01-10,2,0.052445572,0.0391836096
162129,2008-01-09,17.41837,2008-01-10,1,0.050780773,0.0327791723
