# Init

In [32]:
# library
library(feather)
library(repr)
library(jsonlite)
library(dominanceanalysis)
library(Matrix)
suppressMessages(library(glmnet))
library(rlist)


# data_dir
# The data folders are only defined when the session's working directory ends
# in "onedrive/cc" (compared case-insensitively); otherwise a reminder is
# printed and DATA_DIR / WRDS_DOWNLOAD_DIR are left untouched.
if (endsWith(tolower(getwd()), 'onedrive/cc')) {
    DATA_DIR = file.path(getwd(), 'data')
    WRDS_DOWNLOAD_DIR = file.path(DATA_DIR, 'WRDS-download')
    cat(paste0('Current working directory: ', getwd()))
} else {
    cat('Please set working dir to "~/onedrive/cc"')
}

# options for plot
# Size (width x height) and resolution used by repr when rendering figures.
options(
    repr.plot.width = 7,
    repr.plot.height = 4,
    repr.plot.res = 300
)

Current working directory: C:/Users/rossz/Onedrive/CC

# ranking accuracy

In [70]:
# Load the realised ("truth") CAR ranking and the rankings predicted by the
# two text models from feather files.
# NOTE(review): suppressWarnings() silences every warning raised by the three
# reads; if only one specific warning is expected, consider handling just that.
suppressWarnings({
    car_ranking_truth = as.data.table(read_feather('data/car_ranking.feather'))
    car_ranking_3y_text = as.data.table(read_feather('data/car_ranking_3y_text.feather'))
    car_ranking_3y_text_fr = as.data.table(read_feather('data/car_ranking_3y_text_fr.feather'))
})
# ld() is not defined in this file; presumably a project helper that loads a
# saved object by name (the cell output reports it skips objects already loaded).
ld(car_ranking_3y_ols)

# Stack all rankings in long format: one row per (roll_type, window, docid, model),
# with the model's CAR value in a common `car` column.
car_ranking = rbindlist(list(car_ranking_truth[, .(roll_type, window, docid, car=t_car, model='truth')],
                             car_ranking_3y_text[, .(roll_type, window, docid, car=y_car, model='tsfm-text')],
                             car_ranking_3y_text_fr[, .(roll_type, window, docid, car=y_car, model='tsfm-text-fr')],
                             car_ranking_3y_ols[, .(roll_type, window, docid, car=y_car, model='ols')]),
                        fill=TRUE)

# NOTE(review): this line looks unfinished. The second argument is a whole
# data.table (no `docid` column selected) filtered on an empty roll_type, so
# the result is not an intersection of docid vectors. The intended second
# roll_type cannot be determined from this file — TODO fill it in, e.g.
#   intersect(car_ranking[roll_type=='1y', docid], car_ranking[roll_type=='<other>', docid])
valid_docid = intersect(car_ranking[roll_type=='1y', docid], car_ranking[roll_type==''])

-car_ranking_3y_ols- already exists, will NOT load again!  (0 secs)


In [88]:
# Number of winners / losers compared in each (roll_type, window) group.
top_n = 10

# For every (roll_type, window) group, measure how many of the true top-n
# ("winners") and bottom-n ("losers") documents each model also places in its
# own top-n / bottom-n. Only documents present in all four tables are kept
# (inner joins via nomatch=NULL).
car_ranking = car_ranking_truth[, .(roll_type, window, docid, car=t_car)
    ][car_ranking_3y_ols[, .(roll_type, window, docid, car_ols=y_car)], on=.(roll_type, window, docid), nomatch=NULL
    ][car_ranking_3y_text[, .(roll_type, window, docid, car_text=y_car)], on=.(roll_type, window, docid), nomatch=NULL
    ][car_ranking_3y_text_fr[, .(roll_type, window, docid, car_text_fr=y_car)], on=.(roll_type, window, docid), nomatch=NULL
    ][, {
         # Row indices (within the group) of the n largest / smallest values.
         # head() is used instead of `[1:top_n]` so that a group with fewer
         # than top_n rows yields a shorter vector rather than NA padding;
         # NA entries would otherwise be counted as a "match" by intersect().
         largest = function(x) head(order(-x), top_n)
         smallest = function(x) head(order(x), top_n)

         # Share (in percent) of the true set that the predicted set recovers.
         # The denominator is always the size of the *true* set.
         overlap_pct = function(truth_idx, pred_idx) {
             length(intersect(truth_idx, pred_idx)) / length(truth_idx) * 100
         }

         top_truth = largest(car)
         btm_truth = smallest(car)

         list(ols_winner_acc = overlap_pct(top_truth, largest(car_ols)),
              text_winner_acc = overlap_pct(top_truth, largest(car_text)),
              text_fr_winner_acc = overlap_pct(top_truth, largest(car_text_fr)),
              ols_loser_acc = overlap_pct(btm_truth, smallest(car_ols)),
              text_loser_acc = overlap_pct(btm_truth, smallest(car_text)),
              text_fr_loser_acc = overlap_pct(btm_truth, smallest(car_text_fr)))
      },
      keyby=.(roll_type, window)]

# Average accuracy across all (roll_type, window) groups for the OLS and
# text-fr models.
car_ranking[, lapply(.SD, mean), .SDcols=c('ols_winner_acc', 'text_fr_winner_acc', 'ols_loser_acc', 'text_fr_loser_acc')]

ols_winner_acc,text_fr_winner_acc,ols_loser_acc,text_fr_loser_acc
<dbl>,<dbl>,<dbl>,<dbl>
30.625,30.9375,19.6875,20.625


# backtest

In [129]:
# load gvkey_permno_link
# ld() is not defined in this file; presumably a project helper that loads a
# saved object by name.
ld(gvkey_permno_link)

# load targets_df
# The text columns are dropped right after loading; only the remaining
# (non-text) columns are used below.
targets_df = as.data.table(read_feather('data/f_sue_keydevid_car_finratio_vol_transcriptid_sim_text.feather'))
all_cols = names(targets_df)
text_cols = c('text_present', 'text_qa', 'text_ans', 'text_ques', 'text_all')
non_text_cols = all_cols[!all_cols %in% text_cols]
targets_df = targets_df[, ..non_text_cols]

# load car
# Column types are given positionally (12 columns). The argument is spelled
# out as `colClasses` rather than relying on partial argument matching.
# Only event rows (isevt==1) with event time 0..30 are kept.
car = fread('./data/CAR/cars_30d_call.csv', colClasses=c('integer', 'double', rep('character', times=2), 'integer', rep('double', times=7)))[,
      ':='(edate=ymd(edate), rdate=ymd(rdate))
    ][isevt==1 & (evttime %between% c(0,30))]

-gvkey_permno_link- already exists, will NOT load again!  (0 secs)


In [None]:
system.time({

# ---- strategy parameters ----
buy_line = 10                   # only calls with car_0_30 >= buy_line are traded
buy_start = ymd('2011-01-01')   # first record date included in the backtest
buy_end = ymd('2018-12-31')     # last record date included in the backtest
n_stock_max = 10                # maximum number of distinct stocks held at once

# One row per (call, event day 0..30) for calls passing the buy_line filter,
# sorted by record date and restricted to [buy_start, buy_end].
backtest = targets_df[, .(gvkey, ciq_call_date, car_0_30)
    ][gvkey_permno_link[, .(gvkey, permno=lpermno)], on=.(gvkey), nomatch=NULL
    ][car_0_30>=buy_line 
    ][car[, .(permno, edate, rdate, evttime, ret)], on=.(permno, ciq_call_date=edate), nomatch=NULL
    ][order(rdate)
    ][, ':='(permno=NULL)
    ][rdate %between% c(buy_start, buy_end)]

len_backtest = nrow(backtest)

# Per-row snapshots of the portfolio state *after* processing that row.
v_cash = double(len_backtest)
v_stock = double(len_backtest)
n_stock = double(len_backtest)
position = vector("list", len_backtest)

# Pull the columns out once; per-row `backtest[i, col]` lookups are slow.
# gvkey is coerced to character so it is always used as a list *name* and can
# never be interpreted as a positional index into the position list.
gvkey_vec = as.character(backtest[, gvkey])
evttime_vec = backtest[, evttime]
ret_vec = backtest[, ret]

# Running state: start fully in cash (value 1) with no holdings.
cur_cash = 1
cur_stock = 0
cur_n = 0
cur_pos = list()   # named list: gvkey -> current value of that holding

# seq_len() keeps the loop from running when the table is empty
# (1:0 would iterate over c(1, 0)).
for (i in seq_len(len_backtest)) {
    key = gvkey_vec[i]
    evt = evttime_vec[i]
    r = ret_vec[i]
    held = key %in% names(cur_pos)

    if (evt==0) {
        # on call record: buy only while there is free capacity,
        # investing an equal share of cash per remaining slot
        if (cur_n<n_stock_max) {
            v_buy = cur_cash/(n_stock_max-cur_n)
            cur_cash = cur_cash-v_buy
            cur_stock = cur_stock+v_buy

            if (held) {
                # Already held: add to the existing holding. The stock count
                # is NOT increased, because the exit branch removes the whole
                # holding and decrements the count only once; counting a
                # top-up as a new stock would make n_stock drift upward.
                cur_pos[[key]] = cur_pos[[key]]+v_buy
            } else {
                cur_pos[[key]] = v_buy
                cur_n = cur_n+1
            }
        }
        # if n_stock == max, do nothing
    } else if (evt==30) {
        # on exit record: liquidate the holding at its value times (1+ret)
        if (held) {
            v_sell = cur_pos[[key]] * (1+r)
            cur_cash = cur_cash+v_sell
            cur_stock = cur_stock-cur_pos[[key]]
            cur_n = cur_n-1
            cur_pos[[key]] = NULL
        }
        # if not held (e.g., first few lines of data are evttime==30), skip
    } else if (evt %between% c(1, 29)) {
        # on other records: mark the holding to market with the day's return
        if (held) {
            v_change = cur_pos[[key]] * r
            cur_pos[[key]] = cur_pos[[key]]+v_change
            cur_stock = cur_stock+v_change
        }
        # if not held (e.g., first few lines of data are evttime>0), skip
    }

    # Record the state after this row; rows that triggered no action simply
    # carry the previous state forward.
    v_cash[i] = cur_cash
    v_stock[i] = cur_stock
    n_stock[i] = cur_n
    position[[i]] = cur_pos
}

backtest = data.table(backtest, n_stock=n_stock, v_cash=v_cash, v_stock=v_stock, position=position)[, ':='(v_total=v_cash+v_stock)]
# sv() is not defined in this file; presumably a project helper that saves
# the object to disk.
sv(backtest)
    
})

In [187]:
# Add up every value stored in a list (nested lists are flattened first).
# An empty list gives 0.
sum_list <- function(l) {
    flat <- unlist(l, use.names = FALSE)
    sum(flat)
}

# Consistency check: the recorded stock value should equal the sum of the
# individual holdings stored in `position` on every row. Rows that differ by
# 1e-9 or more are listed; an empty result means the two agree.
# vapply() guarantees a numeric vector even when `backtest` has no rows
# (sapply() would return an empty list there).
backtest[, .(v_stock, v_pos=vapply(position, sum_list, numeric(1)))
    ][abs(v_stock-v_pos)>=1e-9]

v_stock,v_pos
<dbl>,<dbl>


In [None]:
# Equity curve: keep the last record of each date (the state after all of that
# day's rows were processed) and plot total portfolio value over time.
backtest[, tail(.SD,1), keyby=.(rdate)] %>%
    plot_ly(x=~rdate, y=~v_total, type='scatter', mode='lines') %>%
    plotly::layout(autosize=FALSE)