## Appendix H (Table H.1-14)

In [1]:
# Machine-specific project folder; later relative paths resolve against it.
dir_path <- "/Users/jeremyzyang/Library/CloudStorage/GoogleDrive-jeremy.z.yang@gmail.com/My Drive/tiktok_revision"
setwd(dir_path)

In [2]:
# Restore the objects saved in tiktok.RData (looked up in the working directory).
load('tiktok.RData')
# A negative warn level makes R ignore all warnings for the rest of the session.
options(warn = -1)

In [3]:
suppressWarnings(suppressMessages(library(data.table)))
suppressWarnings(suppressMessages(library(tidyverse)))
suppressWarnings(suppressMessages(library(lfe)))
suppressWarnings(suppressMessages(library(patchwork)))
suppressWarnings(suppressMessages(library(zoo)))
suppressWarnings(suppressMessages(library(doParallel)))
suppressWarnings(suppressMessages(library(staggered)))
suppressWarnings(suppressMessages(library(fixest)))
suppressWarnings(suppressMessages(library(faux)))
suppressWarnings(suppressMessages(library(stargazer)))
suppressWarnings(suppressMessages(library(xgboost)))
suppressWarnings(suppressMessages(library(caret)))

In [4]:
# Print the R version, platform and attached package versions used for this run.
sessionInfo()

R version 4.2.1 (2022-06-23)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur ... 10.16

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] caret_6.0-93      lattice_0.20-45   xgboost_1.6.0.1   stargazer_5.2.3  
 [5] faux_1.1.0        fixest_0.10.4     staggered_1.1     doParallel_1.0.17
 [9] iterators_1.0.14  foreach_1.5.2     zoo_1.8-11        patchwork_1.1.2  
[13] lfe_2.8-8         Matrix_1.5-1      forcats_0.5.2     stringr_1.4.1    
[17] dplyr_1.0.10      purrr_0.3.4       readr_2.1.3       tidyr_1.2.1      
[21] tibble_3.1.8      ggplot2_3.3.6     tidyverse_1.3.2   data.table_1.14.2

loaded via a names

In [5]:
# List the objects currently defined in the global environment.
ls()

In [6]:
# Working copy of the panel. `model_evaluation` is not created in this
# notebook (presumably restored by the load() call above -- TODO confirm).
df <- model_evaluation

In [7]:
# Split the panel into treated and control products.

# Per-product mean of the `treated` indicator (NAs ignored).
# Note that df stays grouped by taobao_id after this step.
df <- df %>%
  group_by(taobao_id) %>%
  mutate(treated_mean = mean(treated, na.rm = TRUE))

# Count the products whose `treated` indicator is not 1 in every observation.
df %>%
  distinct(taobao_id, .keep_all = TRUE) %>%
  filter(treated_mean != 1) %>%
  pull(treated_mean) %>%
  table() %>%
  sum()

# Treated group: products with at least one observation where treated != 1.
# Control group: products with treated == 1 in all of their observations.
df_treat <- df %>% filter(treated_mean != 1)
df_control <- df %>% filter(treated_mean == 1)

treat_id <- unique(df_treat$taobao_id)

In [8]:
# impute daily sales
#
# For every product the observed `rev` series is expanded to one row per
# relative day (missing days carry the previous observed value forward) and a
# daily series `rev_day` is built recursively:
#   * relative day 0:      rev[1] / 30
#   * relative day 1..29:  rev[j] - rev[j - 1] + rev[1] / 30
#   * relative day >= 30:  rev[j] - rev[j - 1] + rev_day[j - 30]
# NOTE(review): this reads as if `rev` were a trailing 30-day total -- confirm
# against the data description.

id <- unique(df$taobao_id)

df <- distinct(df, taobao_id, updated_time, .keep_all = TRUE)

# One data frame per product is stored here and bound once after the loop;
# growing `rev_day` with rbind() on every iteration copies all previous rows.
rev_day_parts <- vector("list", length(id))
n_done <- 0L

for (i in id) { # loop over all products
  df_p <- select(df, rev, taobao_id, updated_time_rel) %>%
    filter(taobao_id == i) %>%
    arrange(updated_time_rel)
  df_p$updated_time_rel <- as.numeric(df_p$updated_time_rel)

  s <- full_seq(df_p$updated_time_rel, 1) # fill in missing dates
  rev_day_p_c <- data.frame(rep(i, length(s)), s)
  colnames(rev_day_p_c) <- c("taobao_id", "updated_time_rel")

  df_p <- left_join(rev_day_p_c, df_p, by = c("taobao_id", "updated_time_rel"))
  df_p <- fill(df_p, rev) # imputing revenues on missing days by the previous value

  rev_day_p <- vector()

  # ifelse() is kept on purpose: unlike if/else it returns NA (instead of
  # failing) when a condition is NA or a looked-up value has length zero.
  for (j in seq_len(nrow(df_p))) {
    rev_day_p[j] <- ifelse( # imputing daily revenue
      df_p$updated_time_rel[j] == 0,
      df_p$rev[1] / 30,
      ifelse(
        df_p$updated_time_rel[j] < 30,
        df_p$rev[j] - df_p$rev[j - 1] + df_p$rev[1] / 30,
        df_p$rev[j] - df_p$rev[j - 1] + rev_day_p[j - 30]
      )
    )
  }

  df_p$rev_day <- rev_day_p

  n_done <- n_done + 1L
  rev_day_parts[[n_done]] <- df_p
}

# Leading empty data frame keeps the result a plain data.frame even when
# there are no products, exactly like the incremental rbind() did.
rev_day <- do.call(rbind, c(list(data.frame()), rev_day_parts))

In [9]:
# Make the join key numeric on both sides.
df$updated_time_rel <- as.numeric(df$updated_time_rel)

# Attach the remaining panel columns to the imputed daily series and keep
# only rows without any missing value.
df_day <- rev_day %>%
  left_join(select(df, -rev), by = c("taobao_id", "updated_time_rel")) %>%
  drop_na()

In [10]:
# Daily panel split by whether the product id is in treat_id.
df_day_treat <- df_day %>% filter(taobao_id %in% treat_id)
df_day_control <- df_day %>% filter(!taobao_id %in% treat_id)

In [11]:
# Product-level estimates: for each treated product, stack its daily series
# with all control products and regress daily revenue on `treated` and
# `search`, absorbing date and product fixed effects and clustering standard
# errors by product and date.
tau <- numeric(length(treat_id))
gamma <- numeric(length(treat_id))
r2 <- numeric(length(treat_id))
r2_adj <- numeric(length(treat_id))

for (i in seq_along(treat_id)) { # loop over all treated products

  df_day_treat_i <- filter(df_day_treat, taobao_id == treat_id[[i]])
  df_day_i <- rbind(df_day_treat_i, df_day_control)

  model <- felm(
    rev_day ~ treated + search |
      as.factor(updated_time) + as.factor(taobao_id) | 0 |
      taobao_id + updated_time,
    df_day_i
  )

  # Summarise once per fit; rows follow the formula order (treated, search).
  model_summary <- summary(model)
  model_coefs <- coef(model_summary)

  tau[i] <- as.numeric(model_coefs[1, 1])
  gamma[i] <- as.numeric(model_coefs[2, 1])
  r2[i] <- model_summary$r2
  r2_adj[i] <- model_summary$r2adj
}

In [12]:
# One row per treated product: the estimates from the loop above plus the
# product, video and influencer covariates taken from the first row of each
# product in df.
d <- data.frame(
  taobao_id = treat_id,
  tau = tau,
  gamma = gamma,
  r2 = r2,
  r2_adj = r2_adj
) %>%
  left_join(
    df %>%
      distinct(taobao_id, .keep_all = TRUE) %>%
      select(
        taobao_id,
        pe_score, p_score, e_score, like, comment, share,
        price, discount, avg_search, category,
        gender, fans, avg_play, influencer_price, expected_cpm, order_cnt,
        influencer_id,
        pe_score_like, pe_score_comment, pe_score_unsup
      ),
    by = "taobao_id"
  )

#### Table H.1

In [13]:
# Table H.1: regress the product-level effect `tau` on alternative pe-score
# measures (like-, comment- and unsupervised-based), with category fixed
# effects. The four influencer covariates are divided by one million before
# each fit.

m6_like <- felm(
  tau ~ pe_score_like +
    p_score * e_score +
    avg_search + price + discount +
    fans + avg_play + influencer_price + expected_cpm + order_cnt + gender |
    category,
  mutate(d, across(
    c(fans, avg_play, influencer_price, expected_cpm),
    ~ .x / 1000000
  ))
)

m6_comment <- felm(
  tau ~ pe_score_comment +
    p_score * e_score +
    avg_search + price + discount +
    fans + avg_play + influencer_price + expected_cpm + order_cnt + gender |
    category,
  mutate(d, across(
    c(fans, avg_play, influencer_price, expected_cpm),
    ~ .x / 1000000
  ))
)

m6_unsup <- felm(
  tau ~ pe_score_unsup +
    p_score * e_score +
    avg_search + price + discount +
    fans + avg_play + influencer_price + expected_cpm + order_cnt + gender |
    category,
  mutate(d, across(
    c(fans, avg_play, influencer_price, expected_cpm),
    ~ .x / 1000000
  ))
)

# Row order of the printed table.
vars.order <- c(
  "pe_score_like", "pe_score_comment", "pe_score_unsup",
  "e_score", "p_score", "p_score:e_score",
  "gender", "fans", "avg_play", "influencer_price", "expected_cpm", "order_cnt",
  "avg_search", "price", "discount"
)

stargazer(
  m6_like, m6_comment, m6_unsup,
  digits = 2,
  type = "text",
  star.char = c("+", "*", "**", "***"),
  star.cutoffs = c(.1, .05, .01, .001),
  order = paste0("^", vars.order, "$") # anchored so names match exactly
)


                                    Dependent variable:     
                               -----------------------------
                                            tau             
                                  (1)       (2)       (3)   
------------------------------------------------------------
pe_score_like                   119.03*                     
                                (53.35)                     
                                                            
pe_score_comment                          160.33*           
                                          (67.34)           
                                                            
pe_score_unsup                                       24.69  
                                                    (30.23) 
                                                            
e_score                          -5.84     -0.99    -10.67  
                                (32.87)   (33.07)   (33.21) 
                       

#### Table H.2

In [15]:
set.seed(101)

# Table H.2: gradient-boosted trees of `tau` on the like-based pe-score and
# controls. Complete cases are selected once so that the feature matrix and
# the label vector come from exactly the same rows; taking the label from all
# of `d` only lines up when drop_na() removes nothing.
xgb_data <- d %>%
  select(
    tau,
    pe_score_like,
    p_score, e_score,
    fans, avg_play, influencer_price, expected_cpm, order_cnt, gender,
    avg_search, price, discount, category
  ) %>%
  drop_na()

# Expand the predictors into a model matrix and drop its intercept column.
x <- sparse.model.matrix(tau ~ ., data = xgb_data)[, -1]

xgb <- xgboost(
  data = as.matrix(x),
  label = xgb_data$tau,
  nrounds = 20, # spelled out; `nround` relied on partial argument matching
  verbose = 0
)

# Ten features with the largest gain.
xgb.importance(model = xgb) %>%
  top_n(10, Gain) %>%
  stargazer(summary = FALSE, rownames = FALSE, type = "text")


Feature          Gain  Cover Frequency
--------------------------------------
pe_score_like    0.864 0.257   0.303  
discount         0.037 0.140   0.092  
p_score          0.031 0.164   0.209  
e_score          0.026 0.008   0.041  
fans             0.015 0.059   0.062  
expected_cpm     0.008 0.095   0.073  
avg_search       0.006 0.024   0.026  
avg_play         0.003 0.054   0.051  
order_cnt        0.002 0.051   0.053  
influencer_price 0.002 0.020   0.019  
--------------------------------------


#### Table H.3

In [16]:
set.seed(101)

# Table H.3: gradient-boosted trees of `tau` on the comment-based pe-score and
# controls. Complete cases are selected once so that the feature matrix and
# the label vector come from exactly the same rows; taking the label from all
# of `d` only lines up when drop_na() removes nothing.
xgb_data <- d %>%
  select(
    tau,
    pe_score_comment,
    p_score, e_score,
    fans, avg_play, influencer_price, expected_cpm, order_cnt, gender,
    avg_search, price, discount, category
  ) %>%
  drop_na()

# Expand the predictors into a model matrix and drop its intercept column.
x <- sparse.model.matrix(tau ~ ., data = xgb_data)[, -1]

xgb <- xgboost(
  data = as.matrix(x),
  label = xgb_data$tau,
  nrounds = 20, # spelled out; `nround` relied on partial argument matching
  verbose = 0
)

# Ten features with the largest gain.
xgb.importance(model = xgb) %>%
  top_n(10, Gain) %>%
  stargazer(summary = FALSE, rownames = FALSE, type = "text")


Feature             Gain  Cover Frequency
-----------------------------------------
pe_score_comment    0.857 0.257   0.292  
p_score             0.041 0.209   0.149  
e_score             0.021 0.052   0.104  
discount            0.020 0.091   0.070  
expected_cpm        0.019 0.089   0.077  
avg_search          0.012 0.059   0.038  
order_cnt           0.011 0.049   0.043  
fans                0.011 0.064   0.063  
categoryElectronics 0.003 0.011   0.018  
avg_play            0.002 0.044   0.050  
-----------------------------------------


#### Table H.4

In [17]:
set.seed(101)

# Table H.4: gradient-boosted trees of `tau` on the unsupervised pe-score and
# controls. Complete cases are selected once so that the feature matrix and
# the label vector come from exactly the same rows; taking the label from all
# of `d` only lines up when drop_na() removes nothing.
xgb_data <- d %>%
  select(
    tau,
    pe_score_unsup,
    p_score, e_score,
    fans, avg_play, influencer_price, expected_cpm, order_cnt, gender,
    avg_search, price, discount, category
  ) %>%
  drop_na()

# Expand the predictors into a model matrix and drop its intercept column.
x <- sparse.model.matrix(tau ~ ., data = xgb_data)[, -1]

xgb <- xgboost(
  data = as.matrix(x),
  label = xgb_data$tau,
  nrounds = 20, # spelled out; `nround` relied on partial argument matching
  verbose = 0
)

# Ten features with the largest gain.
xgb.importance(model = xgb) %>%
  top_n(10, Gain) %>%
  stargazer(summary = FALSE, rownames = FALSE, type = "text")


Feature          Gain  Cover Frequency
--------------------------------------
avg_play         0.768 0.140   0.098  
expected_cpm     0.085 0.176   0.097  
pe_score_unsup   0.031 0.134   0.218  
p_score          0.030 0.145   0.128  
influencer_price 0.029 0.052   0.057  
e_score          0.021 0.041   0.088  
discount         0.009 0.071   0.093  
categoryClothing 0.006 0.031   0.021  
fans             0.006 0.068   0.057  
avg_search       0.004 0.018   0.028  
--------------------------------------


#### Table H.5

In [18]:
# Table H.5: regress `tau` on pe_score, replacing e_score with one raw
# engagement count at a time (like, comment, share) and interacting it with
# p_score. Category fixed effects are absorbed. The engagement count and the
# four influencer covariates are divided by one million before each fit.

m6_like <- felm(
  tau ~ pe_score +
    p_score * like +
    avg_search + price + discount +
    fans + avg_play + influencer_price + expected_cpm + order_cnt + gender |
    category,
  mutate(d, across(
    c(like, fans, avg_play, influencer_price, expected_cpm),
    ~ .x / 1000000
  ))
)

m6_comment <- felm(
  tau ~ pe_score +
    p_score * comment +
    avg_search + price + discount +
    fans + avg_play + influencer_price + expected_cpm + order_cnt + gender |
    category,
  mutate(d, across(
    c(comment, fans, avg_play, influencer_price, expected_cpm),
    ~ .x / 1000000
  ))
)

m6_share <- felm(
  tau ~ pe_score +
    p_score * share +
    avg_search + price + discount +
    fans + avg_play + influencer_price + expected_cpm + order_cnt + gender |
    category,
  mutate(d, across(
    c(share, fans, avg_play, influencer_price, expected_cpm),
    ~ .x / 1000000
  ))
)

# Row order of the printed table.
vars.order <- c(
  "pe_score", "like", "comment", "share", "p_score",
  "p_score:like", "p_score:comment", "p_score:share",
  "gender", "fans", "avg_play", "influencer_price", "expected_cpm", "order_cnt",
  "avg_search", "price", "discount",
  "Constant"
)

# NOTE(review): the first star.char entry is an empty string here, whereas the
# other regression tables in this file use "+" for p < .1 -- confirm intended.
stargazer(
  m6_like, m6_comment, m6_share,
  digits = 2,
  type = "text",
  star.char = c("", "*", "**", "***"),
  star.cutoffs = c(.1, .05, .01, .001),
  order = paste0("^", vars.order, "$") # anchored so names match exactly
)


                                     Dependent variable:      
                               -------------------------------
                                             tau              
                                  (1)       (2)        (3)    
--------------------------------------------------------------
pe_score                       100.62*** 100.98***  100.81*** 
                                (28.84)   (28.96)    (28.80)  
                                                              
like                             24.76                        
                                (72.52)                       
                                                              
comment                                    603.40             
                                         (3,199.10)           
                                                              
share                                                 732.34  
                                                     (

#### Table H.6

In [19]:
set.seed(101)

# Table H.6: gradient-boosted trees of `tau` on pe_score, p_score, the raw
# engagement counts and controls. Complete cases are selected once so that the
# feature matrix and the label vector come from exactly the same rows; taking
# the label from all of `d` only lines up when drop_na() removes nothing.
xgb_data <- d %>%
  select(
    tau,
    pe_score,
    p_score,
    like, comment, share,
    fans, avg_play, influencer_price, expected_cpm, order_cnt, gender,
    avg_search, price, discount, category
  ) %>%
  drop_na()

# Expand the predictors into a model matrix and drop its intercept column.
x <- sparse.model.matrix(tau ~ ., data = xgb_data)[, -1]

xgb <- xgboost(
  data = as.matrix(x),
  label = xgb_data$tau,
  nrounds = 20, # spelled out; `nround` relied on partial argument matching
  verbose = 0
)

# Ten features with the largest gain.
xgb.importance(model = xgb) %>%
  top_n(10, Gain) %>%
  stargazer(summary = FALSE, rownames = FALSE, type = "text")


Feature          Gain  Cover Frequency
--------------------------------------
pe_score         0.861 0.230   0.287  
expected_cpm     0.037 0.081   0.068  
p_score          0.036 0.149   0.135  
comment          0.016 0.099   0.070  
discount         0.013 0.064   0.068  
like             0.010 0.056   0.073  
avg_search       0.007 0.051   0.028  
share            0.006 0.063   0.070  
influencer_price 0.005 0.023   0.031  
fans             0.003 0.060   0.054  
--------------------------------------


#### Table H.7

In [20]:
# Restricted control group used for the tau_30day estimates.

# Gap between each control product's first date in the daily panel and the
# date its video was posted. df_day_control keeps both helper columns and
# stays grouped by taobao_id.
df_day_control <- df_day_control %>%
  group_by(taobao_id) %>%
  mutate(
    updated_time_min = min(updated_time),
    diff = updated_time_min - video_posted_time
  )

# Keep control products whose gap is at least 30
# (days, assuming both columns are Dates -- TODO confirm).
df_day_control_30day <- df_day_control %>%
  filter(diff >= 30) %>%
  select(-diff, -updated_time_min)

search$taobao_id <- as.character(search$taobao_id)
search$updated_time <- as.Date(search$updated_time)

# First date on which each product appears in df.
df_first <- df %>%
  group_by(taobao_id) %>%
  arrange(updated_time) %>%
  filter(row_number() == 1) %>%
  select(taobao_id, enter_time = updated_time)

# Keep only search observations dated before the product's first date in df,
# and attach the per-product mean and standard deviation of those values.
search <- search %>%
  left_join(df_first, by = "taobao_id") %>%
  filter(updated_time < enter_time) %>%
  group_by(taobao_id) %>%
  mutate(
    search_mean = mean(search),
    search_sd = sd(search)
  ) %>%
  drop_na()

# Flag observations at least two standard deviations away from the product
# mean (identify products with unusual search volumes).
search <- search %>%
  group_by(taobao_id) %>%
  mutate(
    unusual = ifelse(
      search <= search_mean - 2 * search_sd | search >= search_mean + 2 * search_sd,
      1,
      0
    )
  )

# Products with at least one flagged observation.
search2 <- search %>%
  group_by(taobao_id) %>%
  summarize(unusual = sum(unusual))
unusual <- search2 %>%
  filter(unusual > 0) %>%
  pull(taobao_id)

# Drop those products from the restricted control group.
df_day_control_30day <- df_day_control_30day %>%
  filter(!taobao_id %in% unusual)

In [21]:
# Re-estimate the product-level effects against the restricted control group
# (df_day_control_30day): same regression of daily revenue on `treated` and
# `search` with date and product fixed effects and two-way clustering.
tau_30day <- numeric(length(treat_id))
gamma_30day <- numeric(length(treat_id))

for (i in seq_along(treat_id)) {

  df_day_treat_i <- filter(df_day_treat, taobao_id == treat_id[[i]])
  df_day_i <- rbind(df_day_treat_i, df_day_control_30day)

  model <- felm(
    rev_day ~ treated + search |
      as.factor(updated_time) + as.factor(taobao_id) | 0 |
      taobao_id + updated_time,
    df_day_i
  )

  # Summarise once per fit; rows follow the formula order (treated, search).
  model_coefs <- coef(summary(model))

  tau_30day[i] <- as.numeric(model_coefs[1, 1])
  gamma_30day[i] <- as.numeric(model_coefs[2, 1])
}

In [22]:
# Summary statistics (including the median) of the restricted-control estimates.
stargazer(
  data.frame(tau_30day, gamma_30day),
  type = "text",
  digits = 2,
  median = TRUE
)


Statistic    N  Mean St. Dev.   Min   Median  Max  
---------------------------------------------------
tau_30day   259 1.56  48.01   -126.30  1.88  710.79
gamma_30day 259 0.25   0.86    -4.09   0.19  10.99 
---------------------------------------------------


In [23]:
# Positional assignment: relies on the rows of `d` being in the same order as
# the treat_id vector used to fill tau_30day.
d[["tau_30day"]] <- tau_30day

#### Table H.8

In [24]:
# Cut-off date separating "already treated" from "not yet treated" products.
date <- as.Date("2019-06-01")

# Controls: products whose video was posted on or after the cut-off,
# restricted to their observations dated before the cut-off
# (select products not yet treated by 2019/06/01 as controls).
df_day_treat_control <- df_day_treat %>%
  filter(video_posted_time >= date, updated_time < date)

# Treated: products whose video was posted before the cut-off.
df_day_treat_treat <- df_day_treat %>%
  filter(video_posted_time < date)

treat_id_treat <- unique(df_day_treat_treat$taobao_id)

In [25]:
# Re-estimate the product-level effects using not-yet-treated products
# (df_day_treat_control) as the comparison group: same regression of daily
# revenue on `treated` and `search` with date and product fixed effects and
# two-way clustering.
tau_treat <- numeric(length(treat_id_treat))
gamma_treat <- numeric(length(treat_id_treat))

for (i in seq_along(treat_id_treat)) {

  df_day_treat_i <- filter(df_day_treat_treat, taobao_id == treat_id_treat[[i]])

  df_day_i <- rbind(df_day_treat_i, df_day_treat_control)

  model <- felm(
    rev_day ~ treated + search |
      as.factor(updated_time) + as.factor(taobao_id) | 0 |
      taobao_id + updated_time,
    df_day_i
  )

  # Summarise once per fit; rows follow the formula order (treated, search).
  model_coefs <- coef(summary(model))

  tau_treat[i] <- as.numeric(model_coefs[1, 1])
  gamma_treat[i] <- as.numeric(model_coefs[2, 1])
}

# Estimates that came back as NaN are recorded as a zero effect.
tau_treat[is.nan(tau_treat)] <- 0

In [26]:
# Summary statistics (including the median) of the not-yet-treated estimates.
stargazer(
  data.frame(tau_treat, gamma_treat),
  type = "text",
  digits = 2,
  median = TRUE
)


Statistic    N   Mean  St. Dev.   Min   Median   Max   
-------------------------------------------------------
tau_treat   146 19.73   180.81  -101.07  2.45  2,178.65
gamma_treat 146 -29.25   2.03   -30.17  -29.53  -10.17 
-------------------------------------------------------


In [27]:
# Attach tau_treat by product id; products outside treat_id_treat get NA.
d2 <- data.frame(taobao_id = treat_id_treat, tau_treat)

d <- d %>% left_join(d2, by = "taobao_id")

#### Table H.9

In [28]:
# Table H.9: second-stage regressions of the two alternative-control
# estimates (tau_30day, tau_treat) on pe_score, the p_score/e_score
# interaction and controls, with category fixed effects. The four influencer
# covariates are divided by one million before each fit.

m6_control1 <- felm(
  tau_30day ~ pe_score +
    p_score * e_score +
    avg_search + price + discount +
    fans + avg_play + influencer_price + expected_cpm + order_cnt + gender |
    category,
  mutate(d, across(
    c(fans, avg_play, influencer_price, expected_cpm),
    ~ .x / 1000000
  ))
)

m6_control2 <- felm(
  tau_treat ~ pe_score +
    p_score * e_score +
    avg_search + price + discount +
    fans + avg_play + influencer_price + expected_cpm + order_cnt + gender |
    category,
  mutate(d, across(
    c(fans, avg_play, influencer_price, expected_cpm),
    ~ .x / 1000000
  ))
)

# Row order of the printed table.
vars.order <- c(
  "pe_score", "e_score", "p_score", "p_score:e_score",
  "gender", "fans", "avg_play", "influencer_price", "expected_cpm", "order_cnt",
  "avg_search", "price", "discount",
  "Constant"
)

stargazer(
  m6_control1, m6_control2,
  digits = 2,
  type = "text",
  star.char = c("+", "*", "**", "***"),
  star.cutoffs = c(.1, .05, .01, .001),
  order = paste0("^", vars.order, "$") # anchored so names match exactly
)


                           Dependent variable:        
                    ----------------------------------
                       tau_30day         tau_treat    
                          (1)               (2)       
------------------------------------------------------
pe_score               103.04***         601.38***    
                        (29.17)          (164.31)     
                                                      
e_score                  -2.98            168.97      
                        (32.65)          (182.23)     
                                                      
p_score                 -110.90           -235.92     
                        (81.18)          (456.95)     
                                                      
p_score:e_score          115.07           -183.98     
                        (158.29)         (880.90)     
                                                      
gender                    5.65             0.93       
         

#### Table H.10

In [29]:
set.seed(101)

# Table H.10: gradient-boosted trees of `tau_30day` on pe_score and controls.
# Complete cases are selected explicitly so that the feature matrix and the
# label vector come from exactly the same rows; without this, rows with
# missing values are dropped only when the model matrix is built while the
# label still covers every row of `d`.
xgb_data <- d %>%
  select(
    tau_30day,
    pe_score,
    p_score, e_score,
    fans, avg_play, influencer_price, expected_cpm, order_cnt, gender,
    avg_search, price, discount, category
  ) %>%
  drop_na()

# Expand the predictors into a model matrix and drop its intercept column.
x <- sparse.model.matrix(tau_30day ~ ., data = xgb_data)[, -1]

xgb <- xgboost(
  data = as.matrix(x),
  label = xgb_data$tau_30day,
  nrounds = 20, # spelled out; `nround` relied on partial argument matching
  verbose = 0
)

# Ten features with the largest gain.
xgb.importance(model = xgb) %>%
  top_n(10, Gain) %>%
  stargazer(summary = FALSE, rownames = FALSE, type = "text")


Feature             Gain  Cover Frequency
-----------------------------------------
pe_score            0.882 0.220   0.281  
p_score             0.035 0.249   0.169  
expected_cpm        0.025 0.103   0.094  
e_score             0.017 0.032   0.114  
discount            0.016 0.088   0.058  
fans                0.008 0.083   0.072  
avg_search          0.004 0.095   0.050  
categoryElectronics 0.003 0.014   0.017  
order_cnt           0.003 0.030   0.028  
price               0.003 0.050   0.047  
-----------------------------------------


#### Table H.11

In [30]:
set.seed(101)

# Table H.11: gradient-boosted trees of `tau_treat` on pe_score and controls.
# tau_treat is NA for products outside treat_id_treat, so complete cases are
# selected once and both the feature matrix and the label are taken from that
# same data frame. Filtering the label on NA in tau_treat alone only lines up
# when no other selected column has missing values.
xgb_data <- d %>%
  select(
    tau_treat,
    pe_score,
    p_score, e_score,
    fans, avg_play, influencer_price, expected_cpm, order_cnt, gender,
    avg_search, price, discount, category
  ) %>%
  drop_na()

# Expand the predictors into a model matrix and drop its intercept column.
x <- sparse.model.matrix(tau_treat ~ ., data = xgb_data)[, -1]

xgb <- xgboost(
  data = as.matrix(x),
  label = xgb_data$tau_treat,
  nrounds = 20, # spelled out; `nround` relied on partial argument matching
  verbose = 0
)

# Ten features with the largest gain.
xgb.importance(model = xgb) %>%
  top_n(10, Gain) %>%
  stargazer(summary = FALSE, rownames = FALSE, type = "text")


Feature            Gain  Cover Frequency
----------------------------------------
pe_score          0.991  0.208   0.287  
p_score           0.003  0.195   0.132  
discount          0.002  0.115   0.088  
order_cnt         0.001  0.090   0.073  
influencer_price  0.001  0.053   0.047  
e_score           0.001  0.077   0.114  
categoryFurniture 0.001  0.049   0.019  
avg_play          0.001  0.053   0.044  
fans              0.0004 0.062   0.079  
price             0.0002 0.057   0.054  
----------------------------------------


#### Table H.12

In [31]:
# Search-based weights for splitting the first observed revenue across days.

search$updated_time <- as.Date(search$updated_time)
search$taobao_id <- as.character(search$taobao_id)

# Attach each product's first date in df and express every search
# observation relative to it (in days).
search <- search %>%
  left_join(
    df %>%
      group_by(taobao_id) %>%
      summarise(updated_time_min = min(updated_time)),
    by = "taobao_id"
  )

search$updated_time_rel <- as.numeric(search$updated_time - search$updated_time_min)

# NOTE(review): the Table H.7 cell earlier in this file already reduced
# `search` to rows dated before the product's first date in df. If that cell
# ran first, updated_time_rel is strictly negative here and no row with
# updated_time_rel == 0 can exist -- confirm this is intended.
search2 <- search %>% # calculate weight
  group_by(taobao_id) %>%
  mutate(updated_time_rel_min = min(updated_time_rel)) %>%
  filter(
    updated_time_rel_min <= -30, # product has search data reaching back 30+ days
    updated_time_rel > -30,
    updated_time_rel <= 0
  ) %>%
  mutate(weight = search / sum(search))

# Undefined weights fall back to a uniform 1/30.
search2 <- search2 %>%
  mutate(weight = ifelse(is.na(weight), 1 / 30, weight))

In [32]:
# impute daily sales (alternative allocation of the first observed revenue)
#
# Same recursion as the baseline imputation, except that for products present
# in `search2` the first observed revenue rev[1] is split using the
# search-based `weight` instead of a uniform 1/30:
#   * relative day 0:      rev[1] * weight at updated_time_rel == 0
#   * relative day 1..29:  rev[j] - rev[j - 1] + rev[1] * weight at j - 30
#   * relative day >= 30:  rev[j] - rev[j - 1] + rev_day[j - 30]
# Products without weights use rev[1] / 30 in the first two cases.
# NOTE(review): the weight lookup is keyed on the row index (j - 30), not on
# updated_time_rel[j] - 30; when the series starts at relative day 0 these
# differ by one -- confirm which is intended.

id <- unique(df$taobao_id)

df <- distinct(df, taobao_id, updated_time, .keep_all = TRUE)

# One data frame per product is stored here and bound once after the loop;
# growing `rev_day_alt` with rbind() on every iteration copies all previous rows.
rev_day_alt_parts <- vector("list", length(id))
n_done <- 0L

for (i in id) {
  df_p <- select(df, rev, taobao_id, updated_time, updated_time_rel) %>%
    filter(taobao_id == i) %>%
    arrange(updated_time_rel)
  df_p$updated_time_rel <- as.numeric(df_p$updated_time_rel)

  s <- full_seq(df_p$updated_time_rel, 1)
  rev_day_p_c <- data.frame(rep(i, length(s)), s)
  colnames(rev_day_p_c) <- c("taobao_id", "updated_time_rel")

  df_p <- left_join(rev_day_p_c, df_p, by = c("taobao_id", "updated_time_rel"))
  df_p <- fill(df_p, rev)

  # Scalar flag for this product. Testing the whole df_p$taobao_id column
  # inside ifelse() produced a length-n result that was then assigned into a
  # single element, which only worked because warnings are silenced.
  has_weights <- i %in% search2$taobao_id

  # This product's weights, filtered once instead of on every day.
  weights_i <- filter(search2, taobao_id == i)

  rev_day_p <- vector()

  # ifelse() is kept on purpose: a weight lookup that matches no row has
  # length zero, and ifelse() turns that into NA rather than an error.
  for (j in seq_len(nrow(df_p))) {
    rev_day_p[j] <- ifelse(
      has_weights,
      ifelse(
        df_p$updated_time_rel[j] == 0,
        df_p$rev[1] *
          (filter(weights_i, updated_time_rel == 0) %>% pull(weight)),
        ifelse(
          df_p$updated_time_rel[j] < 30,
          df_p$rev[j] - df_p$rev[j - 1] +
            df_p$rev[1] *
              (filter(weights_i, updated_time_rel == j - 30) %>% pull(weight)),
          df_p$rev[j] - df_p$rev[j - 1] + rev_day_p[j - 30]
        )
      ),
      ifelse(
        df_p$updated_time_rel[j] == 0,
        df_p$rev[1] / 30,
        ifelse(
          df_p$updated_time_rel[j] < 30,
          df_p$rev[j] - df_p$rev[j - 1] + df_p$rev[1] / 30,
          df_p$rev[j] - df_p$rev[j - 1] + rev_day_p[j - 30]
        )
      )
    )
  }

  df_p$rev_day <- rev_day_p

  n_done <- n_done + 1L
  rev_day_alt_parts[[n_done]] <- df_p
}

# Leading empty data frame keeps the result a plain data.frame even when
# there are no products, exactly like the incremental rbind() did.
rev_day_alt <- do.call(rbind, c(list(data.frame()), rev_day_alt_parts))

In [33]:
# Make the join key numeric on both sides.
df$updated_time_rel <- as.numeric(df$updated_time_rel)

# Attach the remaining panel columns to the alternative daily series.
# updated_time is dropped from df because rev_day_alt already carries it.
df_day_alt <- rev_day_alt %>%
  left_join(
    select(df, -rev, -updated_time),
    by = c("taobao_id", "updated_time_rel")
  )

# These overwrite the baseline treated/control daily panels.
df_day_treat <- df_day_alt %>% filter(taobao_id %in% treat_id)
df_day_control <- df_day_alt %>% filter(!taobao_id %in% treat_id)

# Re-derive the treated ids from the new panel (order follows df_day_treat).
treat_id <- unique(df_day_treat$taobao_id)

In [34]:
# Re-estimate the product-level effects on the alternative daily revenue
# series: same regression of daily revenue on `treated` and `search` with
# date and product fixed effects and two-way clustering.
tau_alt <- numeric(length(treat_id))
gamma_alt <- numeric(length(treat_id))

for (i in seq_along(treat_id)) {

  df_day_treat_i <- filter(df_day_treat, taobao_id == treat_id[[i]])
  df_day_i <- rbind(df_day_treat_i, df_day_control)

  model <- felm(
    rev_day ~ treated + search |
      as.factor(updated_time) + as.factor(taobao_id) | 0 |
      taobao_id + updated_time,
    df_day_i
  )

  # Summarise once per fit; rows follow the formula order (treated, search).
  model_coefs <- coef(summary(model))

  tau_alt[i] <- as.numeric(model_coefs[1, 1])
  gamma_alt[i] <- as.numeric(model_coefs[2, 1])
}

# Estimates that came back as NaN are recorded as a zero effect.
tau_alt[is.nan(tau_alt)] <- 0

In [35]:
# Summary statistics (including the median) of the alternative-imputation
# estimates, emitted as LaTeX.
stargazer(
  data.frame(tau_alt, gamma_alt),
  type = "latex",
  digits = 2,
  median = TRUE
)


% Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
% Date and time: Tue, Jul 11, 2023 - 00:19:55
\begin{table}[!htbp] \centering 
  \caption{} 
  \label{} 
\begin{tabular}{@{\extracolsep{5pt}}lcccccc} 
\\[-1.8ex]\hline 
\hline \\[-1.8ex] 
Statistic & \multicolumn{1}{c}{N} & \multicolumn{1}{c}{Mean} & \multicolumn{1}{c}{St. Dev.} & \multicolumn{1}{c}{Min} & \multicolumn{1}{c}{Median} & \multicolumn{1}{c}{Max} \\ 
\hline \\[-1.8ex] 
tau\_alt & 259 & 1.92 & 47.51 & $-$123.58 & 2.03 & 699.21 \\ 
gamma\_alt & 259 & 0.30 & 0.01 & 0.28 & 0.30 & 0.39 \\ 
\hline \\[-1.8ex] 
\end{tabular} 
\end{table} 


In [36]:
# Attach tau_alt by product id rather than by position, since treat_id was
# re-derived from the alternative daily panel.
d2 <- data.frame(taobao_id = treat_id, tau_alt = tau_alt)

d <- d %>% left_join(d2, by = "taobao_id")

#### Table H.13

In [37]:
# Table H.13: second-stage regression of the alternative-imputation estimate
# tau_alt on pe_score, the p_score/e_score interaction and controls, with
# category fixed effects. The four influencer covariates are divided by one
# million before the fit.

m6_alt <- felm(
  tau_alt ~ pe_score +
    p_score * e_score +
    avg_search + price + discount +
    fans + avg_play + influencer_price + expected_cpm + order_cnt + gender |
    category,
  mutate(d, across(
    c(fans, avg_play, influencer_price, expected_cpm),
    ~ .x / 1000000
  ))
)

# Row order of the printed table.
vars.order <- c(
  "pe_score", "e_score", "p_score", "p_score:e_score",
  "gender", "fans", "avg_play", "influencer_price", "expected_cpm", "order_cnt",
  "avg_search", "price", "discount",
  "Constant"
)

stargazer(
  m6_alt,
  digits = 2,
  type = "text",
  star.char = c("+", "*", "**", "***"),
  star.cutoffs = c(.1, .05, .01, .001),
  order = paste0("^", vars.order, "$") # anchored so names match exactly
)


                        Dependent variable:    
                    ---------------------------
                              tau_alt          
-----------------------------------------------
pe_score                     101.08***         
                              (28.88)          
                                               
e_score                        -2.46           
                              (32.33)          
                                               
p_score                       -115.00          
                              (80.37)          
                                               
p_score:e_score               129.74           
                             (156.71)          
                                               
gender                         5.72            
                              (6.36)           
                                               
fans                           0.13            
                              (1.49)   

#### Table H.14

In [38]:
set.seed(101)

# Table H.14: gradient-boosted trees of `tau_alt` on pe_score and controls.
# Complete cases are selected once so that the feature matrix and the label
# vector come from exactly the same rows; taking the label from all of `d`
# only lines up when drop_na() removes nothing.
xgb_data <- d %>%
  select(
    tau_alt,
    pe_score,
    p_score, e_score,
    fans, avg_play, influencer_price, expected_cpm, order_cnt, gender,
    avg_search, price, discount, category
  ) %>%
  drop_na()

# Expand the predictors into a model matrix and drop its intercept column.
x <- sparse.model.matrix(tau_alt ~ ., data = xgb_data)[, -1]

xgb <- xgboost(
  data = as.matrix(x),
  label = xgb_data$tau_alt,
  nrounds = 20, # spelled out; `nround` relied on partial argument matching
  verbose = 0
)

# Ten features with the largest gain.
xgb.importance(model = xgb) %>%
  top_n(10, Gain) %>%
  stargazer(summary = FALSE, rownames = FALSE, type = "text")


Feature      Gain  Cover Frequency
----------------------------------
pe_score     0.862 0.258   0.301  
expected_cpm 0.041 0.063   0.060  
p_score      0.031 0.246   0.176  
e_score      0.018 0.023   0.081  
categoryFood 0.009 0.011   0.019  
discount     0.007 0.078   0.079  
avg_search   0.006 0.055   0.035  
order_cnt    0.006 0.030   0.035  
fans         0.006 0.085   0.068  
price        0.005 0.051   0.049  
----------------------------------
