# Smoking Cessation Study - Cox regression

In [27]:
library(survival)

# Load the survival data written out by the savrw2 notebook.
# Fields containing the literal text "9999.0" are read as NA.
sdf <- read.csv(
  file = "../savrw2_survival_data.csv",
  na.strings = "9999.0"
)
str(sdf)

'data.frame':	436 obs. of  15 variables:
 $ X                     : int  0 1 2 3 4 5 6 7 8 9 ...
 $ CallDayNumber         : num  86 86 86 86 86 86 86 86 86 86 ...
 $ firstcigarettereported: num  2 2 2 2 2 2 2 2 2 2 ...
 $ baseline_b1_001       : num  2 5 2 5 2 2 4 1 5 5 ...
 $ baseline_c1_001       : num  4 5 5 4 4 4 3 4 5 5 ...
 $ baseline_d1_001       : num  4 4 2 4 4 4 2 4 4 5 ...
 $ baseline_b1_002       : num  2 5 2 5 4 2 3 4 4 5 ...
 $ baseline_c1_002       : num  4 5 5 5 4 5 3 4 4 5 ...
 $ baseline_d1_002       : num  4 3 4 4 4 4 2 3 4 5 ...
 $ baseline_b1_003       : num  4 1 4 3 2 4 3 4 5 1 ...
 $ baseline_c1_003       : num  2 1 1 1 2 2 3 4 5 1 ...
 $ baseline_d1_003       : num  2 3 2 1 3 2 3 4 5 1 ...
 $ baseline_b1_004       : num  2 5 1 2 2 2 3 1 4 5 ...
 $ baseline_c1_004       : num  4 4 3 1 4 4 3 1 4 5 ...
 $ baseline_d1_004       : num  4 3 4 4 3 4 3 4 3 5 ...


## Counts by day of call

In [28]:
# Call day number for every row, then a count of rows per day.
day <- sdf[["CallDayNumber"]]
table(day)

day
  1   2   3   4   5   6   7   8   9  10  11  12  14  16  17  18  19  24  25  26 
119  83  36  16  10  12   6   5  10   3   1   3   2   2   3   1   1   2   1   1 
 27  28  30  31  32  36  38  41  48  52  59  60  63  71  86 
  1   2   1   1   2   2   1   2   1   1   1   1   1   1  41 

## Counts by firstcigarettereported code

In [29]:
# firstcigarettereported code for every row, then a count of rows per code.
fcr_code <- sdf[["firstcigarettereported"]]
table(fcr_code)

fcr_code
  0   1   2 
 62 335  39 

## "Something is seriously wrong"  (using sum of three values)

In [30]:
# Score for item 001: sum of the b1, c1 and d1 baseline responses.
# A row with any of the three values missing gets NA.
seriously_wrong <- with(
  sdf,
  baseline_b1_001 + baseline_c1_001 + baseline_d1_001
)
table(seriously_wrong)

seriously_wrong
 3  4  5  6  7  8  9 10 11 12 13 14 15 
 8  1  6 15 18 15 35 50 60 63 55 45 58 

## "I am pretty sick"  (using sum of three values)

In [31]:
# Score for item 002: sum of the b1, c1 and d1 baseline responses.
# A row with any of the three values missing gets NA.
pretty_sick <- with(
  sdf,
  baseline_b1_002 + baseline_c1_002 + baseline_d1_002
)
table(pretty_sick)

pretty_sick
 3  4  5  6  7  8  9 10 11 12 13 14 15 
 9  4 12 17 17 28 36 52 58 75 40 36 45 

## "My illness is minor"  (using sum of three values)

In [32]:
# Score for item 003: sum of the b1, c1 and d1 baseline responses.
# A row with any of the three values missing gets NA.
minor_illness <- with(
  sdf,
  baseline_b1_003 + baseline_c1_003 + baseline_d1_003
)
table(minor_illness)

minor_illness
 3  4  5  6  7  8  9 10 11 12 13 14 15 
57 36 41 63 58 48 45 31 22 19  3  2  4 

## "I have a life threatening illness"  (using sum of three values)

In [33]:
# Score for item 004: sum of the b1, c1 and d1 baseline responses.
# A row with any of the three values missing gets NA.
life_threat <- with(
  sdf,
  baseline_b1_004 + baseline_c1_004 + baseline_d1_004
)
table(life_threat)

life_threat
 3  4  5  6  7  8  9 10 11 12 13 14 15 
25  8 16 38 34 44 41 40 33 46 33 26 45 

## Set up event flags for survival analysis

In [34]:
# Build the event indicator from the firstcigarettereported code:
#   fcr_code 1 -> event = 1 (event on the day specified)
#   fcr_code 2 -> event = 0 (right-censored)
#   fcr_code 0 -> event = 2, which is outside the 0/1 coding; those rows are
#                 removed by the filter below.
event <- 2 - fcr_code

sdf2 <- data.frame(
  event, day, fcr_code,
  seriously_wrong, pretty_sick, minor_illness, life_threat
)

# Keep only rows with a positive code. which() is used so that a missing
# fcr_code drops the row; a bare logical index would instead insert an
# all-NA row for every NA in the condition.
surv_data <- sdf2[which(sdf2$fcr_code > 0), ]
str(surv_data)

'data.frame':	374 obs. of  7 variables:
 $ event          : num  0 0 0 0 0 0 0 0 0 0 ...
 $ day            : num  86 86 86 86 86 86 86 86 86 86 ...
 $ fcr_code       : num  2 2 2 2 2 2 2 2 2 2 ...
 $ seriously_wrong: num  10 14 9 13 10 10 9 9 14 15 ...
 $ pretty_sick    : num  10 13 11 14 12 11 8 11 12 15 ...
 $ minor_illness  : num  8 5 7 5 7 8 9 12 15 3 ...
 $ life_threat    : num  10 12 8 7 9 10 9 6 11 15 ...


   ## Cox regression with all four predictors

In [35]:
# Cox model of (day, event) on all four summed predictors together.
# Ties in event time are handled with the Efron method.
cr <- coxph(
  Surv(day, event) ~
    seriously_wrong + pretty_sick + minor_illness + life_threat,
  data = surv_data,
  method = "efron"
)
cr
summary(cr)

Call:
coxph(formula = Surv(day, event) ~ seriously_wrong + pretty_sick + 
    minor_illness + life_threat, data = surv_data, method = "efron")

                    coef exp(coef) se(coef)     z     p
seriously_wrong  0.05080   1.05211  0.02800  1.81 0.070
pretty_sick     -0.06472   0.93733  0.02580 -2.51 0.012
minor_illness   -0.00393   0.99608  0.02338 -0.17 0.867
life_threat     -0.02253   0.97772  0.01981 -1.14 0.255

Likelihood ratio test=8.27  on 4 df, p=0.0822
n= 370, number of events= 331 
   (4 observations deleted due to missingness)

Call:
coxph(formula = Surv(day, event) ~ seriously_wrong + pretty_sick + 
    minor_illness + life_threat, data = surv_data, method = "efron")

  n= 370, number of events= 331 
   (4 observations deleted due to missingness)

                     coef exp(coef)  se(coef)      z Pr(>|z|)  
seriously_wrong  0.050800  1.052112  0.028003  1.814   0.0697 .
pretty_sick     -0.064718  0.937332  0.025802 -2.508   0.0121 *
minor_illness   -0.003928  0.996080  0.023384 -0.168   0.8666  
life_threat     -0.022533  0.977719  0.019814 -1.137   0.2554  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

                exp(coef) exp(-coef) lower .95 upper .95
seriously_wrong    1.0521     0.9505    0.9959     1.111
pretty_sick        0.9373     1.0669    0.8911     0.986
minor_illness      0.9961     1.0039    0.9515     1.043
life_threat        0.9777     1.0228    0.9405     1.016

Concordance= 0.572  (se = 0.025 )
Rsquare= 0.022   (max possible= 1 )
Likelihood ratio test= 8.27  on 4 df,   p=0.0822

   ## Cox regression with only "I am pretty sick"

In [36]:
# Single-predictor Cox model: pretty_sick only.
# Ties in event time are handled with the Efron method.
cr <- coxph(
  Surv(day, event) ~ pretty_sick,
  data = surv_data,
  method = "efron"
)
cr
summary(cr)

Call:
coxph(formula = Surv(day, event) ~ pretty_sick, data = surv_data, 
    method = "efron")

               coef exp(coef) se(coef)     z     p
pretty_sick -0.0399    0.9609   0.0183 -2.18 0.029

Likelihood ratio test=4.63  on 1 df, p=0.0314
n= 370, number of events= 331 
   (4 observations deleted due to missingness)

Call:
coxph(formula = Surv(day, event) ~ pretty_sick, data = surv_data, 
    method = "efron")

  n= 370, number of events= 331 
   (4 observations deleted due to missingness)

                coef exp(coef) se(coef)      z Pr(>|z|)  
pretty_sick -0.03990   0.96089  0.01828 -2.183   0.0291 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

            exp(coef) exp(-coef) lower .95 upper .95
pretty_sick    0.9609      1.041    0.9271    0.9959

Concordance= 0.559  (se = 0.024 )
Rsquare= 0.012   (max possible= 1 )
Likelihood ratio test= 4.63  on 1 df,   p=0.03141
Wald test            = 4.76  on 1 df,   p=0.02906
Score (logrank) test = 4.77  on 1 df,   p=0.02894


   ## Cox regression with only "Something is seriously wrong"

In [37]:
# Single-predictor Cox model: seriously_wrong only.
# Ties in event time are handled with the Efron method.
cr <- coxph(
  Surv(day, event) ~ seriously_wrong,
  data = surv_data,
  method = "efron"
)
cr
summary(cr)

Call:
coxph(formula = Surv(day, event) ~ seriously_wrong, data = surv_data, 
    method = "efron")

                    coef exp(coef) se(coef)     z    p
seriously_wrong -0.00822   0.99181  0.01879 -0.44 0.66

Likelihood ratio test=0.19  on 1 df, p=0.663
n= 370, number of events= 331 
   (4 observations deleted due to missingness)

Call:
coxph(formula = Surv(day, event) ~ seriously_wrong, data = surv_data, 
    method = "efron")

  n= 370, number of events= 331 
   (4 observations deleted due to missingness)

                     coef exp(coef)  se(coef)      z Pr(>|z|)
seriously_wrong -0.008219  0.991815  0.018789 -0.437    0.662

                exp(coef) exp(-coef) lower .95 upper .95
seriously_wrong    0.9918      1.008     0.956     1.029

Concordance= 0.52  (se = 0.024 )
Rsquare= 0.001   (max possible= 1 )
Likelihood ratio test= 0.19  on 1 df,   p=0.6628
Wald test            = 0.19  on 1 df,   p=0.6618
Score (logrank) test = 0.19  on 1 df,   p=0.6618


Note: n (370 of the 374 rows in surv_data) and the number of events (331 of 335) are slightly reduced because coxph drops the 4 observations that have a missing predictor value.

   ## Cox regression with only "My illness is minor"

In [38]:
# Single-predictor Cox model: minor_illness only.
# Ties in event time are handled with the Efron method.
cr <- coxph(
  Surv(day, event) ~ minor_illness,
  data = surv_data,
  method = "efron"
)
cr
summary(cr)

Call:
coxph(formula = Surv(day, event) ~ minor_illness, data = surv_data, 
    method = "efron")

                coef exp(coef) se(coef)    z   p
minor_illness 0.0161    1.0162   0.0192 0.84 0.4

Likelihood ratio test=0.7  on 1 df, p=0.404
n= 370, number of events= 331 
   (4 observations deleted due to missingness)

Call:
coxph(formula = Surv(day, event) ~ minor_illness, data = surv_data, 
    method = "efron")

  n= 370, number of events= 331 
   (4 observations deleted due to missingness)

                 coef exp(coef) se(coef)     z Pr(>|z|)
minor_illness 0.01608   1.01621  0.01921 0.837    0.403

              exp(coef) exp(-coef) lower .95 upper .95
minor_illness     1.016      0.984    0.9787     1.055

Concordance= 0.528  (se = 0.024 )
Rsquare= 0.002   (max possible= 1 )
Likelihood ratio test= 0.7  on 1 df,   p=0.4043
Wald test            = 0.7  on 1 df,   p=0.4026
Score (logrank) test = 0.7  on 1 df,   p=0.4026


   ## Cox regression with only "I have a life-threatening illness"

In [39]:
# Single-predictor Cox model: life_threat only.
# Ties in event time are handled with the Efron method.
cr <- coxph(
  Surv(day, event) ~ life_threat,
  data = surv_data,
  method = "efron"
)
cr
summary(cr)

Call:
coxph(formula = Surv(day, event) ~ life_threat, data = surv_data, 
    method = "efron")

               coef exp(coef) se(coef)    z    p
life_threat -0.0222    0.9780   0.0159 -1.4 0.16

Likelihood ratio test=1.95  on 1 df, p=0.162
n= 370, number of events= 331 
   (4 observations deleted due to missingness)

Call:
coxph(formula = Surv(day, event) ~ life_threat, data = surv_data, 
    method = "efron")

  n= 370, number of events= 331 
   (4 observations deleted due to missingness)

                coef exp(coef) se(coef)      z Pr(>|z|)
life_threat -0.02223   0.97801  0.01590 -1.398    0.162

            exp(coef) exp(-coef) lower .95 upper .95
life_threat     0.978      1.022     0.948     1.009

Concordance= 0.54  (se = 0.025 )
Rsquare= 0.005   (max possible= 1 )
Likelihood ratio test= 1.95  on 1 df,   p=0.1625
Wald test            = 1.96  on 1 df,   p=0.162
Score (logrank) test = 1.96  on 1 df,   p=0.1618
