In [1]:
* Zhengting (Johnathan) He
* May 8th, 2021
* healthy-aging project
* Verify Yaxi's code on generting survival time: 98_14wave.do

In [32]:
// set working directories
global root "F:\Box Sync\Archives2020LLY\Zhengting\Duke Kunshan University Intern (zh133@duke.edu)\4 healthy aging-CLHLS\Group meeting coordination\survival time"
* define path for data sources
global RAW "${root}/raw data"
* define path for output data
global OUT "${root}/out data"
* define path for INTERMEDIATE
global INTER "${root}/inter data"

In [3]:
/*********************************************************************************************************************/
/************************************* I. logical check on death status and date *************************************/
/*********************************************************************************************************************/

/************************************* (1) Extract new added data at current wave *************************************/
use "${RAW}/2000_2014_longitudinal_dataset_released_version1.dta", clear

In [4]:
/************************************* (2) Check the actual values of death date variables, against the codebook for those variables *************************************/
foreach var in d2vyear d5vyear d8vyear d11vyear d14vyear d2vmonth d5vmonth d8vmonth d11vmonth d14vmonth d2vday d5vday d8vday d11vday d14vday dth98_00 dth00_02 dth02_05 dth05_08 dth08_11 dth11_14 {
    codebook `var'
}
// codebook on death variables

// validated death year
* d2vyear, d5vyear, d8vyear, d11vyear, d14vyear: validated year of death*/
* -9:lost to follow up in the 2002/2005/2008/2011/2014 survey*
* -8:died or lost to follow-up in previous waves*
* -7:it is for the deceased persons, not applicable for survivors*
* . 9999: missing*/

// validated death month
* d2vmonth, d5vmonth, d8vmonth, d11vmonth, d14vmonth: validated month of death*/
* -9: lost to follow-up in the 2002/05/08/11/14 survey*/
* -8: died or lost to follow-up in previous waves*/
* -7: it is for the deceased persons, not applicable for survivors*/
* . 99: missing*/

// validated death day
* d0vday, d2vday, d5vday, d8vday, d11vday, d14vday: validated day of death*/
* -9: lost to follow-up in the 2002/05/08/11/14 survey*/ 
* -8: died or lost to follow-up in previous waves*/
* -7: it is for the deceased persons, not applicable for survivors*/
* . 99: missing*/

// survival status
* dth00_02, dth02_05, dth05_08, dth08_11, dth11_14: status of survival, death, or lost to follow-up from 2000-2002/2002-2005/2005-2008/2008-2011/2011-2014 waves*/
* dth**_##:
* -9: lost to follow-up at the ## survey;
* -8: died or lost to follow-up in previous waves;
* 0: surviving at the ## survey;
* 1: died before the ## survey
* dth08_11:
* 2: surviving at 2011 survey but died before 2012 survey (only one, 2011.12.9 died)



--------------------------------------------------------------------------------
d2vyear                                                  validated year of death
--------------------------------------------------------------------------------

                  type:  numeric (int)
                 label:  d2vyear, but 3 nonmissing values are not labeled

                 range:  [-9,2002]                    units:  1
         unique values:  5                        missing .:  0/11,199

            tabulation:  Freq.   Numeric  Label
                         1,541        -9  lost to follow-up in the 2002
                                          survey
                         6,315        -7  the deceased persons only, not
                                          applicable to survivors
                           701      2000  
                         2,028      2001  
                           614      2002  

------------------------------------------------------------------

r(111);
r(111);






In [5]:
/************************************* (3) Check whether there are logical input mistakes between death status for different waves *************************************/
// check whether there are logical mistakes for dth**_##
* If the current death status is -9/0/1, the previous one can only be 0;
* if the current death status is -8, then the previous can only be -8,-9 and 1.
preserve
rename dth00_02 dth2
rename dth02_05 dth3
rename dth05_08 dth4
rename dth08_11 dth5
rename dth11_14 dth6
label drop _all
forv i = 2/5 {
    local j = `i' + 1
    tab dth`i' if dth`j' == -9 | dth`j' == 0 | dth`j' == 1, missing //0
    tab dth`i' if dth`j' == -8, missing //-8, -9, 1
}
restore
keep if (dth05_08 == -9 | dth05_08 == 0 | dth05_08 == 1) & dth02_05 == 1










  status of |
  survival, |
  death, or |
    lost to |
  follow-up |
   from the |
    2000 to |
 2002 waves |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |      6,315      100.00      100.00
------------+-----------------------------------
      Total |      6,315      100.00

  status of |
  survival, |
  death, or |
    lost to |
  follow-up |
   from the |
    2000 to |
 2002 waves |      Freq.     Percent        Cum.
------------+-----------------------------------
         -9 |      1,541       31.55       31.55
          1 |      3,343       68.45      100.00
------------+-----------------------------------
      Total |      4,884      100.00

  status of |
  survival, |
  death, or |
    lost to |
  follow-up |
   from the |
    2002 to |
 2005 waves |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |      2,628       99.96       99.96
          1 |          1        0

In [6]:
* id = 45107898 were problematic
* id=45107898, have two died dates, 02_05wave:2002.8.29, and 05_08wave:2006.12.21, both dth02_05 and dth05_08 are 1. Inferring from the data, the person should die in 05_08wave, as there is detailed info in 2005 for that person.

In [7]:
*****************************create work.dta, which has changed the death status according results above, and renanme dth**_##***********
clear
use "${RAW}/2000_2014_longitudinal_dataset_released_version1.dta"  //******need to be changed

In [8]:
replace dth02_05 = 0 if id == 45107898
replace d5vday = -7 if id == 45107898
replace d5vmonth = -7 if id == 45107898
replace d5vyear = -7 if id == 45107898


(1 real change made)

(1 real change made)

(1 real change made)

(1 real change made)


In [9]:
rename dth00_02 dth2
rename dth02_05 dth5
rename dth05_08 dth8
rename dth08_11 dth11
rename dth11_14 dth14

In [10]:
global waves "2 5 8 11 14"                                                    //******need to be changed
global year1 "2000 2001 2002 2003 2005 2006 2007 2009 2010 2011 2013 2014"
global year2 "2000 2004 2008 2012"
global months "4 6 9 11"
global wavein "in0 in2 in5 in8 in11 in14"
save "${INTER}/work.dta", replace







(note: file F:\Box Sync\Archives2020LLY\Zhengting\Duke Kunshan University Intern
>  (zh133@duke.edu)\4 healthy aging-CLHLS\Group meeting coordination\survival ti
> me/inter data/work.dta not found)
file F:\Box Sync\Archives2020LLY\Zhengting\Duke Kunshan University Intern (zh133
> @duke.edu)\4 healthy aging-CLHLS\Group meeting coordination\survival time/inte
> r data/work.dta saved


In [11]:
/************************************* (4) Check whether there are logical input mistakes between death status and the verified death year, month, day *************************************/
// check whether there are logical mistakes between d*vyear d*vmonth d*vday dth**_##
foreach i of global waves {
    // unify missing value to "99"
    recode d`i'vday(. 88=99) 
    recode d`i'vmonth(. 88=99)
    recode d`i'vyear(. 8888 9999=99)  //no 88 for all the 4 vars
    
    replace d`i'vyear = 1 if d`i'vyear > 1997 & d`i'vyear < 2020
    replace d`i'vmonth = 1 if d`i'vmonth > 0 & d`i'vmonth < 13
    replace d`i'vday = 1 if d`i'vday > 0 & d`i'vday < 32
    
    bys d`i'vyear: gen fre`i'_year = _N
    bys d`i'vmonth: gen fre`i'_month = _N
    bys d`i'vday: gen fre`i'_day = _N
    bys dth`i': gen fre`i'_dth = _N
}
label drop _all
save "${INTER}/work1.dta", replace


(d2vday: 0 changes made)
(d2vmonth: 0 changes made)
(d2vyear: 0 changes made)
(3,343 real changes made)
(3,004 real changes made)
(3,141 real changes made)
(d5vday: 0 changes made)
(d5vmonth: 0 changes made)
(d5vyear: 37 changes made)
(2,925 real changes made)
(2,639 real changes made)
(2,670 real changes made)
(d8vday: 0 changes made)
(d8vmonth: 0 changes made)
(d8vyear: 0 changes made)
(1,174 real changes made)
(1,088 real changes made)
(1,137 real changes made)
(d11vday: 16 changes made)
(d11vmonth: 0 changes made)
(d11vyear: 0 changes made)
(458 real changes made)
(410 real changes made)
(426 real changes made)
(d14vday: 11004 changes made)
(d14vmonth: 10998 changes made)
(d14vyear: 10999 changes made)
(200 real changes made)
(174 real changes made)
(189 real changes made)


(note: file F:\Box Sync\Archives2020LLY\Zhengting\Duke Kunshan University Intern
>  (zh133@duke.edu)\4 healthy aging-CLHLS\Group meeting coordination\survival ti
> me/inter data/work1.dta not found)
file F:\Bo

In [12]:
foreach i of global waves{
    keep d`i'vyear d`i'vmonth d`i'vday dth`i' fre`i'_year fre`i'_month fre`i'_day fre`i'_dth 
    duplicates drop d`i'vyear d`i'vmonth d`i'vday dth`i', force 
    save "${INTER}/wave`i'.dta", replace
    use "${INTER}/work1.dta", clear
}
use "${INTER}/wave14.dta",clear
append using "${INTER}/wave2.dta" "${INTER}/wave5.dta" "${INTER}/wave8.dta" "${INTER}/wave11.dta"



Duplicates in terms of d2vyear d2vmonth d2vday dth2

(11,195 observations deleted)
(note: file F:\Box Sync\Archives2020LLY\Zhengting\Duke Kunshan University Intern
>  (zh133@duke.edu)\4 healthy aging-CLHLS\Group meeting coordination\survival ti
> me/inter data/wave2.dta not found)
file F:\Box Sync\Archives2020LLY\Zhengting\Duke Kunshan University Intern (zh133
> @duke.edu)\4 healthy aging-CLHLS\Group meeting coordination\survival time/inte
> r data/wave2.dta saved

Duplicates in terms of d5vyear d5vmonth d5vday dth5

(11,189 observations deleted)
(note: file F:\Box Sync\Archives2020LLY\Zhengting\Duke Kunshan University Intern
>  (zh133@duke.edu)\4 healthy aging-CLHLS\Group meeting coordination\survival ti
> me/inter data/wave5.dta not found)
file F:\Box Sync\Archives2020LLY\Zhengting\Duke Kunshan University Intern (zh133
> @duke.edu)\4 healthy aging-CLHLS\Group meeting coordination\survival time/inte
> r data/wave5.dta saved

Duplicates in terms of d8vyear d8vmonth d8vday dth8

(11,1

In [13]:
browse

In [14]:
/* The results show that, in wave0-wave11, -9, -8, 0/-7(alive) have completely the same freq, 
all missing values in d*vyear/month/day occur only when dth*=1(died). Only in wave14, all is missing in d14vyear/month/day when dth14=-9/-8/0. 
There is no logical mistakes between the 4 vars.*/

In [15]:
// tabulate the lost,died and alive number for each wave
use "${INTER}/work1.dta",clear
foreach i of global waves{
    tabulate dth`i' if dth`i' !=-8
}




  status of |
  survival, |
  death, or |
    lost to |
  follow-up |
   from the |
    2000 to |
 2002 waves |      Freq.     Percent        Cum.
------------+-----------------------------------
         -9 |      1,541       13.76       13.76
          0 |      6,315       56.39       70.15
          1 |      3,343       29.85      100.00
------------+-----------------------------------
      Total |     11,199      100.00

  status of |
  survival, |
  death, or |
    lost to |
  follow-up |
   from the |
    2002 to |
 2005 waves |      Freq.     Percent        Cum.
------------+-----------------------------------
         -9 |        724       11.46       11.46
          0 |      2,629       41.63       53.10
          1 |      2,962       46.90      100.00
------------+-----------------------------------
      Total |      6,315      100.00

  status of |
  survival, |
  death, or |
    lost to |
  follow-up |
   from the |
    2005 to |
 2008 waves |      Freq.     Percent   

In [16]:
foreach i of global waves{
    erase "${INTER}/wave`i'.dta"
}
erase "${INTER}/work1.dta"

In [17]:
/********************************************************************************************************************/
/************************************* II. Replacement of NA and input mistakes *************************************/
/********************************************************************************************************************/

use "${INTER}/work.dta", clear
/*change all . to 99 for month&day, . to 9999 for year*/
foreach a of global waves{
    recode d`a'vday (. = 99) 
    recode d`a'vmonth (. = 99)
    recode d`a'vyear (. = 9999)
}



(d2vday: 0 changes made)
(d2vmonth: 0 changes made)
(d2vyear: 0 changes made)
(d5vday: 0 changes made)
(d5vmonth: 0 changes made)
(d5vyear: 0 changes made)
(d8vday: 0 changes made)
(d8vmonth: 0 changes made)
(d8vyear: 0 changes made)
(d11vday: 16 changes made)
(d11vmonth: 0 changes made)
(d11vyear: 0 changes made)
(d14vday: 11004 changes made)
(d14vmonth: 10998 changes made)
(d14vyear: 10999 changes made)


In [18]:
****calculate the mid-point between the last interview date of the previous wave and the first interview date of the next wave
capture noisily gen in98 = mdy(month98, date98, year9899)                            
capture noisily gen in0 = mdy(month00, day00, 2000)
capture noisily gen in2 = mdy(month_2, day_2, 2002)
capture noisily gen in5 = mdy(month_5, day_5, 2005)
capture noisily gen in8 = mdy(month_8, day_8, year_8)
gen in11 = mdy(monthin_11, dayin_11, yearin_11)
gen in14 = mdy(monthin_14, dayin_14, yearin_14)


year9899 not found


(4,887 missing values generated)

(8,570 missing values generated)

(10,249 missing values generated)

(10,836 missing values generated)

(11,056 missing values generated)


In [19]:
forv i=1/5 {                                                                     //******need to be changed                                               
    local wavein2 = word("$wavein", `i')
         egen min_`wavein2' = min(`wavein2')
         egen max_`wavein2' = max(`wavein2')
    local j = `i'+1
    local wavein3 = word("$wavein", `j')
         egen min_`wavein3' = min(`wavein3')
         egen max_`wavein3' = max(`wavein3')
    gen mid_`wavein2'_`wavein3' = (max_`wavein2' + min_`wavein3')/2
    gen midyear_`wavein2'_`wavein3' = year(mid_`wavein2'_`wavein3')
    gen midmonth_`wavein2'_`wavein3' = month(mid_`wavein2'_`wavein3')
    gen midday_`wavein2'_`wavein3' = day(mid_`wavein2'_`wavein3')
drop min_`wavein3' max_`wavein3'
}

In [20]:
/************************************* (5) Replacement of the missing death date according to Rule 1 *************************************/
* Rule 1:
* For the three variables, year, month, and day:
* a. if only month is missing, the month is assumed to be July;
* b. if only day is missing, the day is assumed to be 15;
* c. for the rest of all the scenarios, the year/month/day is assumed to be that of the mid-point between the last interview date of the previous wave and
* the first interview date of the next wave. (these scenarios inc, all the three variables are missing, or any two variables are missing, or only year is
* missing.)
local j = 1
foreach i of global waves { 
    local inid = word("$wavein",`j')
        replace d`i'vday = midday_`inid'_in`i' if d`i'vday == 99 & dth`i' == 1
        replace d`i'vmonth = midmonth_`inid'_in`i' if d`i'vmonth == 99 & dth`i' == 1
        replace d`i'vyear = midyear_`inid'_in`i' if d`i'vyear == 9999 & dth`i' == 1
      
        recode d`i'vday (99 = 15) if d`i'vmonth != 99 & d`i'vyear != 9999 & dth`i' == 1 
        recode d`i'vmonth (99 = 7) if d`i'vday != 99 & d`i'vyear != 9999 & dth`i' == 1 
    
    local j = `j'+1
}



(101 real changes made)
(0 real changes made)
(0 real changes made)
(d2vday: 0 changes made)
(d2vmonth: 0 changes made)
(196 real changes made)
(42 real changes made)
(37 real changes made)
(d5vday: 0 changes made)
(d5vmonth: 0 changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(d8vday: 0 changes made)
(d8vmonth: 0 changes made)
(16 real changes made)
(0 real changes made)
(0 real changes made)
(d11vday: 0 changes made)
(d11vmonth: 0 changes made)
(6 real changes made)
(0 real changes made)
(1 real change made)
(d14vday: 0 changes made)
(d14vmonth: 0 changes made)


In [21]:
/************************************* (6) Modify input mistakes of death date according to Rule 2 *************************************/
* Rule 2:
* a. change day 29/max of Feb to 28 for years 99, 01, 02, 03, 05, 06, 07, 09, 10, 11, 13, 14 (non-leap year);
* b. change day 30/max of Feb to 29 for years 00, 04, 08, 12 (leap year);
* c. change day 31 to 30 for months 4, 6, 9, 11
foreach i of global waves{
    foreach year of global year1{
        recode d`i'vday (29/max=28) if d`i'vyear == `year' & d`i'vmonth == 2
    }
    foreach year of global year2{
        recode d`i'vday (30/max=29) if d`i'vyear == `year' & d`i'vmonth == 2
    }
    foreach month of global months{
        recode d`i'vday (31=30) if d`i'vmonth == `month'
    }
}


(d2vday: 0 changes made)
(d2vday: 11 changes made)
(d2vday: 6 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 0 changes made)
(d2vday: 1 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 2 changes made)
(d5vday: 2 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 1 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made)
(d5vday: 0 changes made

In [22]:
/****************************************************************************************************************************************/
/************************************* III. calculating survival time, censor and lost to follow-up *************************************/
/****************************************************************************************************************************************/

/************************************* (7) Replacement of the missing interview baseline date according to Rule 3 *************************************/
* Rule 3:
* a. if only the interview day is missing, then the day is assumed to be 15th
* b. if both month and day are missing and the year isn't missing, or only the month is missing, the month/day is assumed to be that of the mid-point between the earliest interview date
* and the latest interiew date of that year
* c. no interview year is missing

recode day00 (99=15) if month00 != 99                                             //******need to be changed                                               

(day00: 0 changes made)


In [23]:
/************************************* (8) Modify input mistakes of interview baseline date according to Rule 2 *************************************/
* Rule 2:
* a. change day 29/max of Feb to 28 for years 99, 01, 02, 03, 05, 06, 07, 09, 10, 11, 13, 14 (non-leap year);
* b. change day 30/max of Feb to 29 for years 00, 04, 08, 12 (leap year);
* c. change day 31 to 30 for months 4, 6, 9, 11
recode day00 (29/max=28) if month00 == 2

foreach month of global months{
    recode day00 (31=30) if month00 == `month'
}


(day00: 0 changes made)

(day00: 0 changes made)
(day00: 0 changes made)
(day00: 0 changes made)
(day00: 0 changes made)


In [24]:
****set interview baseline
**codebook on interview date variables
* date00: day of interview of the 2000 survey; 1~31, 99=missing
* month00: month of the interview of the 2000 survey*; 1~12, 99=missing
gen interview_baseline = mdy(month00, day00, 2000)                                  //******need to be changed      

In [25]:
/************************************* (9) Calculate survival time for each person according to Rule 4 *************************************/
* Rule 4:
* Generate two different survival time (**for data sets with suffix_14**):

* One is `survival_bas', from interview baseline to death or censored, **up to 2014 wave**.
* a. For those died in the study: survival time = death date - interview date at baseline;
* b. For those lost in the study: survival time = the mid-point of the two adjacent waves - interview date at baseline;
* (the mid-point of the two adjacent waves is generated according to Rule 1)
* c. For those still alive at the end of the study: survival time = interview date in the last wave - interview date at baseline;
* d. If survival_bas < 0, change survival time to 0.

* Another one is `survival_bth', from birth to death or censored, **up to 2014 wave**.
* e. survival_bth = survival_bas + verified age (*trueage*)

* Variables for death/lost status
* `censor' is coded as: 1 = died, 0 = not died (alive or lost);
* `lost' is coed as: 1 = lost, . = not lost

* gen survival_bas,means the years from baseline to death or censored
* generate dthyear/month/day, means the exact death year/month/day of those who died during the whole period (2000-2014)
* gen lostdate, means the lost date for those lost in the survey, and equals to the mid-point of last day of the previous interview and the first day of the next one

gen dthyear = .
gen dthmonth = .
gen dthday = .
gen lostdate = .
gen survival_bas = .


(11,199 missing values generated)

(11,199 missing values generated)

(11,199 missing values generated)

(11,199 missing values generated)

(11,199 missing values generated)


In [26]:
local j=1
foreach i of global waves{
    replace dthyear = d`i'vyear if d`i'vyear > 0 & d`i'vyear < 2020
    replace dthmonth = d`i'vmonth if d`i'vmonth > 0 & d`i'vmonth < 13
    replace dthday = d`i'vday if d`i'vday > 0 & d`i'vday < 32
local inid = word("$wavein", `j')
    replace lostdate = mdy(midmonth_`inid'_in`i', midday_`inid'_in`i', midyear_`inid'_in`i') if dth`i' == -9
local j = `j' + 1
}  //3368 died in dth98_00, 1604 in dth00_02, 1308 in dth02_05, 480 in dth05_08, 177 in dth08_11, 75 in dth11_14



(3,343 real changes made)
(3,343 real changes made)
(3,343 real changes made)
(1,541 real changes made)
(2,962 real changes made)
(2,962 real changes made)
(2,962 real changes made)
(724 real changes made)
(1,174 real changes made)
(1,174 real changes made)
(1,174 real changes made)
(505 real changes made)
(458 real changes made)
(458 real changes made)
(458 real changes made)
(129 real changes made)
(201 real changes made)
(201 real changes made)
(201 real changes made)
(19 real changes made)


In [27]:
gen dthdate = mdy(dthmonth, dthday, dthyear)
replace survival_bas = (dthdate - interview_baseline)/365.25
gen censor = 0
replace censor = 1 if survival_bas != .  //generate censor=1 if die, censor=0 if survived until end of the wave or lost to follow


(3,061 missing values generated)

(8,138 real changes made)


(8,138 real changes made)


In [28]:
replace survival_bas = (lostdate - interview_baseline)/365.25 if lostdate != .
gen lost = 1
replace lost = . if lostdate == .  //lost:893,585,284,214,53,6 lost in 0 2 5 8 11 14 wave


(2,918 real changes made)


(8,281 real changes made, 8,281 to missing)


In [29]:
gen interview2014 = mdy(monthin_14, dayin_14, yearin_14) if dth14 == 0 //47 changes
replace survival_bas = (interview2014 - interview_baseline)/365.25 if interview2014 != .


(11,056 missing values generated)

(143 real changes made)


In [30]:
**************replace the survival time to 0 for those whose survival was negative
sum survival_bas
* gen survival_bth,means the years from birth to death or censored
replace survival_bas = 0 if survival_bas < 0  
* gen survival_bth,means the years from birth to death or censored
gen survival_bth = survival_bas + trueage                                                            
erase "${INTER}/work.dta"
macro drop _all



    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
survival_bas |     11,199    3.393858    3.047849   .0027379   14.42574

(0 real changes made)





In [33]:
/************************************* (10) calc survival time to 2018 *************************************/

merge 1:1 id using "${OUT}/dat14_18surtime.dta", keepus(id survival_bas14_18 survival_bth14_18 censor14_18 lost14_18) nolabel //47, 96, 1110, 821 merged for dat98/00/05/11_14

(note: variable id was long, now double to accommodate using data's values)

    Result                           # of obs.
    -----------------------------------------
    not matched                        18,105
        from master                    11,056  (_merge==1)
        from using                      7,049  (_merge==2)

    matched                               143  (_merge==3)
    -----------------------------------------


In [35]:
ren (survival_bas survival_bth lost censor) (survival_bas00_14 survival_bth00_14 lost00_14 censor00_14)
gen survival_bas00_18 = survival_bas00_14
replace survival_bas00_18 = survival_bas00_14 + survival_bas14_18 if censor00_14 == 0 & _merge == 3



(7,049 missing values generated)

(143 real changes made)


In [36]:
gen survival_bth00_18 = survival_bth00_14
replace survival_bth00_18 = survival_bth00_14 + survival_bas14_18 if censor00_14 == 0 & _merge == 3


(7,049 missing values generated)

(143 real changes made)


In [37]:
gen censor00_18 = censor00_14
replace censor00_18 = censor14_18 if _merge == 3  //23, 47, 282, 290 died between 2014 and 2018


(7,049 missing values generated)

(70 real changes made)


In [38]:
gen lost00_18 = lost00_14
replace lost00_18 = lost14_18 if _merge == 3 //14, 29, 288, 87 lost between 2014 and 2018


(15,330 missing values generated)

(43 real changes made)


In [39]:
drop if _merge==2
drop _merge


(7,049 observations deleted)



In [40]:
save "${OUT}/dat00_18surtime.dta", replace

(note: file F:\Box Sync\Archives2020LLY\Zhengting\Duke Kunshan University Intern
>  (zh133@duke.edu)\4 healthy aging-CLHLS\Group meeting coordination\survival ti
> me/out data/dat00_18surtime.dta not found)
file F:\Box Sync\Archives2020LLY\Zhengting\Duke Kunshan University Intern (zh133
> @duke.edu)\4 healthy aging-CLHLS\Group meeting coordination\survival time/out 
> data/dat00_18surtime.dta saved
