# Empirical Project 2
 

In [1]:
%%capture
# Capture suppresses output to screen
import stata_setup
stata_setup.config("/Applications/Stata 17", "be")

In [2]:
%%stata
/* Define paths for data and output */
global data "Data"
global output "Output"
global country "zimbabwe"


. /* Define paths for data and output */
. global data "Data"

. global output "Output"

. global country "zimbabwe"

. 


In [3]:
%%capture
%%stata
log using "$output/empiricalPrjExplore-Zimbabwe.log", replace

## Open Zimbabwe's dataset and explore the contents

In [4]:
%%stata
import excel "$data/GYTS_2014_ZIMBABWE_NATIONAL.XLSX", ///
sheet("GYTS 2014 Zimbabwe (National) W") firstrow
describe


. import excel "$data/GYTS_2014_ZIMBABWE_NATIONAL.XLSX", ///
> sheet("GYTS 2014 Zimbabwe (National) W") firstrow
(75 vars, 6,427 obs)

. describe

Contains data
 Observations:         6,427                  
    Variables:            75                  
-------------------------------------------------------------------------------
Variable      Storage   Display    Value
    name         type    format    label      Variable label
-------------------------------------------------------------------------------
FinalWgt        double  %10.0g                FinalWgt
Stratum         long    %10.0g                Stratum
PSU             byte    %10.0g                PSU
CR1             byte    %10.0g                CR1
CR2             byte    %10.0g                CR2
ZBR3            byte    %10.0g                ZBR3
ZBR4            byte    %10.0g                ZBR4
CR5             byte    %10.0g                CR5
CR6             byte    %10.0g                CR6
CR7             byte 

---

# Clean-up and rename data elements

**Create age category based on CR1 with base category == 11 yr or younger.**

In [5]:
%%stata
label define ageCategoryLabels ///
0 "<= 11 yrs old" ///
1 "12 yrs old" ///
2 "13 yrs old" ///
3 "14 yrs old" ///
4 "15 yrs old" ///
5 "16 yrs old" ///
6 ">= 17 yrs old" , replace


. label define ageCategoryLabels ///
> 0 "<= 11 yrs old" ///
> 1 "12 yrs old" ///
> 2 "13 yrs old" ///
> 3 "14 yrs old" ///
> 4 "15 yrs old" ///
> 5 "16 yrs old" ///
> 6 ">= 17 yrs old" , replace

. 


In [6]:
%%stata
generate ageCategory = .
label values ageCategory ageCategoryLabels
replace ageCategory =0 if CR1==1
replace ageCategory =1 if CR1==2
replace ageCategory =2 if CR1==3
replace ageCategory =3 if CR1==4
replace ageCategory =4 if CR1==5
replace ageCategory =5 if CR1==6
replace ageCategory =6 if CR1==7


. generate ageCategory = .
(6,427 missing values generated)

. label values ageCategory ageCategoryLabels

. replace ageCategory =0 if CR1==1
(71 real changes made)

. replace ageCategory =1 if CR1==2
(581 real changes made)

. replace ageCategory =2 if CR1==3
(1,780 real changes made)

. replace ageCategory =3 if CR1==4
(2,046 real changes made)

. replace ageCategory =4 if CR1==5
(1,288 real changes made)

. replace ageCategory =5 if CR1==6
(468 real changes made)

. replace ageCategory =6 if CR1==7
(182 real changes made)

. 


In [7]:
%%stata
tab ageCategory
tab CR1


. tab ageCategory

  ageCategory |      Freq.     Percent        Cum.
--------------+-----------------------------------
<= 11 yrs old |         71        1.11        1.11
   12 yrs old |        581        9.06       10.16
   13 yrs old |      1,780       27.74       37.91
   14 yrs old |      2,046       31.89       69.79
   15 yrs old |      1,288       20.07       89.87
   16 yrs old |        468        7.29       97.16
>= 17 yrs old |        182        2.84      100.00
--------------+-----------------------------------
        Total |      6,416      100.00

. tab CR1

        CR1 |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |         71        1.11        1.11
          2 |        581        9.06       10.16
          3 |      1,780       27.74       37.91
          4 |      2,046       31.89       69.79
          5 |      1,288       20.07       89.87
          6 |        468        7.29       97.16
          7 |        182   

**Create female based on CR2.  base category is male**

In [8]:
%%stata
label define sexLabels ///
0 "male" ///
1 "female", replace


. label define sexLabels ///
> 0 "male" ///
> 1 "female", replace

. 


In [9]:
%%stata
generate female =.
label values female sexLabels
replace female =0 if CR2==1
replace female =1 if CR2==2


. generate female =.
(6,427 missing values generated)

. label values female sexLabels

. replace female =0 if CR2==1
(2,835 real changes made)

. replace female =1 if CR2==2
(3,511 real changes made)

. 


In [10]:
%%stata
tab female
tab CR2


. tab female

     female |      Freq.     Percent        Cum.
------------+-----------------------------------
       male |      2,835       44.67       44.67
     female |      3,511       55.33      100.00
------------+-----------------------------------
      Total |      6,346      100.00

. tab CR2

        CR2 |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |      2,835       44.67       44.67
          2 |      3,511       55.33      100.00
------------+-----------------------------------
      Total |      6,346      100.00

. 


**Create hasMoney based on ZBR4.  base category will be no money, ZBR4==1.  hasMoney==1 when ZMBR4 > 1**

In [11]:
%%stata
label define hasMoneyLabel ///
0 "No spending money, CMR4/ZBR4=1" ///
1 "Has some spending money, CMR4/ZBR4=2-8", replace


. label define hasMoneyLabel ///
> 0 "No spending money, CMR4/ZBR4=1" ///
> 1 "Has some spending money, CMR4/ZBR4=2-8", replace

. 


In [14]:
%%stata
generate hasMoney = .
label values hasMoney hasMoneyLabel
replace hasMoney =0 if ZBR4==1
replace hasMoney =1 if ZBR4>1 & ZBR4 != .


. generate hasMoney = .
(6,427 missing values generated)

. label values hasMoney hasMoneyLabel

. replace hasMoney =0 if ZBR4==1
(1,955 real changes made)

. replace hasMoney =1 if ZBR4>1 & ZBR4 != .
(4,438 real changes made)

. 


In [15]:
%%stata
tab ZBR4
tab ZBR4 if ZBR4>1
tab hasMoney


. tab ZBR4

       ZBR4 |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |      1,955       30.58       30.58
          2 |      2,171       33.96       64.54
          3 |      1,138       17.80       82.34
          4 |        507        7.93       90.27
          5 |        114        1.78       92.05
          6 |        164        2.57       94.62
          7 |        344        5.38      100.00
------------+-----------------------------------
      Total |      6,393      100.00

. tab ZBR4 if ZBR4>1

       ZBR4 |      Freq.     Percent        Cum.
------------+-----------------------------------
          2 |      2,171       48.92       48.92
          3 |      1,138       25.64       74.56
          4 |        507       11.42       85.98
          5 |        114        2.57       88.55
          6 |        164        3.70       92.25
          7 |        344        7.75      100.00
------------+--------------------------------

**Create smoker from CR7.  smoker == 0 if 0 days smoked in the last 30 days**

In [16]:
%%stata
label define smokerLabel ///
0 "No cigarettes in previous 30 days" ///
1 "Some cigarettes in prevous 30 days", replace


. label define smokerLabel ///
> 0 "No cigarettes in previous 30 days" ///
> 1 "Some cigarettes in prevous 30 days", replace

. 


In [17]:
%%stata
generate smoker = .
label value smoker smokerLabel
replace smoker =0 if CR7==1
replace smoker =1 if CR7>1 & CR7 !=.


. generate smoker = .
(6,427 missing values generated)

. label value smoker smokerLabel

. replace smoker =0 if CR7==1
(5,079 real changes made)

. replace smoker =1 if CR7>1 & CR7 !=.
(483 real changes made)

. 


In [18]:
%%stata
tab CR7
tab CR7 if CR7>1
tab smoker


. tab CR7

        CR7 |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |      5,079       91.32       91.32
          2 |        157        2.82       94.14
          3 |         82        1.47       95.61
          4 |         53        0.95       96.57
          5 |         37        0.67       97.23
          6 |         48        0.86       98.09
          7 |        106        1.91      100.00
------------+-----------------------------------
      Total |      5,562      100.00

. tab CR7 if CR7>1

        CR7 |      Freq.     Percent        Cum.
------------+-----------------------------------
          2 |        157       32.51       32.51
          3 |         82       16.98       49.48
          4 |         53       10.97       60.46
          5 |         37        7.66       68.12
          6 |         48        9.94       78.05
          7 |        106       21.95      100.00
------------+-----------------------------------

**Create easyToQuit from CR41. Base category is "Definitely No" with decreasing difficulty at higher numbers**

In [19]:
%%stata
label define easyToQuitLabel ///
0 "Definitely not" ///
1 "Probably not" ///
2 "Probably yes" ///
3 "Definitely yes", replace


. label define easyToQuitLabel ///
> 0 "Definitely not" ///
> 1 "Probably not" ///
> 2 "Probably yes" ///
> 3 "Definitely yes", replace

. 


In [20]:
%%stata
generate easyToQuit = .
label value easyToQuit easyToQuitLabel
replace easyToQuit =0 if CR41==1
replace easyToQuit =1 if CR41==2
replace easyToQuit =2 if CR41==3
replace easyToQuit =3 if CR41==4


. generate easyToQuit = .
(6,427 missing values generated)

. label value easyToQuit easyToQuitLabel

. replace easyToQuit =0 if CR41==1
(1,837 real changes made)

. replace easyToQuit =1 if CR41==2
(1,279 real changes made)

. replace easyToQuit =2 if CR41==3
(1,314 real changes made)

. replace easyToQuit =3 if CR41==4
(1,800 real changes made)

. 


In [21]:
%%stata
tab easyToQuit
tab CR41


. tab easyToQuit

    easyToQuit |      Freq.     Percent        Cum.
---------------+-----------------------------------
Definitely not |      1,837       29.49       29.49
  Probably not |      1,279       20.53       50.02
  Probably yes |      1,314       21.09       71.11
Definitely yes |      1,800       28.89      100.00
---------------+-----------------------------------
         Total |      6,230      100.00

. tab CR41

       CR41 |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |      1,837       29.49       29.49
          2 |      1,279       20.53       50.02
          3 |      1,314       21.09       71.11
          4 |      1,800       28.89      100.00
------------+-----------------------------------
      Total |      6,230      100.00

. 


**Define smokingPresenceLabel as number of days someone smoked in the presence of respondent**

In [22]:
%%stata
label define smokingPresenceLabel ///
0 "0 Days" ///
1 "1-2 Days" ///
2 "3-4 Days" ///
3 "5-6 Days" ///
4 "7 days", replace


. label define smokingPresenceLabel ///
> 0 "0 Days" ///
> 1 "1-2 Days" ///
> 2 "3-4 Days" ///
> 3 "5-6 Days" ///
> 4 "7 days", replace

. 


In [23]:
%%stata
generate smokingHome = .
label value smokingHome smokingPresenceLabel
replace smokingHome =0 if CR19==1
replace smokingHome =1 if CR19==2
replace smokingHome =2 if CR19==3
replace smokingHome =3 if CR19==4
replace smokingHome =4 if CR19==5


. generate smokingHome = .
(6,427 missing values generated)

. label value smokingHome smokingPresenceLabel

. replace smokingHome =0 if CR19==1
(4,446 real changes made)

. replace smokingHome =1 if CR19==2
(642 real changes made)

. replace smokingHome =2 if CR19==3
(302 real changes made)

. replace smokingHome =3 if CR19==4
(213 real changes made)

. replace smokingHome =4 if CR19==5
(642 real changes made)

. 


In [24]:
%%stata
generate smokingIndoors = .
label value smokingIndoors smokingPresenceLabel
replace smokingIndoors =0 if CR20==1
replace smokingIndoors =1 if CR20==2
replace smokingIndoors =2 if CR20==3
replace smokingIndoors =3 if CR20==4
replace smokingIndoors =4 if CR20==5


. generate smokingIndoors = .
(6,427 missing values generated)

. label value smokingIndoors smokingPresenceLabel

. replace smokingIndoors =0 if CR20==1
(2,843 real changes made)

. replace smokingIndoors =1 if CR20==2
(1,349 real changes made)

. replace smokingIndoors =2 if CR20==3
(647 real changes made)

. replace smokingIndoors =3 if CR20==4
(421 real changes made)

. replace smokingIndoors =4 if CR20==5
(1,038 real changes made)

. 


In [25]:
%%stata
generate smokingOutdoors = .
label value smokingOutdoors smokingPresenceLabel
replace smokingOutdoors =0 if CR21==1
replace smokingOutdoors =1 if CR21==2
replace smokingOutdoors =2 if CR21==3
replace smokingOutdoors =3 if CR21==4
replace smokingOutdoors =4 if CR21==5


. generate smokingOutdoors = .
(6,427 missing values generated)

. label value smokingOutdoors smokingPresenceLabel

. replace smokingOutdoors =0 if CR21==1
(2,802 real changes made)

. replace smokingOutdoors =1 if CR21==2
(1,446 real changes made)

. replace smokingOutdoors =2 if CR21==3
(618 real changes made)

. replace smokingOutdoors =3 if CR21==4
(449 real changes made)

. replace smokingOutdoors =4 if CR21==5
(952 real changes made)

. 


In [26]:
%%stata
save "${data}/${country}SmokingSurvery.data", replace


(file Data/zimbabweSmokingSurvery.data not found)
file Data/zimbabweSmokingSurvery.data saved as .dta format


In [None]:
%%capture
%stata log close