# Empirical Project 2
 

In [1]:
%%capture
# Capture suppresses output to screen
import stata_setup
stata_setup.config("/Applications/Stata 17", "be")

In [2]:
%%stata
/* Define paths for data and output */
global data "Data"
global output "Output"
global country "cameroon"


. /* Define paths for data and output */
. global data "Data"

. global output "Output"

. global country "cameroon"

. 


In [3]:
%%capture
%%stata
log using "$output/empiricalPrjExplore-Cameroon.log", replace

## Open Cameroon's dataset and explore the contents

In [27]:
%%stata
import excel "$data/GYTS_2014_CAMEROON_NATIONAL.XLSX", ///
sheet("GYTS 2014 CAMEROON (NATIONAL) W") firstrow
describe


. import excel "$data/GYTS_2014_CAMEROON_NATIONAL.XLSX", ///
> sheet("GYTS 2014 CAMEROON (NATIONAL) W") firstrow
(46 vars, 2,922 obs)

. describe

Contains data
 Observations:         2,922                  
    Variables:            46                  
-------------------------------------------------------------------------------
Variable      Storage   Display    Value
    name         type    format    label      Variable label
-------------------------------------------------------------------------------
FinalWgt        double  %10.0g                FinalWgt
CR1             byte    %10.0g                CR1
CR2             byte    %10.0g                CR2
CMR3            byte    %10.0g                CMR3
CMR4            byte    %10.0g                CMR4
CR5             byte    %10.0g                CR5
CR6             byte    %10.0g                CR6
CR7             byte    %10.0g                CR7
CR8             byte    %10.0g                CR8
CR9             byte    %

---

# Clean-up and rename data elements

**Create age category based on CR1 with base category == 11 yr or younger.**

In [28]:
%%stata
label define ageCategoryLabels ///
0 "<= 11 yrs old" ///
1 "12 yrs old" ///
2 "13 yrs old" ///
3 "14 yrs old" ///
4 "15 yrs old" ///
5 "16 yrs old" ///
6 ">= 17 yrs old" , replace


. label define ageCategoryLabels ///
> 0 "<= 11 yrs old" ///
> 1 "12 yrs old" ///
> 2 "13 yrs old" ///
> 3 "14 yrs old" ///
> 4 "15 yrs old" ///
> 5 "16 yrs old" ///
> 6 ">= 17 yrs old" , replace

. 


In [29]:
%%stata
generate ageCategory = .
label values ageCategory ageCategoryLabels
replace ageCategory =0 if CR1==1
replace ageCategory =1 if CR1==2
replace ageCategory =2 if CR1==3
replace ageCategory =3 if CR1==4
replace ageCategory =4 if CR1==5
replace ageCategory =5 if CR1==6
replace ageCategory =6 if CR1==7


. generate ageCategory = .
(2,922 missing values generated)

. label values ageCategory ageCategoryLabels

. replace ageCategory =0 if CR1==1
(122 real changes made)

. replace ageCategory =1 if CR1==2
(241 real changes made)

. replace ageCategory =2 if CR1==3
(537 real changes made)

. replace ageCategory =3 if CR1==4
(721 real changes made)

. replace ageCategory =4 if CR1==5
(615 real changes made)

. replace ageCategory =5 if CR1==6
(293 real changes made)

. replace ageCategory =6 if CR1==7
(391 real changes made)

. 


In [30]:
%%stata
tab ageCategory
tab CR1


. tab ageCategory

  ageCategory |      Freq.     Percent        Cum.
--------------+-----------------------------------
<= 11 yrs old |        122        4.18        4.18
   12 yrs old |        241        8.25       12.43
   13 yrs old |        537       18.39       30.82
   14 yrs old |        721       24.69       55.51
   15 yrs old |        615       21.06       76.58
   16 yrs old |        293       10.03       86.61
>= 17 yrs old |        391       13.39      100.00
--------------+-----------------------------------
        Total |      2,920      100.00

. tab CR1

        CR1 |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |        122        4.18        4.18
          2 |        241        8.25       12.43
          3 |        537       18.39       30.82
          4 |        721       24.69       55.51
          5 |        615       21.06       76.58
          6 |        293       10.03       86.61
          7 |        391   

**Create female based on CR2.  base category is male**

In [31]:
%%stata
label define sexLabels ///
0 "male" ///
1 "female", replace


. label define sexLabels ///
> 0 "male" ///
> 1 "female", replace

. 


In [32]:
%%stata
generate female =.
label values female sexLabels
replace female =0 if CR2==1
replace female =1 if CR2==2


. generate female =.
(2,922 missing values generated)

. label values female sexLabels

. replace female =0 if CR2==1
(1,622 real changes made)

. replace female =1 if CR2==2
(1,298 real changes made)

. 


In [33]:
%%stata
tab female
tab CR2


. tab female

     female |      Freq.     Percent        Cum.
------------+-----------------------------------
       male |      1,622       55.55       55.55
     female |      1,298       44.45      100.00
------------+-----------------------------------
      Total |      2,920      100.00

. tab CR2

        CR2 |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |      1,622       55.55       55.55
          2 |      1,298       44.45      100.00
------------+-----------------------------------
      Total |      2,920      100.00

. 


**Create hasMoney based on CMR4.  base category will be no money, CMR4==1.  hasMoney==1 when CMR4 > 1**

In [34]:
%%stata
label define hasMoneyLabel ///
0 "No spending money, CMR4=1" ///
1 "Has some spending money, CMR4=2-8", replace


. label define hasMoneyLabel ///
> 0 "No spending money, CMR4=1" ///
> 1 "Has some spending money, CMR4=2-8", replace

. 


In [35]:
%%stata
generate hasMoney = .
label values hasMoney hasMoneyLabel
replace hasMoney =0 if CMR4==1
replace hasMoney =1 if CMR4>1 & CMR4 != .


. generate hasMoney = .
(2,922 missing values generated)

. label values hasMoney hasMoneyLabel

. replace hasMoney =0 if CMR4==1
(635 real changes made)

. replace hasMoney =1 if CMR4>1 & CMR4 != .
(2,278 real changes made)

. 


In [36]:
%%stata
tab CMR4
tab CMR4 if CMR4>1
tab hasMoney


. tab CMR4

       CMR4 |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |        635       21.80       21.80
          2 |      1,156       39.68       61.48
          3 |        555       19.05       80.54
          4 |        268        9.20       89.74
          5 |        119        4.09       93.82
          6 |         87        2.99       96.81
          7 |         50        1.72       98.52
          8 |         43        1.48      100.00
------------+-----------------------------------
      Total |      2,913      100.00

. tab CMR4 if CMR4>1

       CMR4 |      Freq.     Percent        Cum.
------------+-----------------------------------
          2 |      1,156       50.75       50.75
          3 |        555       24.36       75.11
          4 |        268       11.76       86.87
          5 |        119        5.22       92.10
          6 |         87        3.82       95.92
          7 |         50        2.19       98

**Create smoker from CR7.  smoker == 0 if 0 days smoked in the last 30 days**

In [37]:
%%stata
label define smokerLabel ///
0 "No cigarettes in previous 30 days" ///
1 "Some cigarettes in prevous 30 days", replace


. label define smokerLabel ///
> 0 "No cigarettes in previous 30 days" ///
> 1 "Some cigarettes in prevous 30 days", replace

. 


In [38]:
%%stata
generate smoker = .
label value smoker smokerLabel
replace smoker =0 if CR7==1
replace smoker =1 if CR7>1 & CR7 !=.


. generate smoker = .
(2,922 missing values generated)

. label value smoker smokerLabel

. replace smoker =0 if CR7==1
(2,577 real changes made)

. replace smoker =1 if CR7>1 & CR7 !=.
(191 real changes made)

. 


In [39]:
%%stata
tab CR7
tab CR7 if CR7>1
tab smoker


. tab CR7

        CR7 |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |      2,577       93.10       93.10
          2 |        115        4.15       97.25
          3 |         33        1.19       98.45
          4 |         12        0.43       98.88
          5 |         12        0.43       99.31
          6 |          4        0.14       99.46
          7 |         15        0.54      100.00
------------+-----------------------------------
      Total |      2,768      100.00

. tab CR7 if CR7>1

        CR7 |      Freq.     Percent        Cum.
------------+-----------------------------------
          2 |        115       60.21       60.21
          3 |         33       17.28       77.49
          4 |         12        6.28       83.77
          5 |         12        6.28       90.05
          6 |          4        2.09       92.15
          7 |         15        7.85      100.00
------------+-----------------------------------

**Create easyToQuit from CR41. Base category is "Yes"**

In [40]:
%%stata
label define easyToQuitLabel ///
0 "Not Easy to Quit" ///
1 "Easy to Quiet" , replace


. label define easyToQuitLabel ///
> 0 "Not Easy to Quit" ///
> 1 "Easy to Quiet" , replace

. 


In [41]:
%%stata
generate easyToQuit = .
label value easyToQuit easyToQuitLabel
replace easyToQuit =1 if CR41==1
replace easyToQuit =1 if CR41==2
replace easyToQuit =0 if CR41==3
replace easyToQuit =0 if CR41==4


. generate easyToQuit = .
(2,922 missing values generated)

. label value easyToQuit easyToQuitLabel

. replace easyToQuit =1 if CR41==1
(1,164 real changes made)

. replace easyToQuit =1 if CR41==2
(369 real changes made)

. replace easyToQuit =0 if CR41==3
(527 real changes made)

. replace easyToQuit =0 if CR41==4
(846 real changes made)

. 


In [42]:
%%stata
tab easyToQuit
tab CR41


. tab easyToQuit

      easyToQuit |      Freq.     Percent        Cum.
-----------------+-----------------------------------
Not Easy to Quit |      1,373       47.25       47.25
   Easy to Quiet |      1,533       52.75      100.00
-----------------+-----------------------------------
           Total |      2,906      100.00

. tab CR41

       CR41 |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |      1,164       40.06       40.06
          2 |        369       12.70       52.75
          3 |        527       18.13       70.89
          4 |        846       29.11      100.00
------------+-----------------------------------
      Total |      2,906      100.00

. 


**Define smokingPresenceLabel as number of days someone smoked in the presence of respondent**

In [43]:
%%stata
label define smokingPresenceLabel ///
0 "No Smoking" ///
1 "Yes Smoking", replace


. label define smokingPresenceLabel ///
> 0 "No Smoking" ///
> 1 "Yes Smoking", replace

. 


In [44]:
%%stata
generate smokingHome = .
label value smokingHome smokingPresenceLabel
replace smokingHome =0 if CR19==1
replace smokingHome =1 if CR19==2
replace smokingHome =1 if CR19==3
replace smokingHome =1 if CR19==4
replace smokingHome =1 if CR19==5


. generate smokingHome = .
(2,922 missing values generated)

. label value smokingHome smokingPresenceLabel

. replace smokingHome =0 if CR19==1
(2,075 real changes made)

. replace smokingHome =1 if CR19==2
(357 real changes made)

. replace smokingHome =1 if CR19==3
(182 real changes made)

. replace smokingHome =1 if CR19==4
(77 real changes made)

. replace smokingHome =1 if CR19==5
(222 real changes made)

. 


In [45]:
%%stata
generate smokingIndoors = .
label value smokingIndoors smokingPresenceLabel
replace smokingIndoors =0 if CR20==1
replace smokingIndoors =1 if CR20==2
replace smokingIndoors =1 if CR20==3
replace smokingIndoors =1 if CR20==4
replace smokingIndoors =1 if CR20==5


. generate smokingIndoors = .
(2,922 missing values generated)

. label value smokingIndoors smokingPresenceLabel

. replace smokingIndoors =0 if CR20==1
(1,613 real changes made)

. replace smokingIndoors =1 if CR20==2
(592 real changes made)

. replace smokingIndoors =1 if CR20==3
(272 real changes made)

. replace smokingIndoors =1 if CR20==4
(139 real changes made)

. replace smokingIndoors =1 if CR20==5
(288 real changes made)

. 


In [46]:
%%stata
generate smokingOutdoors = .
label value smokingOutdoors smokingPresenceLabel
replace smokingOutdoors =0 if CR21==1
replace smokingOutdoors =1 if CR21==2
replace smokingOutdoors =1 if CR21==3
replace smokingOutdoors =1 if CR21==4
replace smokingOutdoors =1 if CR21==5


. generate smokingOutdoors = .
(2,922 missing values generated)

. label value smokingOutdoors smokingPresenceLabel

. replace smokingOutdoors =0 if CR21==1
(1,473 real changes made)

. replace smokingOutdoors =1 if CR21==2
(705 real changes made)

. replace smokingOutdoors =1 if CR21==3
(273 real changes made)

. replace smokingOutdoors =1 if CR21==4
(152 real changes made)

. replace smokingOutdoors =1 if CR21==5
(302 real changes made)

. 


In [47]:
%%stata
save "${data}/${country}SmokingSurvery.dta", replace


file Data/cameroonSmokingSurvery.dta saved


In [48]:
%%capture
%stata log close

In [49]:
%stata clear