In [1]:
import pandas as pd, numpy as np
from db_queries import get_ids, get_outputs, get_location_metadata, get_population, get_covariate_estimates
from get_draws.api import get_draws
import matplotlib.pyplot as plt
import os

The purpose of this notebook is to estimate the proportion of smoking-attributable mortality in China that is due to lung cancer, COPD, and IHD. This lets us assess the magnitude of the limitation introduced by using these three causes as a proxy for all smoking-attributable mortality in the lung cancer screening model.

In [2]:
# Pull burdenator death-count draws attributable to the smoking risk
# (rei_id=99) for China in 2019, keeping only the causes compared below.
# Per the original annotation on this cell the cause IDs are, in order:
# 294 = all causes, 426 = lung cancer, 493 = IHD, 509 = COPD.
smok_attrib = (
    get_draws(
        gbd_id_type='rei_id',
        gbd_id=99,          # smoking
        source='burdenator',
        location_id=6,      # china
        year_id=2019,
        measure_id=1,       # deaths
        metric_id=1,        # count
        gbd_round_id=6,
        decomp_step='step5',
        status='best',
    )
    .loc[lambda draws: draws['cause_id'].isin([294, 426, 493, 509])]
)

smok_attrib.head()

Unnamed: 0,age_group_id,cause_id,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,...,draw_996,draw_997,draw_998,draw_999,location_id,measure_id,metric_id,rei_id,sex_id,year_id
0,11,294,466.560914,440.03082,463.484026,368.750515,356.996887,315.822957,373.55765,325.546755,...,574.119698,316.65589,351.343638,309.117415,6,1,1,99,2,2019
1,12,294,694.42024,378.672623,733.806222,605.367953,758.976669,762.29963,855.913949,797.127632,...,868.578567,659.880376,570.80442,715.466889,6,1,1,99,2,2019
2,13,294,1580.537425,1196.457942,1391.908416,2102.859889,1230.500408,1824.424167,2011.181383,622.396982,...,1189.462895,1865.434031,1214.150237,943.167196,6,1,1,99,2,2019
3,14,294,3060.670617,2427.141134,2519.407191,2478.295578,1796.35548,2572.23753,2477.73865,2147.053226,...,2487.44827,1828.423286,1954.004295,3713.693577,6,1,1,99,2,2019
4,15,294,10908.362129,8043.579111,9314.455576,12369.290613,8117.648314,8700.781006,9736.936031,10220.615273,...,8171.207931,9054.016227,8956.806859,8997.675913,6,1,1,99,2,2019


In [3]:
# Grouping keys for the sex-specific analysis; 'cause_id' is kept last
# because a later cell slices it off with positional indexing.
index_cols = [
    'location_id',
    'measure_id',
    'metric_id',
    'year_id',
    'rei_id',
    'sex_id',
    'cause_id',
]

In [4]:
# Collapse age groups: total every column within each index_cols combination,
# then keep only the columns whose name contains 'draw' (summed non-draw
# columns such as age_group_id carry no meaning and are discarded).
smok_sum = smok_attrib.groupby(index_cols).sum().filter(like='draw')
smok_sum

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,measure_id,metric_id,year_id,rei_id,sex_id,cause_id,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
6,1,1,2019,99,1,294,1981327.0,2079242.0,1906669.0,2146907.0,1952828.0,1928905.0,2067454.0,2270499.0,1921219.0,1665847.0,...,1909636.0,1955507.0,1740128.0,2039695.0,1756255.0,2192080.0,2529665.0,2078320.0,2329986.0,2107989.0
6,1,1,2019,99,1,426,426268.1,445747.6,403332.0,459726.5,403693.1,407746.1,415687.7,479451.2,392315.1,334606.4,...,429226.6,407028.6,351549.2,437747.6,373917.5,418950.5,597492.3,470920.5,459307.6,444735.2
6,1,1,2019,99,1,493,340780.9,354857.9,320089.2,354096.7,322883.1,324048.5,338582.1,375087.5,315162.8,268905.0,...,303944.2,329211.0,289748.2,312834.9,286615.7,336357.0,375213.4,364478.9,432312.9,355742.4
6,1,1,2019,99,1,509,399259.1,438034.8,411891.9,472554.4,441051.3,446187.5,465253.2,481027.0,429734.7,368327.0,...,409938.0,398786.1,411130.8,504994.0,421201.1,525800.9,521864.2,361636.9,512973.1,476641.1
6,1,1,2019,99,2,294,356880.5,310761.2,320061.1,343412.6,277693.2,360716.3,350706.8,322474.5,273181.6,278072.2,...,362889.1,402582.0,338498.4,350940.2,360714.4,363082.0,335678.4,333945.6,315793.9,408991.3
6,1,1,2019,99,2,426,67382.27,55340.52,60716.31,68317.23,59288.85,71558.78,62119.19,64506.64,50500.11,52069.53,...,63551.58,66843.19,68181.26,59897.17,65612.41,67320.94,56837.53,63384.9,58449.46,68235.65
6,1,1,2019,99,2,493,74022.35,66764.55,68951.7,74239.98,54246.6,79069.64,69344.05,65958.59,54180.62,58386.64,...,61924.72,76382.21,68369.49,61766.68,68607.06,61211.6,65177.71,69518.54,62108.56,71511.74
6,1,1,2019,99,2,509,100195.4,91784.76,87007.21,91836.36,75076.52,97973.42,92661.47,86841.22,74187.59,73171.3,...,143798.8,138302.9,111026.0,127363.7,115013.9,121616.5,125921.0,107809.8,117577.7,160123.1


In [5]:
# Build the combined "modeled" row: take every cause except all-cause (294),
# relabel them with a single cause_id of 'modeled', and sum their draws.
# .assign() returns a new frame, so the relabel never writes into a slice of
# smok_sum (the original column assignment on a .loc slice could raise
# SettingWithCopyWarning), and reset_index() is only evaluated once.
smok_modeled = (
    smok_sum.reset_index()
    .loc[lambda d: d.cause_id != 294]
    .assign(cause_id='modeled')
    .groupby(index_cols)
    .sum()
)
smok_modeled

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,measure_id,metric_id,year_id,rei_id,sex_id,cause_id,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
6,1,1,2019,99,1,modeled,1166308.0,1238640.0,1135313.0,1286378.0,1167628.0,1177982.0,1219523.0,1335566.0,1137213.0,971838.404964,...,1143109.0,1135026.0,1052428.0,1255576.0,1081734.0,1281108.0,1494570.0,1197036.0,1404594.0,1277119.0
6,1,1,2019,99,2,modeled,241600.0,213889.8,216675.2,234393.6,188612.0,248601.8,224124.7,217306.5,178868.3,183627.467344,...,269275.1,281528.3,247576.8,249027.5,249233.4,250149.1,247936.3,240713.2,238135.8,299870.5


In [6]:
# Stack the per-cause rows and the combined 'modeled' rows into one frame.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; the row order (smok_sum first) is unchanged.
smok_fin = pd.concat([smok_sum, smok_modeled])
smok_fin

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,measure_id,metric_id,year_id,rei_id,sex_id,cause_id,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
6,1,1,2019,99,1,294,1981327.0,2079242.0,1906669.0,2146907.0,1952828.0,1928905.0,2067454.0,2270499.0,1921219.0,1665847.0,...,1909636.0,1955507.0,1740128.0,2039695.0,1756255.0,2192080.0,2529665.0,2078320.0,2329986.0,2107989.0
6,1,1,2019,99,1,426,426268.1,445747.6,403332.0,459726.5,403693.1,407746.1,415687.7,479451.2,392315.1,334606.4,...,429226.6,407028.6,351549.2,437747.6,373917.5,418950.5,597492.3,470920.5,459307.6,444735.2
6,1,1,2019,99,1,493,340780.9,354857.9,320089.2,354096.7,322883.1,324048.5,338582.1,375087.5,315162.8,268905.0,...,303944.2,329211.0,289748.2,312834.9,286615.7,336357.0,375213.4,364478.9,432312.9,355742.4
6,1,1,2019,99,1,509,399259.1,438034.8,411891.9,472554.4,441051.3,446187.5,465253.2,481027.0,429734.7,368327.0,...,409938.0,398786.1,411130.8,504994.0,421201.1,525800.9,521864.2,361636.9,512973.1,476641.1
6,1,1,2019,99,2,294,356880.5,310761.2,320061.1,343412.6,277693.2,360716.3,350706.8,322474.5,273181.6,278072.2,...,362889.1,402582.0,338498.4,350940.2,360714.4,363082.0,335678.4,333945.6,315793.9,408991.3
6,1,1,2019,99,2,426,67382.27,55340.52,60716.31,68317.23,59288.85,71558.78,62119.19,64506.64,50500.11,52069.53,...,63551.58,66843.19,68181.26,59897.17,65612.41,67320.94,56837.53,63384.9,58449.46,68235.65
6,1,1,2019,99,2,493,74022.35,66764.55,68951.7,74239.98,54246.6,79069.64,69344.05,65958.59,54180.62,58386.64,...,61924.72,76382.21,68369.49,61766.68,68607.06,61211.6,65177.71,69518.54,62108.56,71511.74
6,1,1,2019,99,2,509,100195.4,91784.76,87007.21,91836.36,75076.52,97973.42,92661.47,86841.22,74187.59,73171.3,...,143798.8,138302.9,111026.0,127363.7,115013.9,121616.5,125921.0,107809.8,117577.7,160123.1
6,1,1,2019,99,1,modeled,1166308.0,1238640.0,1135313.0,1286378.0,1167628.0,1177982.0,1219523.0,1335566.0,1137213.0,971838.4,...,1143109.0,1135026.0,1052428.0,1255576.0,1081734.0,1281108.0,1494570.0,1197036.0,1404594.0,1277119.0
6,1,1,2019,99,2,modeled,241600.0,213889.8,216675.2,234393.6,188612.0,248601.8,224124.7,217306.5,178868.3,183627.5,...,269275.1,281528.3,247576.8,249027.5,249233.4,250149.1,247936.3,240713.2,238135.8,299870.5


In [7]:
# Denominator: the all-cause (cause_id == 294) rows, re-indexed by every key
# except cause_id so they can be aligned against each cause's rows.
# index_cols[:-1] relies on 'cause_id' being the last entry of index_cols.
smok_all = (
    smok_fin.reset_index()
    .loc[lambda d: d['cause_id'] == 294]
    .drop(columns='cause_id')
    .set_index(index_cols[:-1])
)
smok_all

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,measure_id,metric_id,year_id,rei_id,sex_id,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
6,1,1,2019,99,1,1981327.0,2079242.0,1906669.0,2146907.0,1952828.0,1928905.0,2067454.0,2270499.0,1921219.0,1665847.0,...,1909636.0,1955507.0,1740128.0,2039695.0,1756255.0,2192080.0,2529665.0,2078320.0,2329986.0,2107989.0
6,1,1,2019,99,2,356880.5,310761.2,320061.1,343412.6,277693.2,360716.3,350706.8,322474.5,273181.6,278072.2,...,362889.1,402582.0,338498.4,350940.2,360714.4,363082.0,335678.4,333945.6,315793.9,408991.3


In [8]:
# Per-draw fraction of all smoking-attributable deaths for each cause row.
# The divisor has no cause_id level, so pandas aligns it on the shared index
# levels and the all-cause rows come out as exactly 1.0.
smok_frac = smok_fin.div(smok_all)
smok_frac

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,draw_0,draw_1,draw_10,draw_100,draw_101,draw_102,draw_103,draw_104,draw_105,draw_106,...,draw_990,draw_991,draw_992,draw_993,draw_994,draw_995,draw_996,draw_997,draw_998,draw_999
location_id,measure_id,metric_id,year_id,rei_id,sex_id,cause_id,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
6,1,1,2019,99,1,294,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,1,1,2019,99,1,426,0.215143,0.21438,0.211538,0.214134,0.206722,0.211387,0.201063,0.211166,0.204201,0.200863,...,0.224769,0.208145,0.202025,0.214614,0.212906,0.19112,0.236194,0.226587,0.197129,0.210976
6,1,1,2019,99,1,493,0.171996,0.170667,0.167879,0.164933,0.165341,0.167996,0.163768,0.1652,0.164043,0.161422,...,0.159163,0.168351,0.16651,0.153373,0.163197,0.153442,0.148325,0.175372,0.185543,0.168759
6,1,1,2019,99,1,509,0.201511,0.21067,0.216027,0.220109,0.225853,0.231316,0.225037,0.21186,0.223678,0.221105,...,0.214668,0.20393,0.236265,0.247583,0.239829,0.239864,0.206298,0.174004,0.220161,0.226112
6,1,1,2019,99,1,modeled,0.58865,0.595717,0.595443,0.599177,0.597916,0.6107,0.589867,0.588226,0.591922,0.58339,...,0.598601,0.580425,0.604799,0.615571,0.615932,0.584426,0.590817,0.575963,0.602834,0.605847
6,1,1,2019,99,2,294,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,1,1,2019,99,2,426,0.188809,0.178081,0.189702,0.198936,0.213505,0.19838,0.177126,0.200036,0.184859,0.187252,...,0.175127,0.166036,0.201423,0.170676,0.181896,0.185415,0.169321,0.189806,0.185087,0.166839
6,1,1,2019,99,2,493,0.207415,0.214842,0.215433,0.216183,0.195347,0.219202,0.197727,0.204539,0.198332,0.209969,...,0.170644,0.189731,0.201979,0.176003,0.190198,0.168589,0.194167,0.208173,0.196674,0.174849
6,1,1,2019,99,2,509,0.280753,0.295355,0.271846,0.267423,0.270358,0.271608,0.264213,0.269296,0.271569,0.263138,...,0.396261,0.34354,0.327996,0.362921,0.31885,0.334956,0.375124,0.322836,0.372324,0.391507
6,1,1,2019,99,2,modeled,0.676977,0.688277,0.676981,0.682542,0.67921,0.689189,0.639066,0.673872,0.65476,0.660359,...,0.742031,0.699307,0.731397,0.709601,0.690944,0.68896,0.738613,0.720816,0.754086,0.733195


In [9]:
# Summarise each row across its draws: count, mean, std, min, median, max and
# the 2.5th / 97.5th percentiles used as the uncertainty interval bounds.
smok_frac_fin = smok_frac.apply(
    lambda row_draws: row_draws.describe(percentiles=[0.025, 0.975]),
    axis=1,
)
smok_frac_fin

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,count,mean,std,min,2.5%,50%,97.5%,max
location_id,measure_id,metric_id,year_id,rei_id,sex_id,cause_id,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
6,1,1,2019,99,1,294,1000.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
6,1,1,2019,99,1,426,1000.0,0.205529,0.007814,0.172881,0.190225,0.206197,0.218379,0.236194
6,1,1,2019,99,1,493,1000.0,0.166102,0.005301,0.128422,0.155548,0.166379,0.174844,0.192204
6,1,1,2019,99,1,509,1000.0,0.21519,0.010656,0.174004,0.197553,0.214317,0.239594,0.281707
6,1,1,2019,99,1,modeled,1000.0,0.586821,0.012247,0.506328,0.560285,0.587667,0.607565,0.635693
6,1,1,2019,99,2,294,1000.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
6,1,1,2019,99,2,426,1000.0,0.188361,0.011651,0.137033,0.163952,0.189143,0.209606,0.222175
6,1,1,2019,99,2,493,1000.0,0.197681,0.012536,0.142683,0.169292,0.198573,0.219922,0.227956
6,1,1,2019,99,2,509,1000.0,0.30269,0.032623,0.204089,0.249394,0.299228,0.396265,0.483089
6,1,1,2019,99,2,modeled,1000.0,0.688732,0.028651,0.528904,0.626029,0.691067,0.739607,0.788024


## Conclusion:
Modeling lung cancer, COPD, and IHD mortality will account for 58.7% (95% UI: 56.0 - 60.8) of all smoking-attributable mortality in China in 2019 among males of all ages, and 68.9% (95% UI: 62.6 - 74.0) among females of all ages.

# Repeat but combine sexes

In [11]:
# Grouping keys for the sexes-combined analysis: same as before but without
# 'sex_id', so sums run over both sexes. 'cause_id' stays last because the
# next cell slices it off with positional indexing.
index_cols = [
    'location_id',
    'measure_id',
    'metric_id',
    'year_id',
    'rei_id',
    'cause_id',
]

In [13]:
# Same pipeline as the sex-specific analysis, rerun with the index_cols that
# omit sex_id so that draws are summed over age groups AND sexes.

# sum over age groups and sexes, keeping only the draw columns
smok_sum = smok_attrib.groupby(index_cols).sum()
smok_sum = smok_sum.drop(columns=[c for c in smok_sum.columns if 'draw' not in c])

# Combine every cause except all-cause (294) into a single 'modeled' row.
# .assign() returns a new frame, avoiding a column write into a .loc slice
# (which could raise SettingWithCopyWarning).
smok_modeled = (
    smok_sum.reset_index()
    .loc[lambda d: d.cause_id != 294]
    .assign(cause_id='modeled')
    .groupby(index_cols)
    .sum()
)

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
smok_fin = pd.concat([smok_sum, smok_modeled])

# Denominator: all-cause rows indexed by every key except cause_id
# (relies on 'cause_id' being the last entry of index_cols).
smok_all = smok_fin.reset_index()
smok_all = smok_all.loc[smok_all.cause_id == 294].drop(columns='cause_id').set_index(index_cols[:-1])

# Per-draw fraction of all-cause smoking-attributable deaths, then summary
# statistics across draws with 2.5th / 97.5th percentiles as interval bounds.
smok_frac = smok_fin / smok_all
smok_frac_fin = smok_frac.apply(pd.DataFrame.describe, percentiles=[0.025, 0.975], axis=1)
smok_frac_fin

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,count,mean,std,min,2.5%,50%,97.5%,max
location_id,measure_id,metric_id,year_id,rei_id,cause_id,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
6,1,1,2019,99,294,1000.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
6,1,1,2019,99,426,1000.0,0.203203,0.00737,0.169461,0.188204,0.203951,0.215776,0.22836
6,1,1,2019,99,493,1000.0,0.170379,0.005388,0.134863,0.158991,0.170761,0.179466,0.196029
6,1,1,2019,99,509,1000.0,0.227023,0.012047,0.185423,0.206169,0.22606,0.25406,0.296207
6,1,1,2019,99,modeled,1000.0,0.600605,0.013904,0.509302,0.569341,0.602029,0.625095,0.650621


## Conclusion:
Modeling lung cancer, COPD, and IHD mortality will account for 60.1% (95% UI: 56.9 - 62.5) of all smoking-attributable mortality in China in 2019 across both sexes and all ages.