In [44]:
import bokeh
import pandas as pd

In [48]:
# Show which Bokeh version is installed in this kernel.
print(bokeh.__version__)

1.0.4


In [2]:
# Columns that identify one observation; every dataset is joined on these.
_MERGE_KEYS = ['location_name', 'sex_name', 'year']


def _select_measure(frame, filter_column, filter_value, value_name):
    """Return the merge keys plus the ``val`` column renamed to ``value_name``.

    Only rows where ``frame[filter_column] == filter_value`` are kept.
    """
    subset = frame.loc[frame[filter_column] == filter_value, _MERGE_KEYS + ['val']]
    return subset.rename(columns={'val': value_name})


def process_data():
    """Load the GBD 2017 CSV extracts and combine them into one wide DataFrame.

    Four CSV files are read: the all-risks extract, the direct-cause extract,
    the Parkinson's incidence/prevalence extract, and the region lookup.
    For each risk factor of interest the ``val`` column is pulled out under
    its own column name, and everything is joined on
    ``location_name`` / ``sex_name`` / ``year``. All joins use ``pd.merge``
    with its default ``how`` (inner), so a key combination missing from any
    input is dropped from the result.

    Returns
    -------
    df : pandas.DataFrame
        The merge keys, one column per risk factor (``cholesterol``,
        ``glucose``, ``bmi``, ``legumes``, ``fruits``, ``milk``, ``grains``,
        ``processed_meat``, ``veg``, ``sugar``, ``sodium``, ``smoking``),
        then ``prevalence`` and ``incidence``, followed by the columns of the
        region file (its ``Group`` column is renamed to ``regions``).
    risk_list : list
        Sorted list made of ``'Smoking'`` plus every distinct ``rei_name``
        found in the all-risks extract.
    """
    # Import the Risk Dataset
    risk = pd.read_csv('#############/parkinsons/data/IHME-GBD_2017_DATA-All-Risks.csv')

    # (rei_name in the risk extract, output column name), in output column order.
    # The original notebook comments describe these values as being in YLDs.
    risk_columns = [
        ('High LDL cholesterol', 'cholesterol'),
        ('High fasting plasma glucose', 'glucose'),
        ('High body-mass index', 'bmi'),
        ('Diet low in legumes', 'legumes'),
        ('Diet low in fruits', 'fruits'),
        ('Diet low in milk', 'milk'),
        ('Diet low in whole grains', 'grains'),
        ('Diet high in processed meat', 'processed_meat'),
        ('Diet low in vegetables', 'veg'),
        ('Diet high in sugar-sweetened beverages', 'sugar'),
        ('Diet high in sodium', 'sodium'),
    ]

    # Merge risk dataset: start from the first factor and join the rest in order.
    df_risk = None
    for rei_name, column in risk_columns:
        factor = _select_measure(risk, 'rei_name', rei_name, column)
        if df_risk is None:
            df_risk = factor
        else:
            df_risk = pd.merge(df_risk, factor, on=_MERGE_KEYS)

    # Import the Direct Risk Dataset and add its Smoking values
    direct = pd.read_csv('#######/parkinsons/data/IHME-GBD_2017_DATA-direct_cause_PD.csv')
    smoking = _select_measure(direct, 'rei_name', 'Smoking', 'smoking')
    df_risk = pd.merge(df_risk, smoking, on=_MERGE_KEYS)

    # Import the Parkinson's prevalence dataset
    cause = pd.read_csv('##########/parkinsons/data/IHME-GBD_2017_DATA-PD_Incidence_prevalence.csv')
    pv = _select_measure(cause, 'measure_id', 5, 'prevalence')  # Prevalence of Parkinsons
    incidence = _select_measure(cause, 'measure_id', 6, 'incidence')  # Incidence of Parkinsons

    df = pd.merge(df_risk, pv, on=_MERGE_KEYS)
    df = pd.merge(df, incidence, on=_MERGE_KEYS)

    # Import the region dataset; align its column names with the main frame.
    regions = pd.read_csv('######/parkinsons/data/region.csv')
    regions = regions.rename(columns={'Country': 'location_name', 'Group': 'regions'})

    # Merge the data and create a single df
    df = pd.merge(df, regions, on="location_name")

    # 'Smoking' comes from the direct-cause file, so it is added by hand.
    risk_list = sorted(['Smoking', *risk['rei_name'].unique()])

    return df, risk_list


In [3]:
# Build the merged DataFrame and the sorted list of risk-factor names.
df, risk_list = process_data()

In [40]:
# Transpose for display so each column of df appears as a row.
df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10910,10911,10912,10913,10914,10915,10916,10917,10918,10919
location_name,China,China,China,China,China,China,China,China,China,China,...,Palestine,Palestine,Palestine,Palestine,Palestine,Palestine,Palestine,Palestine,Palestine,Palestine
sex_name,Male,Female,Male,Female,Male,Female,Male,Female,Male,Female,...,Male,Female,Male,Female,Male,Female,Male,Female,Male,Female
year,1990,1990,1991,1991,1992,1992,1993,1993,1994,1994,...,2013,2013,2014,2014,2016,2016,2017,2017,2015,2015
cholesterol,0.00632528,0.00627755,0.00642883,0.00641293,0.00654552,0.00655452,0.00666296,0.00670145,0.00679129,0.00685312,...,0.00489912,0.00535024,0.00502677,0.00547471,0.00534729,0.00576322,0.00551187,0.00592151,0.005179,0.00561786
glucose,0.0439183,0.0331346,0.0445493,0.0339091,0.0452288,0.0346681,0.0459165,0.0353788,0.0466277,0.0360475,...,0.0509148,0.0507453,0.0518719,0.0516554,0.0542366,0.0538336,0.0554141,0.0550194,0.0530673,0.0527228
bmi,0.0144409,0.0138727,0.0149537,0.0143493,0.0155098,0.0148698,0.0160989,0.0154218,0.0167248,0.0160186,...,0.0456919,0.0544267,0.0466951,0.0556203,0.0490403,0.0583354,0.0501641,0.0597631,0.0478876,0.0569674
legumes,0.000348051,0.000397723,0.00035953,0.000412357,0.00037239,0.000428001,0.000385762,0.000444516,0.000399938,0.000461662,...,0.000443466,0.000375657,0.000450341,0.000381652,0.00046949,0.000394414,0.000478765,0.000401123,0.000459525,0.000388041
fruits,0.0156839,0.0134445,0.0159473,0.0136962,0.0162179,0.0139409,0.0164763,0.0141667,0.0167397,0.014398,...,0.0091375,0.00914642,0.00930984,0.00933833,0.00971659,0.00973318,0.00991461,0.0099552,0.00950879,0.00953359
milk,8.27816e-05,6.51394e-05,8.57648e-05,6.71923e-05,8.87602e-05,6.91145e-05,9.13989e-05,7.09769e-05,9.40208e-05,7.27584e-05,...,4.87791e-05,5.96921e-05,4.97371e-05,5.99818e-05,5.25714e-05,6.27846e-05,5.45849e-05,6.44929e-05,5.10895e-05,6.14124e-05
grains,0.0170928,0.0143627,0.0174667,0.0147253,0.0178618,0.0150899,0.0182521,0.0154404,0.018635,0.0157742,...,0.0158866,0.0155861,0.0162004,0.0159176,0.0169623,0.0166725,0.0173462,0.0170892,0.0165761,0.0162959


In [43]:
# Import the Direct Risk Dataset into a top-level variable so its raw rows
# (all rei_name values, plus the val/upper/lower columns) can be inspected.
direct = pd.read_csv('#######/parkinsons/data/IHME-GBD_2017_DATA-direct_cause_PD.csv')

# Display the raw frame.
direct

Unnamed: 0,measure_id,measure_name,location_id,location_name,sex_id,sex_name,age_id,age_name,cause_id,cause_name,rei_id,rei_name,metric_id,metric_name,year,val,upper,lower
0,3,YLDs (Years Lived with Disability),11,Indonesia,1,Male,22,All Ages,544,Parkinson's disease,98,Tobacco,2,Percent,1990,-0.169523,-0.069466,-0.290830
1,3,YLDs (Years Lived with Disability),11,Indonesia,2,Female,22,All Ages,544,Parkinson's disease,98,Tobacco,2,Percent,1990,-0.008222,-0.002765,-0.015273
2,3,YLDs (Years Lived with Disability),11,Indonesia,1,Male,22,All Ages,544,Parkinson's disease,99,Smoking,2,Percent,1990,-0.169523,-0.069466,-0.290830
3,3,YLDs (Years Lived with Disability),11,Indonesia,2,Female,22,All Ages,544,Parkinson's disease,99,Smoking,2,Percent,1990,-0.008222,-0.002765,-0.015273
4,3,YLDs (Years Lived with Disability),11,Indonesia,1,Male,22,All Ages,544,Parkinson's disease,169,All risk factors,2,Percent,1990,-0.169523,-0.069466,-0.290830
5,3,YLDs (Years Lived with Disability),11,Indonesia,2,Female,22,All Ages,544,Parkinson's disease,169,All risk factors,2,Percent,1990,-0.008222,-0.002765,-0.015273
6,3,YLDs (Years Lived with Disability),11,Indonesia,1,Male,22,All Ages,544,Parkinson's disease,203,Behavioral risks,2,Percent,1990,-0.169523,-0.069466,-0.290830
7,3,YLDs (Years Lived with Disability),11,Indonesia,2,Female,22,All Ages,544,Parkinson's disease,203,Behavioral risks,2,Percent,1990,-0.008222,-0.002765,-0.015273
8,3,YLDs (Years Lived with Disability),17,Sri Lanka,1,Male,22,All Ages,544,Parkinson's disease,98,Tobacco,2,Percent,1990,-0.200064,-0.095694,-0.315454
9,3,YLDs (Years Lived with Disability),17,Sri Lanka,2,Female,22,All Ages,544,Parkinson's disease,98,Tobacco,2,Percent,1990,-0.005957,-0.002393,-0.010318
