In [1]:
import glob
import os
from os.path import join

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import config_local as cl
import Loaders
import pcp_hf
from LabData.DataLoaders.LifeStyleLoader import LifeStyleLoader

In [2]:
# Project loaders for the individual data sources used by the PCP-HF features.
body = Loaders.get_BodyMeasuresLoaderDF()
blood = Loaders.get_BloodTestsLoaderDF()
ecg = Loaders.get_ECGTextLoader()
subject = Loaders.get_SubjectLoader()

# Lifestyle data, requested for study_ids='10K' with groupby_reg='first'.
# LifeStyleLoader comes from the notebook's import cell.
life = LifeStyleLoader().get_data(study_ids='10K', groupby_reg='first')

In [128]:
# Per-source feature blocks; the 'Date' index level is dropped so that the
# blocks can be aligned on the remaining index when concatenated.
pcp_body = body.df.droplevel(['Date'])[['standing_one_min_blood_pressure_systolic','bmi']]
pcp_ag = subject

pcp_smoke = life.df['smoke_tobacco_now'].droplevel(['Date'])
# Binarise smoking status: 0 stays 0, any other recorded value becomes 1.
# Missing answers must stay missing: NaN == 0 is False, so without the explicit
# isna check a participant with no answer would be coded as a smoker (1) and
# would no longer be removed by the dropna() below.
pcp_smoke = pcp_smoke.apply(lambda x: np.nan if pd.isna(x) else (0 if x == 0 else 1))
pcp_blood = blood.df.droplevel(['Date'])[['bt__glucose', 'bt__total_cholesterol', 'bt__hdl_cholesterol']]
pcp_ecg = ecg.df.droplevel(['Date'])['qrs_ms']
pcp_features = pd.concat([pcp_body, pcp_ag, pcp_blood, pcp_ecg, pcp_smoke], axis=1)
# Keep only participants for whom every feature is available.
pcp_features = pcp_features.dropna()
pcp_features

Unnamed: 0_level_0,standing_one_min_blood_pressure_systolic,bmi,age,gender,bt__glucose,bt__total_cholesterol,bt__hdl_cholesterol,qrs_ms,smoke_tobacco_now
RegistrationCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
10K_1001201093,103.0,20.553633,43.0,0.0,89.0,172.4,64.0,92.0,1.0
10K_1002087123,135.0,36.660603,45.0,0.0,95.0,201.7,38.0,94.0,0.0
10K_1003113258,145.0,23.860134,50.0,1.0,104.0,155.0,48.0,102.0,0.0
10K_1007599726,91.0,23.051754,58.0,0.0,84.0,225.2,70.0,92.0,0.0
10K_1007699078,137.0,25.233089,56.0,1.0,87.0,162.5,51.0,90.0,0.0
...,...,...,...,...,...,...,...,...,...
10K_9991294748,97.0,26.978783,47.0,0.0,84.0,172.0,51.0,92.0,1.0
10K_9994795317,134.0,26.277285,49.0,1.0,124.0,208.0,36.0,90.0,0.0
10K_9996884777,106.0,30.736282,53.0,0.0,91.0,181.0,48.0,104.0,1.0
10K_9999226141,129.0,24.624701,44.0,1.0,87.0,214.0,45.3,112.0,0.0


In [129]:
# Row-wise PCPHF evaluation over the feature table; the result is named 'pcp_hf'.
# NOTE: PCPHF is defined in a cell further down this notebook, so that cell
# has to be executed before this one.
pcp_risk = pd.Series(
    pcp_features.apply(
        lambda row: PCPHF(
            age=row['age'],
            gender=row['gender'],
            systolic=row['standing_one_min_blood_pressure_systolic'],
            glucose=row['bt__glucose'],
            cholesterol=row['bt__total_cholesterol'],
            hdl=row['bt__hdl_cholesterol'],
            qrs=row['qrs_ms'],
            bmi=row['bmi'],
            smoker=row['smoke_tobacco_now'],
        ),
        axis=1,
    ),
    name='pcp_hf',
)
pcp_risk

RegistrationCode
10K_1001201093    0.001468
10K_1002087123    0.015183
10K_1003113258    0.004430
10K_1007599726    0.009308
10K_1007699078    0.007401
                    ...   
10K_9991294748    0.007476
10K_9994795317    0.004163
10K_9996884777    0.026312
10K_9999226141    0.001497
10K_9999409119    0.050111
Name: pcp_hf, Length: 6073, dtype: float64

In [130]:
# Store the risk series through cl.save_pickle, passing the 'y' sub-path of
# cl.DB_RISK_SCORE_PATH and the file name 'pcp_hf.pickle'.
cl.save_pickle(
    pcp_risk,
    join(cl.DB_RISK_SCORE_PATH, 'y'),
    'pcp_hf.pickle',
)

In [103]:
# Coefficient tables already read from disk, keyed by pickle path, so that
# row-wise use (e.g. DataFrame.apply) does not re-read the file for every row.
# Re-running this cell resets the cache.
_PCPHF_COEFF_CACHE = {}


def _load_pcphf_coeff():
    """Return the PCP-HF coefficient table, reading each pickle path at most once."""
    path = join(cl.DB_PATH, 'RiskScores', 'pcp_hf', 'coeff.pickle')
    if path not in _PCPHF_COEFF_CACHE:
        _PCPHF_COEFF_CACHE[path] = cl.load_pickle(path)
    return _PCPHF_COEFF_CACHE[path]


def PCPHF(age, gender,
          systolic, glucose, cholesterol, hdl, bmi, qrs,
          race='white', is_systolic_treated=False, smoker=0, is_glucose_treated=0):
    """Compute the PCP-HF risk for one individual.

    A linear score is built from the natural logs of the continuous inputs
    (plus age interactions and the smoker flag), weighted by the coefficient
    group selected by race and gender, and converted to a risk with
    ``1 - s0 ** exp(score - mean_cv)``.

    Parameters
    ----------
    age, systolic, glucose, cholesterol, hdl, bmi, qrs : scalar
        Continuous predictors; each is log-transformed, so they must be positive.
    gender : scalar
        ``0`` selects the ``*_male`` coefficients, ``1`` the ``*_female`` ones.
        NOTE(review): confirm that this matches the encoding of the gender
        column supplied by the caller.
    race : {'white', 'black'}
        Selects the ``white_*`` or ``black_*`` coefficient group.
    is_systolic_treated : bool-like
        When truthy, the "treated" systolic coefficients are used.
    smoker : scalar
        Multiplied directly into the smoker terms (0 removes them).
    is_glucose_treated : bool-like
        When truthy, the "treated" glucose coefficient is used.

    Returns
    -------
    float
        The risk value ``1 - s0 ** exp(score - mean_cv)``.

    Raises
    ------
    ValueError
        If ``gender`` is not 0/1 or ``race`` is not 'white'/'black'.
    """
    if race not in ('white', 'black'):
        raise ValueError(
            "PCPHF: unsupported race {!r}; expected 'white' or 'black'".format(race))
    if gender == 0:
        sex = 'male'
    elif gender == 1:
        sex = 'female'
    else:
        raise ValueError(
            "PCPHF: unsupported gender {!r}; expected 0 or 1".format(gender))

    # Coefficients for the selected race/gender group only.
    coeff = _load_pcphf_coeff()['{}_{}'.format(race, sex)]

    ln_age = np.log(age)
    ln_systolic = np.log(systolic)
    ln_glucose = np.log(glucose)
    ln_cholesterol = np.log(cholesterol)
    ln_hdl = np.log(hdl)
    ln_bmi = np.log(bmi)
    ln_qrs = np.log(qrs)

    # Treatment status switches between two sets of coefficients.
    if is_systolic_treated:
        sys_coeff = coeff['ln_sys_treated']
        age_sys_coeff = coeff['ln_age_sys_treated']
    else:
        sys_coeff = coeff['ln_sys_untreated']
        age_sys_coeff = coeff['ln_age_sys_untreated']
    if is_glucose_treated:
        glucose_coeff = coeff['ln_glucose_treated']
    else:
        glucose_coeff = coeff['ln_glucose_untreated']

    score = 0
    score += ln_age * coeff['ln_age']
    score += ln_age**2 * coeff['ln_age_squared']
    score += ln_systolic * sys_coeff
    score += ln_age * ln_systolic * age_sys_coeff
    score += smoker * coeff['smoker']
    score += ln_age * smoker * coeff['ln_age_smoker']
    score += ln_glucose * glucose_coeff
    score += ln_cholesterol * coeff['ln_cholesterol']
    score += ln_hdl * coeff['ln_hdl']
    score += ln_bmi * coeff['ln_bmi']
    score += ln_age * ln_bmi * coeff['ln_age_bmi']
    score += ln_qrs * coeff['ln_qrs']

    mean_cv = coeff['mean_cv']
    s0 = coeff['s0']

    risk = 1 - s0 ** (np.exp(score - mean_cv))

    return risk

# def PCPHF(ser):
#     age = ser.age
#     gender = ser.gender
#     systolic=ser.standing_one_min_blood_pressure_systolic
#     glucose=ser.bt__glucose
#     cholesterol=ser.bt__total_cholesterol
#     hdl=ser.bt__hdl_cholesterol
#     qrs=ser.qrs_ms
#     bmi=ser.bmi
#
#     risk = _PCPHF(age=age, gender=gender, systolic=systolic, glucose=glucose, cholesterol=cholesterol, hdl=hdl, qrs=qrs, bmi=bmi)
#
#     return risk