In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/risk-score-of-patients/patients.csv


# CHA2DS2-VASc score, which predicts stroke risk in patients with an existing atrial fibrillation

In [2]:
def cha2ds2_vasc(age, sex, chf, hypertension, stroke, vascular, diabetes):
    """
    (int,str,bool,bool,bool,bool,bool) -> int
    Compute the CHA2DS2-VASc score, a stroke-risk estimate for patients with an
    existing atrial fibrillation. The score runs from 0 (low) to 9 (high).

    Points awarded:
      * age: 0 below 65, 1 for 65 through 74, 2 otherwise
      * sex: 1 when sex == 'Female', otherwise 0
      * stroke history: 2
      * chf, hypertension, vascular disease, diabetes history: 1 each

    >>> cha2ds2_vasc(30,'Female',False,False,False,False,False)
    1

    >>> cha2ds2_vasc(65,'Male',False,True,True,False,True)
    5
    """
    if age < 65:
        age_points = 0
    elif 65 <= age <= 74:
        age_points = 1
    else:
        age_points = 2

    sex_points = 1 if sex == 'Female' else 0

    # (flag, points) pairs; a previous stroke is the only condition worth two points.
    weighted_flags = (
        (chf, 1),
        (hypertension, 1),
        (stroke, 2),
        (vascular, 1),
        (diabetes, 1),
    )

    # A flag scores unless it compares equal to False, so values such as None
    # (which is not == False) still earn their points.
    history_points = sum(
        points for flag, points in weighted_flags if not (flag == False)
    )

    return age_points + sex_points + history_points

# Framingham Score 

In [3]:
import math

def framingham(age, sex, smoker, cholesterol, hdl, systolic, bp_treated):
    """
    (int,str,bool,int,int,int,bool) -> float
    Estimate the Framingham risk of hard coronary heart disease for a non-diabetic patient.

    The result is a probability rounded to four decimal places. Ages below 30 or
    above 79 are outside the supported range and yield the sentinel -1. Any sex
    other than 'Male' is scored with the female coefficients.

    >>> framingham(30, 'Female', False, 150, 40, 120, False)
    0.0002
    >>> framingham(67, 'Female', False, 160, 60, 120, False)
    0.0173
    """
    # Unsupported age: report the sentinel instead of a probability.
    if (age < 30) or (age > 79):
        return -1

    if sex == 'Male':
        coef = {
            'age': 52.00961, 'cholesterol': 20.014077, 'hdl': -0.905964,
            'systolic': 1.305784, 'bp_treated': 0.241549, 'smoker': 12.096316,
            'age_cholesterol': -4.605038, 'age_smoker': -2.84367,
            'age_age': -2.93323,
        }
        baseline = 0.9402
        intercept = -172.300168
        smoker_age_cap = 70   # the age used in the age*smoker term stops growing here
    else:
        coef = {
            'age': 31.764001, 'cholesterol': 22.465206, 'hdl': -1.187731,
            'systolic': 2.552905, 'bp_treated': 0.420251, 'smoker': 13.07543,
            'age_cholesterol': -5.060998, 'age_smoker': -2.996945,
            'age_age': 0,
        }
        baseline = 0.98767
        intercept = -146.5933061
        smoker_age_cap = 78   # only reached by a 79-year-old, the oldest supported age

    # Age*smoker interaction, with the age capped per sex.
    ln_smoker_age = math.log(smoker_age_cap) if age > smoker_age_cap else math.log(age)
    age_smoker_term = coef['age_smoker'] * (ln_smoker_age * int(smoker))

    ln_age = math.log(age)
    ln_chol = math.log(cholesterol)
    ln_hdl = math.log(hdl)
    ln_sbp = math.log(systolic)

    # Linear predictor of the risk model (terms kept in a fixed order).
    exponent = (
        coef['age'] * ln_age
        + coef['cholesterol'] * ln_chol
        + coef['hdl'] * ln_hdl
        + coef['systolic'] * ln_sbp
        + coef['bp_treated'] * int(bp_treated)
        + coef['smoker'] * int(smoker)
        + coef['age_cholesterol'] * (ln_age * ln_chol)
        + age_smoker_term
        + coef['age_age'] * (ln_age * ln_age)
        + intercept
    )

    risk = 1 - baseline ** math.exp(exponent)
    return round(risk, 4)

# Heart Score

In [4]:
def heart(history, ekg, age, risks, troponin):
    """
    (str,str,int,int,float) -> int
    Compute the HEART score, which predicts a patient's risk for major cardiac
    events from 0 (low) to 10 (high).

    Each of the five components contributes 0, 1 or 2 points:
      * history:  first word 'Slightly' -> 0, 'Moderately' -> 1, anything else -> 2
      * ekg:      first word 'Normal' -> 0, 'Non-specific' -> 1, anything else -> 2
      * age:      below 45 -> 0, 45 through 64 -> 1, otherwise -> 2
      * risks:    0 -> 0, 1 or 2 -> 1, otherwise -> 2
      * troponin: up to 1.0 -> 0, above 1.0 up to 3.0 -> 1, otherwise -> 2

    >>> heart('Slightly suspicious','Normal',30, 0, 0)
    0
    >>> heart('Highly suspicious','Significant ST deviation',71, 7, 5)
    10
    """
    # Only the leading word of the free-text fields decides the points.
    history_points = {'Slightly': 0, 'Moderately': 1}.get(history.split()[0], 2)
    ekg_points = {'Normal': 0, 'Non-specific': 1}.get(ekg.split()[0], 2)

    age_points = 0 if age < 45 else (1 if 45 <= age <= 64 else 2)

    risk_points = 0 if risks == 0 else (1 if risks in (1, 2) else 2)

    troponin_points = (
        0 if troponin <= 1.0
        else (1 if 1.0 < troponin <= 3.0 else 2)
    )

    return history_points + ekg_points + age_points + risk_points + troponin_points

## A patient is classified as "High Risk" if they meet all three criteria below:

- CHA2DS2_VASc >= 2
- HEART >= 4
- Framingham >= 3%

In [5]:
import pandas as pd
import numpy as np

def patient_scores(filename):
    """
    (csv file) -> List
    Score every patient in a CSV file and flag the high-risk ones.

    The file is read with pandas, its column headers are shortened to the
    argument names of the scoring functions, 'M'/'F' become 'Male'/'Female' and
    'Yes'/'No' become True/False. Each row is then scored with cha2ds2_vasc,
    heart and framingham.

    A patient is "High Risk" only when all three criteria hold:
    CHA2DS2_VASc >= 2, HEART >= 4 and Framingham >= 3%. A Framingham value of -1
    (age outside the range that score supports) therefore never counts as high risk.

    Returns one list per patient, in file order:
    [CHA2DS2_VASc, HEART, Framingham, High Risk]. Patient ids are not included.
    """
    patients = pd.read_csv(filename)

    # Shorten the CSV headers to the parameter names of the scoring functions.
    patients = patients.rename(columns={'chf history': 'chf',
                                        'hypertension history': 'hypertension',
                                        'stroke history': 'stroke',
                                        'vascular disease history': 'vascular',
                                        'diabetes history': 'diabetes',
                                        'risk factors': 'risks',
                                        'total cholesterol': 'cholesterol',
                                        'hdl cholesterol': 'hdl',
                                        'systolic bp': 'systolic',
                                        'bp medicine': 'bp_treated'})
    patients['sex'] = patients['sex'].replace({'M': 'Male', 'F': 'Female'})
    patients = patients.replace({'Yes': True, 'No': False})

    # Score row by row. Explicit otypes keep np.vectorize usable when the file
    # has no data rows (it cannot infer an output type from zero calls).
    patients['CHA2DS2_VASc'] = np.vectorize(cha2ds2_vasc, otypes=[int])(
        patients['age'], patients['sex'], patients['chf'], patients['hypertension'],
        patients['stroke'], patients['vascular'], patients['diabetes'])
    patients['HEART'] = np.vectorize(heart, otypes=[int])(
        patients['history'], patients['ekg'], patients['age'],
        patients['risks'], patients['troponin'])
    patients['Framingham'] = np.vectorize(framingham, otypes=[float])(
        patients['age'], patients['sex'], patients['smoker'], patients['cholesterol'],
        patients['hdl'], patients['systolic'], patients['bp_treated'])

    # High risk requires all three thresholds at once. Assigning the boolean
    # mask directly avoids a chained in-place replace on a column selection,
    # which pandas may apply to a temporary copy and silently discard.
    patients['High Risk'] = (
        (patients['CHA2DS2_VASc'] >= 2)
        & (patients['HEART'] >= 4)
        & (patients['Framingham'] * 100 >= 3.0)   # Framingham is a fraction; compare as percent
    )

    # One [CHA2DS2_VASc, HEART, Framingham, High Risk] list per patient.
    answers = patients[['CHA2DS2_VASc', 'HEART', 'Framingham', 'High Risk']].values.tolist()

    return answers


In [6]:
# Score the patients in the Kaggle input CSV; each entry of `res` is
# [CHA2DS2_VASc, HEART, Framingham, High Risk] for one patient.
res = patient_scores('../input/risk-score-of-patients/patients.csv')
res

[[7, 7, -1.0, False],
 [2, 3, -1.0, False],
 [7, 4, 0.0015, False],
 [5, 5, -1.0, False],
 [3, 4, 0.005, False],
 [6, 7, -1.0, False],
 [2, 4, -1.0, False],
 [4, 8, 0.0391, True],
 [6, 6, -1.0, False],
 [2, 7, 0.0437, True],
 [6, 5, -1.0, False],
 [3, 2, 0.0465, False],
 [4, 5, 0.0189, False],
 [3, 2, -1.0, False],
 [5, 4, 0.0016, False],
 [3, 7, 0.0126, False],
 [1, 6, 0.0126, False],
 [2, 4, 0.0153, False],
 [4, 3, 0.0183, False],
 [9, 7, 0.0239, False],
 [2, 3, -1.0, False],
 [4, 6, -1.0, False],
 [3, 7, 0.0053, False],
 [5, 2, 0.0142, False],
 [4, 6, -1.0, False],
 [5, 5, 0.1098, True],
 [3, 7, 0.0709, True],
 [5, 6, 0.0293, False],
 [3, 3, 0.0244, False]]