Preparation steps performed before running this notebook:
- Removed the first row from the PPG-BP dataset xlsx file.
- Converted the new xlsx file to CSV for faster execution.
- The new CSV file is named `SubjectData.csv`.
- The `PPGBPDatabase` folder contains all the PPG cycles of the subjects in TXT format.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import kurtosis
from scipy.stats import skew

## Importing SubjectData CSV file using Pandas

In [2]:
# Load the subject table from the CSV export and preview its first rows.
subject_csv_path = 'SubjectData.csv'
SubjectData = pd.read_csv(subject_csv_path)
SubjectData.head()

Unnamed: 0,Num.,subject_ID,Sex(M/F),Age(year),Height(cm),Weight(kg),Systolic Blood Pressure(mmHg),Diastolic Blood Pressure(mmHg),Heart Rate(b/m),BMI(kg/m^2),Hypertension,Diabetes,cerebral infarction,cerebrovascular disease
0,1,2,Female,45,152,63,161,89,97,27.27,Stage 2 hypertension,,,
1,2,3,Female,50,157,50,160,93,76,20.28,Stage 2 hypertension,,,
2,3,6,Female,47,150,47,101,71,79,20.89,Normal,,,
3,4,8,Male,45,172,65,136,93,87,21.97,Prehypertension,,,
4,5,9,Female,46,155,65,123,73,73,27.06,Prehypertension,,,


### Data Cleaning
In the `cerebral infarction` column, replace NaN with 0 and the label 'cerebral infarction' with 1 <br>
0 = No Cerebral Infarction <br>
1 = Cerebral Infarction

In [3]:
# Encode the column as 0/1 in one pass: missing entries become 0 and the
# text label 'cerebral infarction' becomes 1.
SubjectData['cerebral infarction'] = (
    SubjectData['cerebral infarction']
    .fillna(0)
    .replace('cerebral infarction', 1)
)

In [4]:
# Count how many subjects fall into each cerebral-infarction class (0 / 1).
countCI = SubjectData['cerebral infarction'].value_counts()

# .get(..., 0) keeps the summary working when one of the classes is absent
# (plain countCI[1] / countCI[0] would raise KeyError in that case).
subjects_with_ci = countCI.get(1, 0)
subjects_without_ci = countCI.get(0, 0)

# The total is the number of rows, not the sum of the two classes, so it stays
# correct even if the column ever contains an unexpected value.
print('Total number of subjects :', len(SubjectData))
print('Total number of subjects with CI:', subjects_with_ci)
print('Total number of subjects without CI:', subjects_without_ci)

Total number of subjects : 219
Total number of subjects with CI: 20
Total number of subjects without CI: 199


### Importing the PPG cycles of the target patients into a dictionary


In [5]:
baseurl='PPGBPDatabase/'


def load_ppg_cycles(base_url, max_subject_id=420, segments_per_subject=3):
    """Read the PPG segment files of every subject found under ``base_url``.

    Files are looked up as ``<base_url><subject>_<segment>.txt`` for subject
    numbers ``0 .. max_subject_id - 1`` and segment numbers
    ``1 .. segments_per_subject``. Only the first line of each file is read;
    it is split on tabs, the final token is discarded and every remaining
    value is truncated to ``int``.

    Args:
        base_url: Path prefix (including the trailing separator) of the folder
            holding the TXT files.
        max_subject_id: Exclusive upper bound of the subject numbers to probe.
        segments_per_subject: Number of segment files expected per subject.

    Returns:
        dict mapping ``str(subject)`` to a list of ``segments_per_subject``
        lists of ints. A subject is included only when every one of its
        segment files could be read and parsed.
    """
    cycles = {}
    for subject in range(max_subject_id):
        try:
            segments = []
            for segment in range(1, segments_per_subject + 1):
                # `with` closes the handle even when parsing fails below.
                with open(base_url + f'{subject}_{segment}.txt', 'r') as handle:
                    tokens = handle.readline().split('\t')
                # The last token is dropped; presumably it is the empty residue
                # after a trailing tab -- TODO confirm against the raw files.
                segments.append([int(float(token)) for token in tokens[:-1]])
        except FileNotFoundError:
            # Subject numbers are sparse, so a missing file simply means there
            # is no complete recording for this number.
            continue
        except ValueError as error:
            # A malformed file is reported instead of being silently swallowed,
            # so a dropped subject is visible in the cell output.
            print(f'Skipping subject {subject}: malformed PPG file ({error})')
            continue
        cycles[str(subject)] = segments
    return cycles


PPGCycle = load_ppg_cycles(baseurl)


### Extracting Information from the PPGCycle

In [6]:
def _cycle_stats(samples):
    """Summarise one sample sequence as its mean, kurtosis and skewness."""
    return {
        'Mean': np.mean(samples),
        # fisher=False selects Pearson's definition (a normal distribution scores 3.0).
        'Kurtosis': kurtosis(samples, fisher=False),
        'Skewness': skew(samples),
    }


# Per-subject result: subject key -> [segment 1, segment 2, segment 3, all three pooled].
extractPPGCycle = {}
# Column-wise copies of the same statistics, in PPGCycle iteration order.
# Slots 0-2 hold the individual segments, slot 3 the pooled samples.
CMean = [[] for _ in range(4)]
CKurtosis = [[] for _ in range(4)]
CSkewness = [[] for _ in range(4)]

for subject_key in PPGCycle:
    subject_segments = PPGCycle[subject_key]
    stats_per_slot = []
    pooled_samples = []
    for slot in range(3):
        samples = subject_segments[slot]
        pooled_samples.extend(samples)
        stats_per_slot.append(_cycle_stats(samples))
    # Fourth entry: statistics over the concatenation of the three segments.
    stats_per_slot.append(_cycle_stats(pooled_samples))

    for slot, stats in enumerate(stats_per_slot):
        CMean[slot].append(stats['Mean'])
        CKurtosis[slot].append(stats['Kurtosis'])
        CSkewness[slot].append(stats['Skewness'])
    extractPPGCycle[subject_key] = stats_per_slot

### Inserting Extracted Data into the SubjectData

In [7]:
# Attach the PPG features by subject_ID rather than by row position, so the
# result does not depend on the CSV row order matching the order in which the
# PPG files were loaded.
# extractPPGCycle is keyed by str(subject number); this assumes subject_ID is
# stored as integers so that astype(str) yields the same keys -- TODO confirm.
subject_keys = SubjectData['subject_ID'].astype(str)

# Fail loudly, naming the affected subjects, instead of producing a shifted or
# partially filled feature table.
missing_subjects = sorted(set(subject_keys) - set(extractPPGCycle))
if missing_subjects:
    raise ValueError(f'No PPG features found for subject_ID(s): {missing_subjects}')

# Slots 0-2 are the three individual segments, slot 3 is the pooled signal.
for slot, prefix in enumerate(('C1', 'C2', 'C3', 'C')):
    for stat in ('Mean', 'Kurtosis', 'Skewness'):
        values_by_subject = {
            key: stats[slot][stat] for key, stats in extractPPGCycle.items()
        }
        SubjectData[f'{prefix}-{stat}'] = subject_keys.map(values_by_subject)


In [8]:
# Preview the first rows of SubjectData, which now include the added C1/C2/C3/C feature columns.
SubjectData.head()

Unnamed: 0,Num.,subject_ID,Sex(M/F),Age(year),Height(cm),Weight(kg),Systolic Blood Pressure(mmHg),Diastolic Blood Pressure(mmHg),Heart Rate(b/m),BMI(kg/m^2),...,C1-Skewness,C2-Mean,C2-Kurtosis,C2-Skewness,C3-Mean,C3-Kurtosis,C3-Skewness,C-Mean,C-Kurtosis,C-Skewness
0,1,2,Female,45,152,63,161,89,97,27.27,...,0.614909,2033.933333,2.072548,0.505147,2045.224762,2.22378,0.66608,2038.692381,2.77884,0.662289
1,2,3,Female,50,157,50,160,93,76,20.28,...,0.542265,2001.574286,2.009247,0.393794,2007.157143,1.883882,0.421452,2004.373968,2.013632,0.44916
2,3,6,Female,47,150,47,101,71,79,20.89,...,0.058724,2037.16619,2.073974,0.055324,2020.478571,1.817075,0.493795,2013.143492,2.355045,0.504163
3,4,8,Male,45,172,65,136,93,87,21.97,...,0.602817,2014.73,1.880641,0.418807,2021.939048,2.00519,0.50334,2023.992222,2.155941,0.54616
4,5,9,Female,46,155,65,123,73,73,27.06,...,0.177446,2003.890952,2.206517,0.351171,2010.850952,2.008548,0.473827,2011.915079,2.074053,0.354144
