In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
class LR:
    """Logistic-regression experiment on the heart-disease data set.

    Constructing an instance reads ``/work/heart_data.csv``, min-max scales
    six numeric feature columns, splits the rows 50/50 into train and test
    sets (``random_state=1``) and builds an unfitted ``LogisticRegression``
    stored in ``self.model``.

    Every public member is a *property*, so it is used without parentheses:
    ``obj.train`` fits the model, ``obj.test`` predicts on the test split and
    ``obj.download_report`` returns the evaluation.
    """

    # Feature columns selected by name instead of by position, so a reordered
    # CSV cannot silently feed the wrong columns to the model.
    FEATURE_COLUMNS = ['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak']

    def __init__(self, max_iter=300):
        """
        max_iter : iteration cap passed to the estimators created by this class
        """
        self.max_iter = max_iter
        self.X_Train, self.X_Test, self.Y_Train, self.Y_Test = self.preprocess
        self.model = self.create_modelLR

    @property
    def preprocess(self):
        """
        Load the CSV, scale the features to [0, 1] and split train/test.

        return : X_Train, X_Test, Y_Train, Y_Test
        """
        df = pd.read_csv('/work/heart_data.csv')
        # NOTE(review): the scaler is fit on all rows before the split, so the
        # test rows influence the scaling (mild leakage). Kept as-is so the
        # reported numbers stay comparable.
        minmax = MinMaxScaler()
        X_Data = minmax.fit_transform(df[self.FEATURE_COLUMNS])
        Y_Data = df['HeartDisease']
        X_Train, X_Test, Y_Train, Y_Test = train_test_split(
            X_Data, Y_Data, test_size=0.5, random_state=1
        )
        return X_Train, X_Test, Y_Train, Y_Test

    @property
    def create_modelLR(self):
        """
        return : unfitted LogisticRegression limited to self.max_iter iterations
        """
        return LogisticRegression(max_iter=self.max_iter)

    @property
    def create_modelNN(self):
        """
        return : unfitted MLPClassifier limited to self.max_iter iterations
        """
        # Previously this returned a LogisticRegression, so the "NN" factory
        # never produced a neural network.
        return MLPClassifier(max_iter=self.max_iter)

    @property
    def train(self):
        """
        Fit self.model on the training split.

        return : None (accessing the property performs the fit)
        """
        self.model.fit(self.X_Train, self.Y_Train)

    @property
    def test(self):
        """
        Predict on the test split with the fitted model.

        return pred [array]
        return coef_ [array]
        return intercept_ [array]

        coef_ / intercept_ are read from self.model, so the model must expose
        them (LogisticRegression does once fitted).
        """
        pred = self.model.predict(self.X_Test)
        return pred, self.model.coef_, self.model.intercept_

    @property
    def download_report(self):
        """
        Evaluate the fitted model on the test split.

        return report : classification report (text)
        return matrix : confusion matrix

        Nothing is plotted or written to disk.
        """
        pred, _coef, _intercept = self.test
        report = classification_report(self.Y_Test, pred)
        matrix = confusion_matrix(self.Y_Test, pred)
        return report, matrix

In [None]:
# Build the experiment; the constructor loads, scales and splits the data.
LogR = LR()
# `train` is a property: simply accessing it fits the model.
LogR.train
# Test-split predictions together with the fitted coefficients and intercept.
pred, coef_, intercept_ = LogR.test
# Classification report and confusion matrix for the same test split.
report, matrix = LogR.download_report
print(report, matrix, sep=' \n ')

              precision    recall  f1-score   support

           0       0.76      0.73      0.74       184
           1       0.79      0.81      0.80       225

    accuracy                           0.78       409
   macro avg       0.77      0.77      0.77       409
weighted avg       0.77      0.78      0.77       409
 
 [[134  50]
 [ 42 183]]


In [None]:
# Load the labeled heart-disease data; the scaling and model cells below read from this frame.
heartData = pd.read_csv("/work/heart_data.csv")

In [None]:
# Feature columns used by the models (values only)
cols = heartData[['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak']]
colIndex = [1,4,5,6,8,10] # positions of [Age, RestingBP, Cholesterol, FastingBS, MaxHR, Oldpeak] in heartData
heartData_HeartDisease_extracted = heartData[['HeartDisease']] # target labels

# Scaling 1: standardization (zero mean, unit variance per column)
stdHeartData = heartData[heartData.columns[colIndex]]
scaler = StandardScaler()
heartDataScaled = scaler.fit_transform(stdHeartData)

# Scaling 2: min-max scaling to [0, 1]; this is the version the models are trained on
minmaxHeartData = heartData[heartData.columns[colIndex]]
minmax = MinMaxScaler()
heartDataMM = minmax.fit_transform(minmaxHeartData)

# Sanity check of the standardized data: means ~0 and standard deviations 1
print(heartDataScaled.mean(axis=0))
print(heartDataScaled.std(axis=0))
print("\n###########################\n")
# Build the label from the selected columns so it always names every printed
# value (the hand-written label listed five names for six columns).
featureNames = ', '.join(stdHeartData.columns)
print(f'Min values ({featureNames}): ', heartDataScaled.min(axis=0))
print(f'Max values ({featureNames}): ', heartDataScaled.max(axis=0))

[-5.21180491e-17 -2.17158538e-18 -7.16623175e-17 -7.81770736e-17
 -2.04129026e-16  3.90885368e-17]
[1. 1. 1. 1. 1. 1.]

###########################

Min values (Age, RestingBP, Cholesterol, FastingBS, MaxHR):  [-2.68866998 -7.0939772  -1.83184956 -0.54627163 -3.01336003 -3.25833998]
Max values (Age, RestingBP, Cholesterol, FastingBS, MaxHR):  [2.47531824 3.61341896 3.74806746 1.83059111 2.5349367  4.98386212]


In [None]:
# Data used to train and evaluate the model (min-max scaled features, labels)
X_Data = heartDataMM
Y_Data = heartData_HeartDisease_extracted

# Split: 40% of the rows for training, 60% held out for testing
X_Train, X_Test, Y_Train, Y_Test = train_test_split(X_Data, Y_Data, test_size=0.6, random_state=1)

# Instantiation of the model: one hidden layer of 100 neurons.
# (100,) is a real one-element tuple -- (100) is just the integer 100.
# random_state pins the weight initialisation so re-running reproduces the scores.
modelNN = MLPClassifier(max_iter=300, hidden_layer_sizes=(100,), random_state=1)

# Train on the training split; the labels are flattened to 1-D because
# passing the (n, 1) DataFrame triggers sklearn's column_or_1d warning.
modelNN.fit(X_Train, Y_Train.values.ravel())

# Y_Test is the ground truth; predict on the held-out rows and compare
modelNNPred = modelNN.predict(X_Test)

print("NN Classification report")
print(classification_report(Y_Test, modelNNPred))


  y = column_or_1d(y, warn=True)
NN Classification report
              precision    recall  f1-score   support

           0       0.75      0.76      0.75       212
           1       0.82      0.81      0.81       279

    accuracy                           0.79       491
   macro avg       0.78      0.78      0.78       491
weighted avg       0.79      0.79      0.79       491



In [None]:
# Test with two hidden layers (100 neurons, then 6 neurons)

# Data used to train and evaluate the model (min-max scaled features, labels)
X_Data = heartDataMM
Y_Data = heartData_HeartDisease_extracted

# Split: 40% of the rows for training, 60% held out for testing
X_Train, X_Test, Y_Train, Y_Test = train_test_split(X_Data, Y_Data, test_size=0.6, random_state=1)

# Instantiation of the model.
# random_state pins the weight initialisation so re-running reproduces the scores.
modelNN = MLPClassifier(max_iter=300, hidden_layer_sizes=(100,6), random_state=1)

# Train on the training split; the labels are flattened to 1-D because
# passing the (n, 1) DataFrame triggers sklearn's column_or_1d warning.
modelNN.fit(X_Train, Y_Train.values.ravel())

# Y_Test is the ground truth; predict on the held-out rows and compare
modelNNPred = modelNN.predict(X_Test)

print("NN Classification report")
print(classification_report(Y_Test, modelNNPred))

  y = column_or_1d(y, warn=True)
NN Classification report
              precision    recall  f1-score   support

           0       0.75      0.78      0.76       212
           1       0.83      0.80      0.81       279

    accuracy                           0.79       491
   macro avg       0.79      0.79      0.79       491
weighted avg       0.79      0.79      0.79       491



# Using Heart_Predict.csv
### For Logistic Regression and Neural Network

In [None]:
# Load the unlabeled patients to be scored and preview the first five rows.
testHeartData = pd.read_csv('/work/heart_predict.csv')
testHeartData.head()

Unnamed: 0,PatientId,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,818,36,M,NAP,112,340,0,Normal,184,N,1.0,Flat
1,819,63,M,ASY,170,177,0,Normal,84,Y,2.5,Down
2,820,58,M,ASY,114,318,0,ST,140,N,4.4,Down
3,821,64,M,ASY,144,0,0,ST,122,Y,1.0,Flat
4,822,46,M,ASY,118,186,0,Normal,124,N,0.0,Flat


In [None]:
# Feature matrix for the new patients: the same six columns the models were
# trained on, taken as a plain array.
X_DataNew = testHeartData.loc[
    :, ['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak']
].values

In [None]:
# Scale the new data with the scaler that was fitted on the training data:
# transform only, never fit again, so new rows land on the training scale.
# modelNN was trained on the MinMaxScaler output (heartDataMM), so `minmax`
# must be used here -- using the StandardScaler (`scaler`) would feed the
# network inputs on a different scale than it was trained on.
# The result goes into a new name so re-running this cell does not scale twice.
X_DataNewScaled = minmax.transform(X_DataNew)
# New predictions made with the trained model
predictions = modelNN.predict(X_DataNewScaled)
# Patient ids come from the source frame (row 0 of the feature matrix is not an id)
patientId = testHeartData['PatientId']
print(len(X_DataNewScaled), len(predictions))

100 100
  "X does not have valid feature names, but"


In [None]:

# Pair each patient id with its predicted label and write the submission file.
submission = pd.DataFrame(
    data={
        'PatientId': testHeartData['PatientId'],
        'HeartDisease': predictions,
    }
)
submission.to_csv('submission2.csv', index=False)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=e279e481-5592-4b23-988f-b9d98935e6dc' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>