### Submitted By:
# Indrashis Paul
# 19MIM10046

# Importing required dependencies

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold, GridSearchCV
from sklearn.linear_model import Perceptron

# Loading data

In [2]:
data = pd.read_csv('heart-categoric.csv')
print(data.shape)
data.head()

(23, 5)


Unnamed: 0,age -,bp -,ch -,hr -,hd
0,Old,Low,Low,Medium,No
1,VeryOld,Low,Medium,Low,Yes
2,Old,Medium,Medium,Medium,No
3,Old,Low,Low,Low,Yes
4,Mild,Low,Low,Medium,Yes


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   age -   23 non-null     object
 1   bp -    23 non-null     object
 2   ch -    23 non-null     object
 3   hr -    23 non-null     object
 4   hd      23 non-null     object
dtypes: object(5)
memory usage: 1.0+ KB


# Preprocessing
1. Label Encoding the Target Column
2. Standard scaling the numerical data 

In [5]:
le = LabelEncoder()

for col in data.columns:
    data[col] = le.fit_transform(data[col])
data.head()

Unnamed: 0,age -,bp -,ch -,hr -,hd
0,1,1,1,1,0
1,2,1,2,0,1
2,1,2,2,1,0
3,1,1,1,0,1
4,0,1,1,1,1


# Building the Perceptron for Classification

## Split data

In [12]:
X = data.drop('hd', axis=1)
y = data['hd']

## Using `sklearn.linear_model.Perceptron`

### 1. Default Parameters

In [7]:
# define model
model = Perceptron()

# define model evaluation method
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate model
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
# summarize result
print(scores)
print(f'Mean Accuracy: {np.mean(scores)} ({np.std(scores)})')

[0.66666667 0.66666667 0.66666667 0.5        1.         0.5
 1.         1.         0.5        0.5        0.66666667 0.66666667
 0.66666667 0.5        1.         0.5        1.         0.5
 1.         0.5        1.         0.66666667 1.         0.
 0.5        0.5        1.         0.5        0.5        1.        ]
Mean Accuracy: 0.6888888888888888 (0.2499382639822665)


### 2. Custom Parameters

In [8]:
model = Perceptron(penalty='l1', alpha=0.001, max_iter=2000, tol=1e-3, eta0=1.0021, class_weight='balanced', warm_start=True)

# define model evaluation method
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate model
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
# summarize result
print(scores)
print(f'Mean Accuracy: {np.mean(scores)} ({np.std(scores)})')

[0.66666667 0.66666667 0.66666667 0.5        1.         0.5
 1.         0.5        0.5        0.5        0.66666667 0.66666667
 0.66666667 0.5        0.5        0.5        0.5        0.5
 1.         0.5        1.         0.66666667 1.         0.
 0.5        0.5        1.         0.5        0.5        1.        ]
Mean Accuracy: 0.6388888888888888 (0.23207331749117138)


In [9]:
model = Perceptron(penalty='l2', alpha=0.00001, max_iter=1000, tol=None, eta0=1.005, class_weight='balanced', warm_start=False)

# define model evaluation method
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate model
scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
# summarize result
print(scores)
print(f'Mean Accuracy: {np.mean(scores)} ({np.std(scores)})')

[0.33333333 0.66666667 0.66666667 0.5        1.         0.5
 1.         1.         0.5        1.         0.66666667 0.66666667
 0.33333333 0.5        1.         0.5        1.         0.5
 1.         1.         1.         1.         1.         0.
 0.5        0.5        1.         0.5        1.         1.        ]
Mean Accuracy: 0.7277777777777777 (0.28048679025254086)


### 3. Hyperparameter Tuning

In [10]:
# define model
model = Perceptron()
# define model evaluation method
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# define grid
grid = dict()
grid['eta0'] = [0.0001, 0.001, 0.01, 0.1, 1.0]
grid['alpha'] = [0.0001, 0.001, 0.01, 0.1, 1.0]
grid['penalty'] = ['l1', 'l2', 'elasticnet', 'None']
grid['tol'] = [0.0001, 0.001, 0.01, 0.1, 1.0]

# define search
search = GridSearchCV(model, grid, scoring='accuracy', cv=cv, n_jobs=-1)
# perform the search
results = search.fit(X, y)
# summarize
print('Best Mean Accuracy: %.3f' % results.best_score_)
print('Best Config: %s' % results.best_params_)
# summarize all
means = results.cv_results_['mean_test_score']
params = results.cv_results_['params']
for mean, param in zip(means, params):
    print(">%.3f with: %r" % (mean, param))

Best Mean Accuracy: 0.689
Best Config: {'alpha': 0.0001, 'eta0': 0.001, 'penalty': 'None', 'tol': 0.0001}
>0.617 with: {'alpha': 0.0001, 'eta0': 0.0001, 'penalty': 'l1', 'tol': 0.0001}
>0.633 with: {'alpha': 0.0001, 'eta0': 0.0001, 'penalty': 'l1', 'tol': 0.001}
>0.633 with: {'alpha': 0.0001, 'eta0': 0.0001, 'penalty': 'l1', 'tol': 0.01}
>0.633 with: {'alpha': 0.0001, 'eta0': 0.0001, 'penalty': 'l1', 'tol': 0.1}
>0.633 with: {'alpha': 0.0001, 'eta0': 0.0001, 'penalty': 'l1', 'tol': 1.0}
>0.572 with: {'alpha': 0.0001, 'eta0': 0.0001, 'penalty': 'l2', 'tol': 0.0001}
>0.611 with: {'alpha': 0.0001, 'eta0': 0.0001, 'penalty': 'l2', 'tol': 0.001}
>0.611 with: {'alpha': 0.0001, 'eta0': 0.0001, 'penalty': 'l2', 'tol': 0.01}
>0.611 with: {'alpha': 0.0001, 'eta0': 0.0001, 'penalty': 'l2', 'tol': 0.1}
>0.611 with: {'alpha': 0.0001, 'eta0': 0.0001, 'penalty': 'l2', 'tol': 1.0}
>0.589 with: {'alpha': 0.0001, 'eta0': 0.0001, 'penalty': 'elasticnet', 'tol': 0.0001}
>0.600 with: {'alpha': 0.0001, 'eta

## Using `tf.keras.layers.Dense`

In [11]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [17]:
model = Sequential()
model.add(Dense(16, input_shape=(4,), activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [18]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 16)                80        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 17        
Total params: 97
Trainable params: 97
Non-trainable params: 0
_________________________________________________________________


In [20]:
model.fit(X, y, batch_size=10, epochs=200, verbose=2, validation_split=0.2)

Epoch 1/200
2/2 - 0s - loss: 0.6018 - accuracy: 0.8333 - val_loss: 0.6490 - val_accuracy: 0.8000
Epoch 2/200
2/2 - 0s - loss: 0.6000 - accuracy: 0.8333 - val_loss: 0.6485 - val_accuracy: 0.8000
Epoch 3/200
2/2 - 0s - loss: 0.5964 - accuracy: 0.8333 - val_loss: 0.6456 - val_accuracy: 0.8000
Epoch 4/200
2/2 - 0s - loss: 0.5939 - accuracy: 0.8333 - val_loss: 0.6431 - val_accuracy: 0.8000
Epoch 5/200
2/2 - 0s - loss: 0.5921 - accuracy: 0.8333 - val_loss: 0.6406 - val_accuracy: 0.8000
Epoch 6/200
2/2 - 0s - loss: 0.5899 - accuracy: 0.8333 - val_loss: 0.6386 - val_accuracy: 0.8000
Epoch 7/200
2/2 - 0s - loss: 0.5889 - accuracy: 0.8333 - val_loss: 0.6357 - val_accuracy: 0.8000
Epoch 8/200
2/2 - 0s - loss: 0.5868 - accuracy: 0.8333 - val_loss: 0.6332 - val_accuracy: 0.8000
Epoch 9/200
2/2 - 0s - loss: 0.5856 - accuracy: 0.8333 - val_loss: 0.6311 - val_accuracy: 0.8000
Epoch 10/200
2/2 - 0s - loss: 0.5833 - accuracy: 0.8333 - val_loss: 0.6304 - val_accuracy: 0.8000
Epoch 11/200
2/2 - 0s - loss:

<keras.callbacks.History at 0x7ff70013eb80>

In [21]:
model.evaluate(X, y)



[0.46647995710372925, 0.8260869383811951]