# Continuous Learning
This notebook contains the code for a continues learning prototype

In [258]:
import os
import joblib
import numpy as np
from pathlib import Path

import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt

## Load the dataset
Load the clean dataset and split it in half so that we can apply continuous learning on the second dataset

In [259]:
# Lead the clean dataset
df = pd.read_csv("out/Clean.csv", sep=";", decimal=",")

# Drop non-numeric columns
columns_to_drop = ['CaseNumber', 'LastName', 'PostCode']
df = df.drop(columns=columns_to_drop)

In [260]:
# Separate features from the target variable
x = df.drop(columns='HeartDisease')
y = df['HeartDisease']

# Split the data into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4, random_state=42)

# Standardize the data
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
num_rows = len(x_train)

# Split the train set into two equal halves
train_x_1 = x_train
train_y_1 = y_train

x_test = scaler.transform(x_test)

test_len = len(x_test)

x_test_1 = x_test[:test_len // 2]
y_test_1 = y_test[:test_len // 2]

x_test_2 = x_test[test_len // 2:]
y_test_2 = y_test[test_len // 2:]

# Train the KNN classifier
classifier = KNeighborsClassifier(n_neighbors=5)

In [261]:
classifier.fit(train_x_1, train_y_1)
pred_y_1 = classifier.predict(x_test_1)
accuracy = np.sum(y_test_1 == pred_y_1) / len(y_test_1) * 100
print(f"Accuracy after first training: {round(accuracy, 1)}%")

# export the model to a file
joblib.dump(classifier, 'out/trained_model.mdl')

print("Exporting model....")


Accuracy after first training: 84.7%
Exporting model....


In [262]:
print("Loading model.....")

classifier = joblib.load('out/trained_model.mdl')

if accuracy < 90:
	print("Training model for a second time")
	classifier.fit(x_test_2, y_test_2)

pred_y_1 = classifier.predict(x_test_1)
accuracy = np.sum(y_test_1 == pred_y_1) / len(y_test_1) * 100
print(f"Accuracy after second training: {round(accuracy, 1)}%")

Loading model.....
Training model for a second time
Accuracy after second training: 87.4%
