# Continuous Learning
This notebook contains the code for a continues learning prototype

In [6]:
import os
import joblib
import numpy as np
from pathlib import Path

import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt

## Load the dataset
Load the clean dataset and split it in half so that we can apply continuous learning on the second dataset

In [7]:
# Lead the clean dataset
df = pd.read_csv("out/Clean.csv", sep=";", decimal=",")

# Drop non-numeric columns
columns_to_drop = ['CaseNumber', 'LastName', 'PostCode']
df = df.drop(columns=columns_to_drop)

In [8]:
# Separate features from the target variable
x = df.drop(columns='HeartDisease')
y = df['HeartDisease']

# Split the data into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4, random_state=42)

# Standardize the data
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
num_rows = len(x_train)

# Split the train set into two equal halves
train_x_1 = x_train
train_y_1 = y_train

# Transform the test records
x_test = scaler.transform(x_test)

# Amount of test records
test_len = len(x_test)

# The first part of the test set is the test set that will be used to calculate the accuracy
x_test_1 = x_test[:test_len // 2]
y_test_1 = y_test[:test_len // 2]

# The second part of the test set will be used to train the model further
x_train_2 = x_test[test_len // 2:]
y_train_2 = y_test[test_len // 2:]


## Train and export the model to a file

In [9]:
# Train the KNN classifier
classifier = KNeighborsClassifier(n_neighbors=5)

print("Training model...")
classifier.fit(train_x_1, train_y_1)
print("Model trained!")

# export the model to a file
print("Exporting model....")
joblib.dump(classifier, 'out/trained_model.mdl')
classifier = None
print("Model exported!")

Training model...
Model trained!
Exporting model....
Model exported!


## Importing the model from file and train further
In the follow code snippet we will load the model back and calculate the accuracy of the first test set. When the accuracy is less than 80% the model will be trained further with train set 2. After the training is done we will calculate the accuracy again.

In [10]:
print("Loading model.....")
classifier = joblib.load('out/trained_model.mdl')
print("Model loaded!")

pred_y_1 = classifier.predict(x_test_1)
accuracy = np.sum(y_test_1 == pred_y_1) / len(y_test_1) * 100
print(f"Accuracy: {round(accuracy, 1)}%")

if accuracy < 90:
	print("Training model for a second time")
	classifier.fit(x_train_2, y_train_2)

pred_y_1 = classifier.predict(x_test_1)
accuracy = np.sum(y_test_1 == pred_y_1) / len(y_test_1) * 100
print(f"Accuracy after second training: {round(accuracy, 1)}%")

Loading model.....
Model loaded!
Accuracy: 84.7%
Training model for a second time
Accuracy after second training: 87.4%
