## Load Dependencies

In [7]:
# load dependencies
import pandas as pd 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

## Load Dataset

In [8]:
# load the dataset
url = "iris.data"

# column names to use
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']

# read the dataset from URL
dataset = pd.read_csv(url, names=names) 

# check first few rows of data
dataset.head()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,Class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Split Data and Standardize Features

In [9]:
# seperate the independent and dependent features
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values 

# Split dataset into random training and testing subsets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20) 

# feature standardization
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test) 

## Train Classifier

In [10]:
# training a KNN classifier
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train) 

## Check Classifier Performance

In [11]:
# make prediction on the testing data
y_predict = model.predict(X_test)

# check results
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict)) 

[[14  0  0]
 [ 0  6  0]
 [ 0  2  8]]
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        14
Iris-versicolor       0.75      1.00      0.86         6
 Iris-virginica       1.00      0.80      0.89        10

       accuracy                           0.93        30
      macro avg       0.92      0.93      0.92        30
   weighted avg       0.95      0.93      0.93        30



## Save Model in a Database

In [15]:
import pickle

# serialize the model
model_bytes = pickle.dumps(model)

In [16]:
import psycopg2

#establishing the connection
conn = psycopg2.connect(
   database="dbname", user='username', password='password', host='127.0.0.1', port= '5432'
)

In [17]:
# Create a table to store the model
cur = conn.cursor()
cur.execute("CREATE TABLE models (id INT PRIMARY KEY NOT NULL, name CHAR(50), model BYTEA)")

In [19]:
# Insert the serialized model into the database
cur.execute("INSERT INTO models (id, name, model) VALUES (%s, %s, %s)", (1, 'iris-classifier', model_bytes))
conn.commit()

In [20]:
# Close the database connection
cur.close()
conn.close()

## Load Model from Database

In [31]:
import psycopg2
import pickle

# Connect to the database
conn = psycopg2.connect(
   database="dbname", user='username', password='password', host='127.0.0.1', port= '5432'
)

# Retrieve the serialized model from the database
cur = conn.cursor()
cur.execute("SELECT model FROM models WHERE name = %s", ('iris-classifier',))
model_bytes = cur.fetchone()[0]

# Deserialize the model
model = pickle.loads(model_bytes)

# Close the database connection
cur.close()
conn.close()

# test loaded model
# evaluate model 
y_predict = model.predict(X_test)

# check results
print(classification_report(y_test, y_predict)) 

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        14
Iris-versicolor       0.75      1.00      0.86         6
 Iris-virginica       1.00      0.80      0.89        10

       accuracy                           0.93        30
      macro avg       0.92      0.93      0.92        30
   weighted avg       0.95      0.93      0.93        30

