In [30]:
# import libraries
import pandas as pd 
from sklearn.model_selection import train_test_split 
import matplotlib.pyplot as plt 
import numpy as np
import random
import sqlite3
import tensorflow as tf
import keras
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from keras.callbacks import TensorBoard
from keras.applications.densenet import DenseNet121
from keras.models import Model

from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

In [31]:
# Load data from DB
def DB_Connection(db_name):
    """Open the SQLite database ``db_name`` and return the connection.

    A confirmation message is printed after ``sqlite3.connect`` returns.
    """
    connection = sqlite3.connect(db_name)
    print("Connection to DB successfully")
    return connection


# Open the project database and keep a cursor around for ad-hoc queries
db_conn = DB_Connection('../db.sqlite3')
cursor = db_conn.cursor()

# Print the name of every table registered in sqlite_master
tables_query = "SELECT name FROM sqlite_master WHERE type='table';"
for row in cursor.execute(tables_query):
    print(row)

Connection to DB successfully
('django_migrations',)
('sqlite_sequence',)
('auth_group_permissions',)
('auth_user_groups',)
('auth_user_user_permissions',)
('django_admin_log',)
('django_content_type',)
('auth_permission',)
('auth_group',)
('auth_user',)
('detection_system_dataset',)
('detection_system_dlmodel',)
('detection_system_image',)
('detection_system_modelmetrics',)
('detection_system_img',)
('detection_system_deployedmodel',)
('django_session',)


In [32]:
### Load the data from DB
# Read every row of the detection_system_img table into a DataFrame
img_query = "SELECT * FROM detection_system_img"
cancer_data = pd.read_sql_query(img_query, db_conn)


# Data preparation

In [33]:
# Drop the database bookkeeping columns ('id', 'dataset_id').
# Reassigning instead of using inplace=True, together with errors='ignore',
# keeps this cell idempotent: re-running it after the columns are already
# gone no longer raises a KeyError.
cancer_data = cancer_data.drop(columns=['id', 'dataset_id'], errors='ignore')
#cancer_data = cancer_data.drop(cancer_data[cancer_data['label'] == 4].sample(frac=.80).index)

# Number of rows per label value (shows how imbalanced the classes are)
print(cancer_data.value_counts('label'))

#cancer_data.to_csv('prepped_train.csv', index = False)

# Display the first rows
cancer_data.head()

label
4    5337
2     889
6     875
1     430
0     263
5     110
3      96
dtype: int64


Unnamed: 0,pixel0000,pixel0001,pixel0002,pixel0003,pixel0004,pixel0005,pixel0006,pixel0007,pixel0008,pixel0009,...,pixel0775,pixel0776,pixel0777,pixel0778,pixel0779,pixel0780,pixel0781,pixel0782,pixel0783,label
0,0,0,2,2,26,84,115,127,130,137,...,137,135,130,112,48,4,3,0,0,4
1,166,167,168,167,170,170,168,175,172,169,...,163,158,168,169,165,163,159,162,146,4
2,167,170,174,173,170,173,176,180,177,187,...,148,152,153,167,162,167,171,170,163,4
3,155,154,159,157,156,162,167,160,163,159,...,160,160,159,156,156,154,152,152,151,4
4,180,184,187,189,189,191,192,192,193,195,...,193,193,190,192,190,189,188,186,185,4


In [34]:
### Prepare the dataset by splitting into training, test and validation

# First split: hold out 10% of all rows as the test set, keep 90% for training
train_cancer_data, test_cancer_data = train_test_split(
    cancer_data, test_size=0.1, random_state=12345
)
print('---------------------Shapes after split----------------------')
for caption, subset in (('train_cancer data shape: ', train_cancer_data),
                        ('test_cancer_data shape: ', test_cancer_data)):
    print(caption, subset.shape)

# Second split: carve 20% of the remaining training rows off as the validation set
train_cancer_data, val_cancer_data = train_test_split(
    train_cancer_data, test_size=0.2, random_state=12345
)
print('---------------------Shapes after split----------------------')
for caption, subset in (('train_cancer data shape: ', train_cancer_data),
                        ('val_cancer_data shape: ', val_cancer_data)):
    print(caption, subset.shape)

---------------------Shapes after split----------------------
train_cancer data shape:  (7200, 785)
test_cancer_data shape:  (800, 785)
---------------------Shapes after split----------------------
train_cancer data shape:  (5760, 785)
val_cancer_data shape:  (1440, 785)


In [35]:
## Split the datasets into features and target

def split_features_target(data, target_column='label'):
    """Separate a dataset into its feature columns and its target column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that contains ``target_column`` alongside the feature columns.
    target_column : str, default 'label'
        Name of the column holding the target.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        A new frame without ``target_column`` and a copy of that column.
        ``data`` itself is not modified.
    """
    features = data.drop(columns=target_column)  # drop() returns a new frame
    target = data[target_column].copy()
    # Note: the target is a Series, which has no `columns`; assigning to
    # `target.columns` would only attach a stray attribute, so it is not done.
    return features, target


# Split the training set into feature and target set
train_cancer_features, train_cancer_target = split_features_target(train_cancer_data)

print('---------------------Shapes after feature and target split----------------------')
print('Shape of train_cancer_features:', train_cancer_features.shape)
print('Shape of train_cancer_target:', train_cancer_target.shape)

# Split the test set into feature and target set
test_cancer_features, test_cancer_target = split_features_target(test_cancer_data)

print('---------------------Shapes after feature and target split----------------------')
print('Shape of test_cancer_features:', test_cancer_features.shape)
print('Shape of test_cancer_target:', test_cancer_target.shape)

# Split the validation set into feature and target set
val_cancer_features, val_cancer_target = split_features_target(val_cancer_data)

print('---------------------Shapes after feature and target split----------------------')
print('Shape of val_cancer_features:', val_cancer_features.shape)
print('Shape of val_cancer_target:', val_cancer_target.shape)

---------------------Shapes after feature and target split----------------------
Shape of train_cancer_features: (5760, 784)
Shape of train_cancer_target: (5760,)
---------------------Shapes after feature and target split----------------------
Shape of test_cancer_features: (800, 784)
Shape of test_cancer_target: (800,)
---------------------Shapes after feature and target split----------------------
Shape of val_cancer_features: (1440, 784)
Shape of val_cancer_target: (1440,)


In [36]:
# Transform the datasets into float32 NumPy arrays

# Feature frames -> pixel arrays
X_training, X_testing, X_validating = (
    np.array(frame, dtype='float32')
    for frame in (train_cancer_features, test_cancer_features, val_cancer_features)
)

# Target series -> label arrays
y_train, y_test, y_val = (
    np.array(labels, dtype='float32')
    for labels in (train_cancer_target, test_cancer_target, val_cancer_target)
)

In [37]:
## Prepare the datasets: scale to 0-1, reshape to images, pad to 32x32, add 3 channels

def prepare_images(pixels, image_shape=(28, 28), pad=2, channels=3):
    """Turn flat pixel rows into padded multi-channel image arrays.

    Parameters
    ----------
    pixels : numpy.ndarray
        2-D array with one flattened image per row, values in 0-255.
    image_shape : tuple of int, default (28, 28)
        Height and width each row is reshaped to.
    pad : int, default 2
        Number of zero pixels added on every side of each image.
    channels : int, default 3
        Number of times the single channel is repeated along a new last axis.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_images, height + 2*pad, width + 2*pad, channels)``.
    """
    # Scale first and keep working on the scaled array. Previously the
    # reshape was applied to the unscaled input, which silently discarded
    # the division by 255.
    images = pixels / 255
    images = images.reshape(pixels.shape[0], *image_shape)
    # Zero-pad height and width only; the batch axis is left untouched
    images = np.pad(images, ((0, 0), (pad, pad), (pad, pad)))
    # Repeat the single channel so the DenseNet121 input shape (32, 32, 3) is met
    return np.repeat(images[..., np.newaxis], channels, axis=-1)


X_train = prepare_images(X_training)
X_test = prepare_images(X_testing)
X_val = prepare_images(X_validating)

In [38]:
from keras.layers import GlobalMaxPooling2D

## Building the model
# DenseNet121 backbone without its classification head; weights come from a local file
base_model = DenseNet121(
    weights='../../Transfer_Learning/densenet.hdf5',
    include_top=False,
    input_shape=(32, 32, 3),
)

# Classification head: global max pooling followed by a funnel of ReLU dense layers
x = GlobalMaxPooling2D()(base_model.output)  # backbone output: (None, 1, 1, 1024)
for units in (512, 256, 128, 64, 32, 16):
    x = Dense(units, activation='relu')(x)

# Softmax over the 7 label values
predictions = Dense(7, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

#model.summary()

# Train the model, validating on the validation split after every epoch
history = model.fit(X_train, y_train, epochs=4, validation_data=(X_val, y_val))



Epoch 1/4


KeyboardInterrupt: 

In [None]:
## Evaluating the model
# Loss and accuracy on the test split, computed in batches of 100
loss, accuracy = model.evaluate(x=X_test, y=y_test, batch_size=100)

In [None]:
## Save the trained model to 'densenet121_model_even_dist0.h5'
model.save(filepath='densenet121_model_even_dist0.h5')

In [None]:
# Predicted class = index of the highest softmax output for each test image
predicted_classes = np.argmax(model.predict(X_test), axis=1)

# Store the matrix under its own name. Assigning it to `confusion_matrix`
# would shadow the imported sklearn function and make this cell fail on a
# re-run with "'numpy.ndarray' object is not callable".
conf_matrix = confusion_matrix(y_test, predicted_classes)

# Macro averaging: each metric is computed per class and then averaged unweighted
prec = precision_score(y_test, predicted_classes, average='macro')
rec = recall_score(y_test, predicted_classes, average='macro')
f1 = f1_score(y_test, predicted_classes, average='macro')

# One-row summary table; the whole confusion matrix is stored in a single cell
prec_rec_f1 = {'Precision': [prec], 'Recall': [rec], 'F1': [f1], 'ConfusionMatrix': [conf_matrix]}
df_prec_rec_f1 = pd.DataFrame(data=prec_rec_f1)


print(df_prec_rec_f1)

df_prec_rec_f1.to_csv('testingdfcsv.csv', index = False)