In [5]:
import os
import boto3
import numpy as np
import pandas as pd

import cv2 as cv
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import random
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf

import splitfolders
from keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import MobileNet

from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, GlobalAveragePooling2D

## Preprocessing

In [6]:
# paths
local_drug_directory = 'local_drug_directory'
train_dir = "output/train/"
valid_dir = "output/val/"

In [7]:
# define loading parameters
img_height = 224
img_width = 224
batch_size = 1

In [8]:
train_datagen = ImageDataGenerator(rescale=1./255)
valid_datagen = ImageDataGenerator(rescale=1./255)

In [9]:
# using ImageDataGenerator and flow_from_directory to preprocess the images
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=(img_width,img_height),
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    class_mode="categorical"
                                                   )

Found 437 images belonging to 23 classes.


In [10]:
# using ImageDataGenerator and flow_from_directory to preprocess the images
validation_generator = valid_datagen.flow_from_directory(valid_dir,
                                                    target_size=(img_width,img_height),
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    class_mode="categorical"
                                                   )

Found 46 images belonging to 23 classes.


In [11]:
# to extract the class names for label
class_names = train_generator.class_indices.keys()
class_names

dict_keys(['Amoxicillin 500 mg', 'Apixaban 2.5 mg', 'Aprepitant 80 mg', 'Atomoxetine 25 mg', 'Calcitriol 0.00025', 'Prasugrel 10 MG', 'Ramipril 5 MG', 'Saxagliptin 5 MG', 'Sitagliptin 50 MG', 'Tadalafil 5 MG', 'carvedilol 3.125', 'celecoxib 200', 'duloxetine 30', 'eltrombopag 25', 'metformin_500', 'montelukast-10', 'mycophenolate-250', 'omeprazole_40', 'oseltamivir-45', 'pantaprazole-40', 'pitavastatin_1', 'prednisone_5', 'sertraline_25'])

In [None]:
sample_names = []
training_samples = []

[training_samples.append(list(train_generator.classes).count(x)) for x in range(0,23)]

# Get only drug name, not dosage
for x in list(class_names):
    drug = x.split()[0]
    
    if drug == x:
        drug = x.split(sep = '_')[0]
        
    if drug == x:
        drug = x.split(sep = '-')[0]
    
    sample_names.append(drug.capitalize())


training_sample_size = pd.DataFrame({'Drug Name': sample_names,
                                    'count': training_samples})
training_sample_size

-------
## Using MobileNet to train our data
-------

In [11]:
# define a base model with non trainable params
base_model = MobileNet(weights = 'imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False
base_model.summary()


In [12]:
# set up our model and layers
model = Sequential()

model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(23, activation="softmax"))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenet_1.00_224 (Functi  (None, 7, 7, 1024)        3228864   
 onal)                                                           
                                                                 
 global_average_pooling2d (  (None, 1024)              0         
 GlobalAveragePooling2D)                                         
                                                                 
 dense (Dense)               (None, 23)                23575     
                                                                 
Total params: 3252439 (12.41 MB)
Trainable params: 3230551 (12.32 MB)
Non-trainable params: 21888 (85.50 KB)
_________________________________________________________________


In [13]:
# compile the model
model.compile(optimizer='adam',
              loss="categorical_crossentropy",
              metrics=['accuracy'])

In [None]:
# fit the model
model.fit(train_generator, epochs=2)

Epoch 1/2
 78/437 [====>.........................] - ETA: 55s - loss: 3.8577 - accuracy: 0.2308

In [None]:
# display training loss and training accuracy
model_loss, model_accuracy = model.evaluate(validation_generator, verbose=2)
print(f" loss : {round(model_loss)*100}%, accuray : {round(model_accuracy,2)*100}%")

In [None]:
# save the model
# model.save('MobileNet_02.keras')

# Training Sample Sizes

In [36]:
sample_names = []
training_samples = []

[training_samples.append(list(train_generator.classes).count(x)) for x in range(0,23)]

# Get only drug name, not dosage
for x in list(class_names):
    drug = x.split()[0]
    
    if drug == x:
        drug = x.split(sep = '_')[0]
        
    if drug == x:
        drug = x.split(sep = '-')[0]
    
    sample_names.append(drug.capitalize())

# plt.pie(training_samples, labels = sample_names)
# plt.title('Size of Training Data')
# plt.show()
    
# plt.bar(x = sample_names, height = training_samples)
# plt.title('Size of Training Data')
# plt.ylabel('Number of Samples (n)')
# plt.xlabel('Drug Name')
# plt.xticks(rotation='vertical')
# plt.show()

training_sample_size = pd.DataFrame({'Drug Name': sample_names,
                                    'count': training_samples})
training_sample_size

Unnamed: 0,Drug Name,count
0,Amoxicillin,19
1,Apixaban,19
2,Aprepitant,19
3,Atomoxetine,19
4,Calcitriol,19
5,Prasugrel,19
6,Ramipril,19
7,Saxagliptin,19
8,Sitagliptin,19
9,Tadalafil,19
