## The ImageDataGenerator for X-ray Body Part Classifier

In [43]:
import os
from google.colab import drive
import numpy as np
import pandas as pd
import cv2
from PIL import Image
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow import keras
from keras.regularizers import l2
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, BatchNormalization, AveragePooling2D
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score

In [3]:
drive.mount('/content/drive')

Mounted at /content/drive


### Download CSV files with features (X) and target (y)

In [25]:
X = pd.read_csv('/content/drive/MyDrive/X_train.csv')

In [31]:
y = pd.read_csv('/content/drive/MyDrive/y_train.csv')

In [33]:
#y_1=list(y_1['0'])

In [28]:
#X_1=X_1.drop(['Unnamed: 0'], axis=1)

In [29]:
#X_1 = pd.DataFrame(X_1).to_numpy()

In [14]:
#y_1 = pd.read_csv('/content/drive/MyDrive/y_train.csv')

### Show classes and number of instances in them

In [32]:
y.groupby(['0'])['0'].count()

0
0      80
1      41
2      77
3     724
4       9
5      23
6      70
7      15
8      12
9      31
10     19
11    102
12     19
13     39
14    120
15     67
16     40
17     23
18     10
19      7
20     15
21     63
22      2
23     45
24     11
25      7
26     42
27      1
28      3
29      1
30      1
31      5
32      1
33      2
34      4
35      1
36      2
37      1
38      1
39      1
40      1
Name: 0, dtype: int64

##### The classes are imbalanced, one way to avoid overfitting is to use the ImageDataGenerator. To do this, create folders with the name of the classes, where all the images of this class will be.

In [12]:
parent_dir = '/content/drive/MyDrive/jpg/'

In [None]:
drs=[]                                                                        
for i in range(0, len(y)):
  drs = [f for (root, dirs, file) in os.walk(parent_dir) for f in dirs]       # make the folders list
  im = Image.fromarray(X[i].reshape(84,84))                                 # reshape array to image
  im = im.convert("L")
  if str(y[i]) in drs:
    im.save('/content/drive/MyDrive/jpg/'+str(y[i])+'/'+str(i)+'.jpeg')     # save the image in a folder if that folder exists, or create one and then save
    os.mkdir(os.path.join(parent_dir, str(y[i])))                           
    im.save('/content/drive/MyDrive/jpg/'+str(y[i])+'/'+str(i)+'.jpeg')

In [None]:
#X_resampled_1=X_resampled.drop(['Unnamed: 0'], axis=1)

In [None]:
#X_resampled = pd.DataFrame(X_resampled).to_numpy()

In [None]:
#X_resampled.shape

(2461, 7056)

In [None]:
#X_resampled = np.reshape(X_resampled, (2461, 84, 84))

### Split data for training and testing

In [34]:
X_train,X_test,y_train,y_test = train_test_split(X, y, test_size = 0.1, random_state = 45)

### Make categories for target

In [35]:
y_train_cat = keras.utils.to_categorical(y_train,41)                           
y_test_cat = keras.utils.to_categorical(y_test,41)

### Prepare arrays with features

In [36]:
X_train = np.reshape(X_train, (1564, 84, 84))
X_test = np.reshape(X_test, (174, 84, 84))

In [37]:
X_train = np.expand_dims(X_train, axis=3)
X_test = np.expand_dims(X_test, axis=3)

### Make model

In [50]:
epo = 52                                         # number of epochs
batch = 32
l2_lambda = 0.0003                               # L2-regularisation
myOpt = keras.optimizers.Adam(learning_rate = 0.0003)

  super(Adam, self).__init__(name, **kwargs)


### Set up the generator

In [41]:
train_image_generator = ImageDataGenerator( rotation_range = 30,
                                            width_shift_range = 0.2)

train_data_gen = train_image_generator.flow_from_directory ( parent_dir,
                                                           shuffle = True,
                                                           target_size = (84, 84),
                                                           batch_size = batch,
                                                           class_mode = 'categorical',
                                                           color_mode = 'grayscale')

Found 1738 images belonging to 41 classes.


### Create a convolutional neural network

In [49]:
model = keras.Sequential([
    Conv2D(32, (3,3),
           padding = 'same',
           activation = 'relu',
           kernel_regularizer = l2(l2_lambda),
           input_shape = (84, 84, 1)),
    AveragePooling2D(pool_size = (4, 4)),
    Conv2D(32, (3,3),
           padding = 'same',
           activation = 'relu'),
    AveragePooling2D(pool_size=(4, 4)),
    Conv2D(64, (3,3), padding = 'same',
           activation='relu'),
    BatchNormalization (),
    AveragePooling2D(pool_size = (4, 4)),
    Flatten(),
    Dense(512, activation = 'relu'),
    Dropout(0.5),
    Dense(41,  activation = 'softmax')
])

model.compile(optimizer = myOpt,
             loss = 'categorical_crossentropy',
             metrics = ['accuracy'])


his = model.fit_generator(
    generator = train_data_gen,  steps_per_epoch = len(X_train)//batch,  epochs = epo,
    validation_data = (X_test, y_test_cat),
                       callbacks = [EarlyStopping (monitor = 'val_loss', patience = 5)] )


model.evaluate(X_test, y_test_cat)

Epoch 1/52


  super(Adam, self).__init__(name, **kwargs)


Epoch 2/52
Epoch 3/52
Epoch 4/52
Epoch 5/52
Epoch 6/52
Epoch 7/52


[5.396466255187988, 0.04022988677024841]

In [45]:
predictions = model.predict(X_test)
predictions

array([[6.00589487e-07, 4.93631491e-09, 7.88170098e-08, ...,
        1.35998235e-11, 1.44218717e-11, 5.37824789e-08],
       [1.19887130e-12, 2.56155760e-28, 1.07254647e-15, ...,
        8.07582398e-33, 2.08566058e-31, 1.83279699e-17],
       [1.06371352e-02, 6.69089717e-09, 5.88171861e-06, ...,
        4.53809802e-11, 1.44415702e-09, 1.52307837e-08],
       ...,
       [9.28043795e-04, 6.95998592e-09, 5.04286727e-05, ...,
        2.46653631e-12, 3.20315788e-11, 8.11570135e-06],
       [8.51422141e-04, 7.90442781e-11, 1.20217510e-06, ...,
        1.65531190e-13, 1.45129080e-12, 4.55356712e-08],
       [2.13883463e-02, 8.71464889e-03, 3.27558964e-02, ...,
        5.78164647e-04, 1.08072418e-03, 1.47973418e-01]], dtype=float32)

In [46]:
y_pred = np.argmax(predictions, axis=1)
print(y_pred)

[23  6 12 12 16 16 23  8 23 12 23 14 14 23 19 23 23 23 23 37  8 23  0 23
 23 23 23  0  0 23 23 23  6 23 23 23 23  8 23  3 23 12 23  7 23 11  0  6
 23 12  0 23 12 23 23 23 14 16 23 40 23 39  1 23 23 16 23 23 23 23 23 23
 16 23 23  5  3 14 16 34 14  5 23  7  7  3 23 38  0 23 23 23 23 37 19 23
  0 23 30 14  0 23 14 19 14 23 38 23 12  6 23 23 23 23  0  6  7 12 23 23
 23 37 19  0 23 37  3  7 16 13 23 23  3 19 23 23  6 23 38  8 40 23  8 12
  8  7 37 23 40 23  6 23 23 18 23 23 23 12  3 10 23 19 23 23 12  6  0 12
  6 23  0 23 23 13]


In [47]:
np.array(y_test)

array([ 3, 14, 20,  2, 23, 23, 14, 16,  3,  3,  3, 21, 21,  3, 26,  3,  3,
        3, 14,  6, 16,  3,  0,  3,  3,  3,  3, 13, 11,  3,  3,  3, 14,  3,
        3,  3,  3, 17,  3, 11,  3,  2,  3, 15,  2, 19,  0, 14,  3, 13, 11,
        3,  2,  3,  3,  3, 21, 23,  3,  9,  3,  8,  1,  3,  3, 14,  3,  3,
        3,  3,  3,  3, 23,  3,  3, 11, 10, 21, 23,  4, 21, 11,  3, 15, 35,
       11,  3,  7, 23, 16,  3,  3,  3,  6, 26,  3,  0,  3, 36, 21, 13, 16,
       21, 26,  8,  3,  9,  3,  2, 11,  3,  3,  3,  3,  0, 14, 15,  3,  3,
        3,  3,  6, 26,  0,  3,  6, 11, 15, 23,  6,  3,  3, 11, 26, 17, 14,
       14,  3,  9, 16,  9,  3, 16,  1, 16, 15,  6,  3,  9,  3,  3,  6,  3,
       25,  3,  3,  3, 16, 11, 18,  3,  9,  3,  3,  2,  3,  0, 13, 14, 24,
        0,  3,  3,  9])

In [48]:
accuracy_score(y_test, y_pred)

0.04597701149425287