## Import Libraries

In [1]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Input, BatchNormalization, MaxPool2D
import pickle
import sys
import cv2
import gc # to delete used memory after running models

In [2]:
# Print the path of the Python interpreter backing this kernel, to confirm
# which environment the notebook is running in.
import sys
print(sys.executable)

C:\Users\alexc\AppData\Local\Programs\Python\Python310\python.exe


In [3]:
# List the compute devices TensorFlow can see (CPU/GPU) to verify that the
# GPU is available before training.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 1635472559894361
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 4185718784
locality {
  bus_id: 1
  links {
  }
}
incarnation: 18390997025480888405
physical_device_desc: "device: 0, name: NVIDIA GeForce GTX 1660 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5"
xla_global_id: 416903419
]


### Techniques to use: 

In [4]:
# Toggle: when True, the SMOTE cell oversamples the training split.
use_smote = False

In [5]:
# Toggle: when True, training uses the ImageDataGenerator pipeline instead of
# fitting directly on the in-memory arrays.
use_data_augmentation = False

## Data Loading 

In [6]:
# Load the image array (x) and label array (y) from their pickle files.
# Context managers guarantee the file handles are closed even if loading fails.
# NOTE(review): pickle.load can execute arbitrary code -- only load files that
# come from a trusted source.
with open("x.pickle","rb") as pickle_in:
    x = pickle.load(pickle_in)

with open("y.pickle","rb") as pickle_in:
    y = pickle.load(pickle_in)

In [7]:
# Summarize (elide with "...") any array with more than 10 elements when it is
# printed, so previews of the image data stay short.
np.set_printoptions(threshold=10)
# Preview the first image.
x[0:1]

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]], dtype=uint8)

## Train Test Split

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the samples for evaluation; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.15,
    random_state=1,
)

In [9]:
# Number of distinct labels present in the training split; used later as the
# size of the classifier's output layer.
K = len(np.unique(y_train))
print("Unique classes: ", K)

Unique classes:  9


In [10]:
# Sanity check: (number of training samples, image height, image width).
x_train.shape

(46201, 224, 224)

In [11]:
# Preview the label array (integer class ids).
y

array([3, 8, 8, ..., 8, 8, 0], dtype=int64)

## SMOTE

In [12]:
#Reshaping x_train to use Smote

In [13]:
from imblearn.over_sampling import SMOTE

if use_smote:
    # SMOTE works on a 2-D (samples, features) matrix, so remember the
    # per-image shape and flatten every image into a single row.
    image_shape = x_train.shape[1:]
    x_train_rows = len(x_train)
    x_train_flat = x_train.reshape(x_train_rows, -1)

    # Majority Class counts: 29360
    number_samples = 10000 #29360

    # Target sample count per class label: classes 0-7 are raised to
    # `number_samples`; class 8 is given the majority-class count noted above.
    sampling_strategy = {label: number_samples for label in range(8)}
    sampling_strategy[8] = 29360

    smote = SMOTE(sampling_strategy=sampling_strategy, random_state=4)
    x_smote, y_smote = smote.fit_resample(x_train_flat, y_train)

    print(x_smote.shape, x_smote[0].shape, y_smote.shape)

    # Restore the original image dimensions. This used to be a hard-coded
    # 45x45 reshape, which does not match the images loaded in this notebook
    # and would silently change both the sample count and the image shape.
    x_train = x_smote.reshape(-1, *image_shape)
    y_train = y_smote
    print(x_train.shape)

## Create Model

In [14]:
# Pick a tf.distribute strategy: try to connect to a TPU first and fall back
# to MirroredStrategy when no TPU runtime is found (the resolver signals that
# with a ValueError, which is the only exception handled here).
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
    print("Device:", tpu.master())
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError:
    print("Not connected to a TPU runtime. Using CPU/GPU strategy")
    strategy = tf.distribute.MirroredStrategy()

Not connected to a TPU runtime. Using CPU/GPU strategy
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


In [15]:
# In-model augmentation pipeline: random rotation, translation, flip and
# contrast jitter. It is applied as the first stage of the model built below.
img_augmentation = Sequential(
    [
        layers.RandomRotation(factor=0.15),
        layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
        layers.RandomFlip(),
        layers.RandomContrast(factor=0.1),
    ],
    name="img_augmentation",
)

In [16]:
from tensorflow.keras.applications import EfficientNetB0

IMG_SIZE = 224

with strategy.scope():
    # The loaded images are single-channel arrays of shape (IMG_SIZE, IMG_SIZE),
    # so the model accepts them as-is, adds the channel axis, and replicates it
    # three times inside the graph. This gives EfficientNetB0 the 3-channel
    # input it is built for without tripling the dataset's memory footprint.
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE))
    gray = layers.Reshape((IMG_SIZE, IMG_SIZE, 1))(inputs)
    rgb = layers.Concatenate(axis=-1)([gray, gray, gray])

    # Intermediate tensors get their own names so the dataset variable `x`
    # loaded earlier is not overwritten by a Keras tensor.
    augmented = img_augmentation(rgb)
    outputs = EfficientNetB0(include_top=True, weights=None, classes=K)(augmented)

    model = tf.keras.Model(inputs, outputs)
    # Labels are integer class ids (not one-hot vectors), which requires the
    # sparse variant of categorical cross-entropy.
    model.compile(
        optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
    )

model.summary()

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Redu

In [17]:
# Stop training once the monitored quantity has not improved for 10 epochs.
# NOTE(review): this monitors the *training* loss ('loss') even though
# validation data is passed to fit(); confirm 'val_loss' is not what was intended.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience = 10)

## Data Augmentation 

In [18]:
#ImageDataGenerator requires 4 dimensions, including the color (greyscale or colored images)
if use_data_augmentation and x_train.ndim == 3:
    # Append a single greyscale channel axis while keeping the real image size.
    # The previous hard-coded 45x45 reshape did not match the loaded images.
    # The ndim guard keeps this cell safe to re-run.
    x_train = x_train[..., np.newaxis]

In [19]:
if use_data_augmentation:
    batch_size = 16
    # Random horizontal/vertical shifts (10%) and flips.
    # Previously tried and left disabled: rotation_range = 180, fill_mode = 'constant'
    data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        vertical_flip=True,
    )
    train_generator = data_generator.flow(x_train, y_train, batch_size=batch_size)
    # Number of full batches per pass over the training set.
    steps_per_epoch = len(x_train) // batch_size  # divided twice by batch_size?

## Fit the model 

In [20]:
#Used with the Data Augmentation segment:

if use_data_augmentation:
    # validation_data is passed as a tuple (x_val, y_val), the form Keras
    # documents; a list is not guaranteed to be unpacked into inputs/targets.
    history = model.fit(
        train_generator,
        validation_data=(x_test, y_test),
        steps_per_epoch=steps_per_epoch,
        epochs=100,
        callbacks=[callback],
    )

# 0.9189 val_accuracy

In [None]:
#Used normally without Data Augmentation Segment:
#x_train = x_train.reshape(, 224, 224, 3)

if not use_data_augmentation:
    # validation_data is passed as a tuple (x_val, y_val), the form Keras
    # documents; a list is not guaranteed to be unpacked into inputs/targets.
    history = model.fit(
        #x,y,
        x_train,
        y_train,
        validation_data=(x_test, y_test),
        epochs=50,
        verbose=2,
        callbacks=[callback],
        batch_size=64,
    )

## Graphing the accuracies and losses 

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy, one point per epoch.
for metric_name in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_name], label=metric_name)
plt.legend()

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss, one point per epoch.
for metric_name in ('loss', 'val_loss'):
    plt.plot(history.history[metric_name], label=metric_name)
plt.legend()

## Evaluating the model on the held-out test split

In [None]:
### The way to measure the model performance

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve

def evaluation_measurement(y_valid, y_pred, num_classes=9, output_fig=False):
    """Plot one-vs-rest ROC curves and print the multiclass ROC AUC.

    Parameters
    ----------
    y_valid : array-like
        True class labels; class ``i`` is treated as the positive label of
        the i-th ROC curve.
    y_pred : 2-D array
        Per-class scores, indexed as ``y_pred[:, i]`` for class ``i``.
    num_classes : int, default 9
        Number of classes to draw a ROC curve for.
    output_fig : bool, default False
        When True, the figure is also saved as 'Multiclass ROC' at 300 dpi.
    """
    # The one-vs-rest AUC is computed over all classes at once, so it does not
    # depend on the loop index; compute it a single time instead of once per class.
    auc_score = roc_auc_score(y_valid, y_pred, multi_class = 'ovr')

    # Draw a curve for every class (previously only classes 0-3 were plotted,
    # which also failed with a KeyError when num_classes < 4). Colors come
    # from matplotlib's default color cycle.
    for i in range(num_classes):
        fpr, tpr, _ = roc_curve(y_valid, y_pred[:, i], pos_label=i)
        plt.plot(fpr, tpr, linestyle='--', label=f'Class {i} vs Rest')

    plt.title('Multiclass ROC curve')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='best')

    print("MCAUC= ", auc_score)

    if output_fig:
        plt.savefig('Multiclass ROC', dpi=300)

In [None]:
# Model outputs for the held-out test split (one row per sample).
y_pred = model.predict(x_test)

In [None]:
# Plot the ROC curves and print the multiclass AUC for the test split.
evaluation_measurement(y_test,y_pred)

In [None]:
# Preview the true test labels.
y_test

In [None]:
# Preview the model outputs for the test split.
y_pred

## Opening pickle with data for predictions 

In [None]:
# Load the images to predict on. The context manager guarantees the file
# handle is closed even if loading fails.
# NOTE(review): pickle.load can execute arbitrary code -- only load files that
# come from a trusted source.
with open("x_to_predict.pickle","rb") as pickle_in:
    x_testing = pickle.load(pickle_in)

In [None]:
# Sanity check the shape of the data to predict on.
x_testing.shape

In [None]:
# Preview the data to predict on.
x_testing

## Predictions

In [None]:
%%time
# Run inference on the prediction set; %%time reports how long the cell takes.
predictions = model.predict(x_testing)

In [None]:
# Number of predicted samples.
len(predictions)

In [None]:
# Model output for the first sample.
predictions[0]

## Uploading CSV 

In [None]:
#for uploading

import pandas as pd

# One "Type <i>" column per model output, derived from the prediction width
# instead of a hard-coded list of nine names, so the frame always matches the
# model's number of classes.
class_columns = [f"Type {i}" for i in range(predictions.shape[1])]
pred_df = pd.DataFrame(predictions, columns=class_columns)
#pred_df['Id'] = pred_df.reset_index().index

In [None]:
# Preview the submission frame.
pred_df

In [None]:
import os

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing the submission file.
os.makedirs('./uploads', exist_ok=True)
pred_df.to_csv('./uploads/#Efficientnet.csv',index_label="Id")

In [None]:
# Drop the references to the large arrays, the training history and the model,
# then force a garbage-collection pass to release the memory they held.
del x,y, x_train, y_train, x_test, y_test, pred_df, history, model
gc.collect()

# -------------------------------------------------- 