<a href="https://colab.research.google.com/github/deivid-01/Snakes-Classifier/blob/main/04_model_arquitecture_and_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Model architecture and training


### About:
- This notebook focuses on training and validating the machine learning model.
---
David Andrés Torres Betancour <br/>
Computer Engineering  Student <br/>
University of Antioquia <br/>
davida.torres@udea.edu.co

## Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

import os
import tensorflow as tf
!pip install --upgrade tensorflow_hub
import tensorflow_hub as hub
from tensorflow.keras.layers import Flatten, Dense,Dropout
from tensorflow.keras.models  import Model,load_model
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.vgg16 import preprocess_input
from keras.applications.vgg16 import decode_predictions
from keras.applications import vgg16
from tensorflow.keras.applications import VGG16 # Wtf ?
from keras.optimizers import Adam

from google.colab import drive,files
import h5py

%matplotlib inline

Requirement already up-to-date: tensorflow_hub in /usr/local/lib/python3.7/dist-packages (0.12.0)


## Tools

### Kaggle Tools

In [1]:
def loadCredentials():
  """Ask the user to upload kaggle.json and configure the Kaggle CLI to use it.

  Steps performed:
    1. Remove any kaggle.json found in the current working directory.
    2. Call ``files.upload()`` so the user can upload a new kaggle.json.
    3. Set the ``KAGGLE_CONFIG_DIR`` environment variable to ``/content``.
    4. Restrict ``/content/kaggle.json`` to mode 600.

  NOTE(review): step 1 uses a relative path while step 4 uses /content, so this
  presumably assumes the working directory is /content -- confirm.
  """
  # Remove a previously uploaded copy before uploading again.
  !rm kaggle.json
  print("Upload kaggle.json file with credentials | IMPORTANT: Filename must be:kaggle.json\n")
  files.upload()  # Upload the credentials file
  os.environ['KAGGLE_CONFIG_DIR'] = '/content'  # Folder where the Kaggle CLI looks for kaggle.json
  # Mode 600: only the owner may read/write the credentials file.
  ! chmod 600 /content/kaggle.json  #Hide kaggle.json

def fetchDataSetFromKaggle():
  """Load Kaggle credentials, then download and unpack the snake-breeds dataset.

  Downloads the ``deividt/snake-breeds`` dataset into the current working
  directory, unzips every ``.zip`` archive found there and deletes the
  archives afterwards.
  """
  loadCredentials()
  ! kaggle datasets download -d deividt/snake-breeds #Download dataset
  # The backslash stops the shell from expanding the wildcard so unzip handles
  # it itself; `rm *.zip` only runs if unzip succeeded (&&).
  ! unzip \*.zip && rm *.zip #Unzip downloaded file and delete zip
  print("Loaded completed")
def fetchModelsFromKaggle():
  """Download and unpack the pre-trained models dataset from Kaggle.

  Downloads ``deividt/snake-classifier-pretrainedmodels`` into the current
  working directory, unzips every ``.zip`` archive there and deletes the
  archives. Unlike ``fetchDataSetFromKaggle`` this does not call
  ``loadCredentials()`` itself.
  """
  ! kaggle datasets download -d deividt/snake-classifier-pretrainedmodels #Download dataset
  ! unzip \*.zip && rm *.zip #Unzip downloaded file and delete zip

def kaggle_createDataset(public=False):
  """Create a new Kaggle dataset from the contents of /content/model.

  Args:
    public: when True the CLI is invoked with the extra ``-u`` flag; when
      False (default) the flag is omitted.
  """
  if public: 
    !kaggle datasets create -p  /content/model -u
  else: 
    !kaggle datasets create -p  /content/model

def kaggle_updateDataset(commit_message='Updating files'):
   !kaggle datasets version -p /content/model/ -m commit_message

def kaggle_setConfig(dataset_title="testing"):

  fileName="data/dataset-metadata.json"
  !kaggle datasets init -p  /content/model/
  addTitle2JSON(fileName,dataset_title)


def kaggle_saveModel():
  """Interactively upload the contents of /content/model to Kaggle.

  Asks for confirmation, then whether to create a new dataset or update an
  existing one, then for the dataset name. The metadata file is prepared with
  ``kaggle_setConfig`` before the dataset is created or versioned.

  Returns:
    None. Prints "Upload canceled" and returns early unless the user answers "y".

  Raises:
    AssertionError: if a menu answer is not '1'/'2' (raised by ``readOption``)
      or if the dataset name is empty.
  """
  answer = input("Do you want upload model  in  your kaggle Account? [y/n] ")

  if answer.lower() != "y":
    print("Upload canceled")
    return None

  action = readOption( msg = "Create new dataset for saving models[1] or update old one[2]? [1/2]")

  dataset_title = input("Enter dataset name: ")
  # Validate the dataset name itself; the menu answer has already been
  # validated by readOption, so checking its length would never fail.
  assert len(dataset_title.strip())>0 , "Invalid length "

  kaggle_setConfig(dataset_title)

  if action=="1": # Create new dataset
    visibility = readOption( msg = "Create dataset Private[1] or Public [2]? [1/2]" )
    kaggle_createDataset(visibility=="2")
  elif action=="2": # Update existing dataset
    commit_msg = input ("Enter commit message (Ex. 'version 1.4') : ")
    kaggle_updateDataset(commit_msg)


### Model Tools

In [None]:
def loadModel(name):
  """Load a Keras model from ``<name>.h5``.

  Args:
    name: file name without the ``.h5`` extension.

  Returns:
    The loaded model, or None when loading fails. Failures are reported on
    stdout instead of being raised.
  """
  try:
    return load_model(name+'.h5')
  except Exception as err:
    # `except Exception` keeps the best-effort behaviour but no longer swallows
    # KeyboardInterrupt/SystemExit, and the real cause is shown to the user.
    print("Model not found")
    print("Reason:", err)
    return None
  

def fetchModel():
  """Download the pre-trained models from Kaggle and load one chosen by the user.

  Returns:
    The loaded model, or None when the requested model could not be loaded.
  """
  print("Loading models from Kaggle")
  fetchModelsFromKaggle()
  print("Models loaded")
  print("---------------------------------")
  model_name = input ("\nEnter model name: ( Ex: model_a ) ")
  print("Searching model...")
  m = loadModel(model_name)
  # loadModel returns None on failure; do not report success in that case.
  if m is None:
    print("Model "+model_name+" could not be loaded")
    return None
  print("Model "+model_name+" loaded successfully")
  return m

def getModel():
  """Return a model chosen interactively: a fresh one or one fetched from Kaggle.

  Option '1' asks for a name and builds a new model with ``createModel``
  (10 output classes). Any other accepted option loads the Kaggle credentials
  and delegates to ``fetchModel``.
  """
  choice = readOption(" Create new model [1] or load pre-trained model from Kaggle [2]  [1/2] ")

  # Pre-trained path: credentials are required before downloading.
  if choice != '1':
    loadCredentials()
    return fetchModel()

  # New-model path.
  model_name = input(" Set  model name: (Ex. model_A) ")
  new_model = createModel(name=model_name, num_breeds=10)
  print("New model created")
  return new_model

def model2TFLite(model):
  """Convert a Keras model to TensorFlow Lite and write it to ``<model.name>.tflite``.

  Args:
    model: Keras model to convert; its ``name`` attribute is used as the
      output file name.
  """
  converter = tf.lite.TFLiteConverter.from_keras_model(model)
  tfmodel = converter.convert()
  # Use a context manager so the file is flushed and closed deterministically.
  with open(model.name+'.tflite','wb') as tflite_file:
    tflite_file.write(tfmodel)
  print("Model successfully convert to .tflite")


### Extra Tools

In [3]:
def readOption(msg):
  """Prompt the user with ``msg`` and return the answer, which must be '1' or '2'.

  Raises:
    AssertionError: with message "Invalid option" for any other answer.
  """
  choice = input(msg)
  assert choice in ('1', '2'), "Invalid option"
  return choice

def decode_breedID(idx):
  """Translate an encoded breed id into its breed name.

  Reads ``decode_breed.csv`` from the working directory on every call and
  returns the ``breed`` value of the first row whose ``breed_encode`` equals
  ``idx``. Raises IndexError when no row matches.
  """
  lookup = pd.read_csv('decode_breed.csv')
  matching_breeds = lookup.loc[lookup['breed_encode'] == idx, 'breed']
  return matching_breeds.tolist()[0]

def custom_preprocess_input(x):
  """Zero-center the first three channels of an image array.

  Subtracts a fixed per-channel mean from channels 0, 1 and 2 of the last
  axis. No channel reordering is performed.

  Args:
    x: array whose last axis holds the colour channels. Non-floating input is
      converted to float32; floating input keeps its dtype.

  Returns:
    A new array with the means subtracted. The input array is never modified,
    so re-running a notebook cell on the same array is safe.
  """
  # Presumably the ImageNet channel means in BGR order used for VGG16
  # preprocessing -- TODO confirm the channel order of the incoming images.
  mean = [103.939, 116.779, 123.68]

  x = np.asarray(x)
  if issubclass(x.dtype.type, np.floating):
    x = x.copy()  # avoid mutating the caller's array in place
  else:
    x = x.astype('float32')  # astype already returns a fresh array here

  # Zero-center by mean pixel (only the first three channels are touched).
  x[..., :3] -= np.asarray(mean, dtype=x.dtype)

  return x

### Training Tools

In [None]:
def train(model,x_train,y_train,x_test,y_test , batch_size, epochs, model_name=""):
    """Fit ``model`` and report its metrics on the held-out data.

    The test split is used both as validation data during fitting and for the
    final evaluation. ``model_name`` is accepted but not used.

    Returns:
        dict mapping each entry of ``model.metrics_names`` to the value
        returned by ``model.evaluate``.
    """
    held_out = (x_test, y_test)
    model.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=held_out,
    )
    scores = model.evaluate(x_test, y_test)
    return dict(zip(model.metrics_names, scores))



### File Tools

In [None]:
def addTitle2JSON(fileName,dataset_title):
    """Rewrite the JSON file ``fileName`` with its id/title set to ``dataset_title``.

    Reads the file, updates the parsed data through ``addTitle2Data`` and
    writes the result back to the same path.
    """
    metadata = addTitle2Data(readFile(fileName), dataset_title)
    writeFile(fileName, metadata)
    
def writeFile(fileName,data):
  """Serialize ``data`` as JSON into ``fileName`` (overwriting it) and report it.

  Args:
    fileName: path of the file to write.
    data: JSON-serializable object.
  """
  # Imported locally because the notebook's visible import cell does not import json.
  import json

  with open(fileName,'w') as json_file:
      # Save the data
      json.dump(data,json_file)
      print("File "+fileName+" updated")

def readFile(fileName):
  """Parse the JSON file at ``fileName`` and return the resulting object."""
  # Imported locally because the notebook's visible import cell does not import json.
  import json

  with open(fileName, "r") as json_file:
    return json.load(json_file)

def addTitle2Data(data,title):
  """Set the dataset title and rebuild the id as ``<owner>/<title>``.

  The owner is the part of the existing ``data['id']`` before the first '/'.
  ``data`` is modified in place and also returned.
  """
  owner, _, _ = data['id'].partition('/')
  data['id'] = '/'.join((owner, title))
  data['title'] = title
  return data

## Convolutional network architecture

- Using the VGG16 architecture

In [19]:
#Custom model
def createModel(num_breeds,name="model", ) :
  """Build and compile a VGG16-based classifier with a frozen convolutional base.

  The network is VGG16 (ImageNet weights, no top, 224x224x3 input) followed by
  Flatten, two 128-unit ReLU dense layers and a softmax output layer.

  Args:
    num_breeds: number of units in the softmax output layer.
    name: name given to the resulting Keras model.

  Returns:
    The model compiled with Adam (learning rate 0.001), sparse categorical
    cross-entropy loss and the accuracy metric.
  """
  base = VGG16(
      input_shape= (224,224,3),
      weights='imagenet',
      include_top=False
      )

  # Freeze the pre-trained layers so only the new dense head is trained.
  for layer in base.layers:
    layer.trainable = False

  head = Flatten()(base.output)
  head = Dense(128, activation='relu', name='full_connected_1')(head)
  head = Dense(128, activation='relu', name='full_connected_2')(head)
  out = Dense(num_breeds, activation='softmax', name='output')(head)
  custom_model = Model(base.input, out,name=name)

  # Adam optimizer; `learning_rate` is the supported argument name (`lr` is a
  # deprecated alias).
  opt = Adam(learning_rate=0.001)
  custom_model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

  return custom_model

- Create a new model or load pre-trained model from Kaggle

In [None]:
# Interactive: build a new model or load a pre-trained one from Kaggle (see getModel).
model = getModel()

- Model summary

In [None]:
# Show the model's summary.
model.summary()

## Training and validation


- Fetch dataset from kaggle

In [None]:
# Prompts for kaggle.json, then downloads and unzips the dataset into the working directory.
fetchDataSetFromKaggle()

- Train and validate on only a limited number of groups because of RAM limits

In this case, training uses just seven groups (groups 1–7).

In [None]:

# NOTE(review): BASE_PATH is not defined in any visible cell of this notebook;
# it must point at the folder that contains 'v6/batchs/' before this cell runs.

# Fixed seed so the train/validation split of every group is reproducible
# across kernel restarts.
SPLIT_SEED = 42

for i in range(1,8):

    destination_filepath = BASE_PATH+'/v6/batchs/'+'group_'+str(i)+'.h5'
    with h5py.File(destination_filepath, "r") as f:

        print("Group:",i)

        # [:] copies the datasets into memory so the file can be closed.
        x = f["input_data"][:]
        y = f["input_labels"][:]

    x = x.reshape(x.shape[0],224,224,3)
    # Presumably the stored pixels are scaled to [0, 1], hence the *255 before
    # the VGG16 preprocessing -- TODO confirm against the dataset-building notebook.
    x = preprocess_input(x*255)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=.20, random_state=SPLIT_SEED)

    #Displaying information just for the first group
    if i == 1:
        print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
        print("\nDistribution of train classes")
        print(pd.Series(y_train).value_counts())
        print("\nDistribution of test classes")
        print(pd.Series(y_test).value_counts())

    # The same model keeps training across groups, one group in memory at a time.
    train(model,x_train,y_train,x_test,y_test , batch_size=32, epochs=10, model_name="modelA")

Group: 1
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Group: 2
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Group: 3
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Group: 4
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Group: 5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Group: 6
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Group: 7
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Testing

- Testing with images from Google

In [None]:

#Set random image
image_path = 'agkistrodon-contortrix.png'
img = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError("Could not read image: " + image_path)

img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC) #Resize

# OpenCV loads images in BGR order while matplotlib expects RGB, so convert
# for display only; the array fed to the model keeps OpenCV's channel order.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) #Display Image

# NOTE(review): training used keras' preprocess_input while this cell uses
# custom_preprocess_input (no channel flip) -- confirm both produce the same
# channel order for the model.
img = img.reshape(1,224,224,3)
img = custom_preprocess_input(img)
yhat = model.predict(img)

index = yhat.argmax()
print(decode_breedID( index ))
# Highest predicted probability, shown as the cell output.
np.max(yhat)

## Saving model

- Local save

In [None]:
#Saving model
# makedirs with exist_ok=True does not fail when the folder already exists,
# so the cell can be re-run safely.
os.makedirs('model', exist_ok=True)
model.save(os.path.join('model', model.name + ".h5"))

- Saving model in Kaggle

In [5]:
# Interactive: optionally upload the contents of /content/model to Kaggle.
kaggle_saveModel()

Do you want upload model  in  your kaggle Account? [y/n] n
Upload canceled


## Convert model to TensorFlow Lite

In [None]:
# Writes <model.name>.tflite to the working directory.
model2TFLite(model)

INFO:tensorflow:Assets written to: /tmp/tmpxunj5ubq/assets


INFO:tensorflow:Assets written to: /tmp/tmpxunj5ubq/assets


71785940