In [3]:
import json
import math
import os
import random
from io import BytesIO

import boto3
import cv2
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from PIL import Image
from tqdm import tqdm
getattr(tqdm, '_instances', {}).clear()  # empty tqdm's `_instances` collection if it exists (no-op otherwise); presumably resets bars left over from interrupted runs

# render matplotlib figures inside the notebook output
%matplotlib inline

# use the ggplot look for every figure in this notebook
plt.style.use('ggplot')

# show up to 50 columns when a DataFrame is displayed
pd.set_option('display.max_columns', 50)

In [4]:
import pickle # save images
import time # get time stamp of models trained
import shap

## Import Images

In [None]:
# download images from the s3 bucket and resize them into one array

img_dir = 'new_images' # folder containing all other folders of images


def resize_images_array(img_dir, file_paths, size=(200, 200)):
    """Download images from S3 and return them as one stacked pixel array.

    Parameters
    ----------
    img_dir : str
        Key prefix (folder) inside the bucket; each object key is built as
        ``f'{img_dir}/{path}'``.
    file_paths : iterable of str
        Image paths relative to ``img_dir``, processed in the given order.
    size : tuple of int, optional
        ``(width, height)`` every image is resized to. Defaults to
        ``(200, 200)``; change it to trade resolution against memory.

    Returns
    -------
    numpy.ndarray
        One ``(height, width, 3)`` RGB pixel array per image that loaded
        successfully, in the order of ``file_paths``.

    Notes
    -----
    The bucket name is read from the module-level variable ``bucket``,
    which must be defined before this function is called.

    Images that cannot be fetched or decoded are skipped, so the result can
    be shorter than ``file_paths``. A summary of the skipped paths is
    printed, because any label array built from the full path list has to
    drop the same entries to stay aligned with the returned images.
    """
    # one client for the whole run instead of a new one per image
    s3 = boto3.client('s3')

    img_arrays = []  # arrays of image pixels
    skipped = []     # paths that failed to download or decode

    for path in tqdm(file_paths):
        try:
            obj = s3.get_object(Bucket=bucket, Key=f'{img_dir}/{path}')
            with Image.open(BytesIO(obj['Body'].read())) as open_img:
                # force 3 channels so grayscale/RGBA files stack with the rest
                arr = np.array(open_img.convert('RGB').resize(size))
        except (s3.exceptions.ClientError, OSError):
            # Only per-image failures (missing key, access error, unreadable
            # image) are skipped. Programming errors such as an undefined
            # `bucket` now propagate instead of silently emptying the result.
            skipped.append(path)
            continue
        img_arrays.append(arr)

    if skipped:
        total = len(skipped) + len(img_arrays)
        print(f'skipped {len(skipped)} of {total} images that failed to load; first few: {skipped[:10]}')

    return np.array(img_arrays)

In [None]:
# NOTE(review): resize_images_array requires both `img_dir` and `file_paths`;
# this call passes neither and raises TypeError until they are supplied.
X = resize_images_array()

### Look at single image

In [None]:
# NOTE(review): the assignment below has no right-hand side, so this cell is a
# SyntaxError until the image to inspect is filled in.
single_img = 
single_img

In [None]:
# Fetch one raw image straight from S3 for visual inspection.
# The client has to be created here: the `s3` inside resize_images_array is a
# local of that function and is not visible at cell level.
s3 = boto3.client('s3')
obj = s3.get_object(Bucket=bucket, Key=f'images/0776/16398b734cf540e3b0bcc943621e3515.jpg')
img_bytes = BytesIO(obj['Body'].read())  # wrap the downloaded bytes in a file-like object for PIL
open_img = Image.open(img_bytes)

In [None]:
# A color image is a 3-tensor: one 2-D plane per channel, stacked together.
# Draw the full image first, then each of its channel planes beside it.

fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(16, 6))

panels = [open_img, *open_img.split()]
titles = ['original', 'red channel', 'green channel', 'blue channel']
for ax, panel, title in zip(axes, panels, titles):
    ax.imshow(panel)
    ax.set_title(title)

### Normalize Feature Arrays

In [None]:
# normalize the RGB values from the 0-255 range to 0-1
# Guarded on dtype so the cell is idempotent: once X holds floats, re-running
# the cell no longer divides the already-scaled data a second time.
if np.issubdtype(X.dtype, np.integer):
    X = X / 255.0

### Label and Features

In [None]:
# Labels as a NumPy array (a plain list has no `.shape`, which the next cell reads).
# NOTE(review): the model cells below use `y_train.shape[1]` and
# 'categorical_crossentropy', so the final `y` needs one one-hot row per image in X.
y = np.array(['duck', 'hawk', 'finch'])

In [None]:
# Report the dimensions of the label and feature arrays
for caption, array in (('label shape: ', y), ('features shape: ', X)):
    print(caption, array.shape)

In [None]:
# Check that the bird images and labels are aligned by displaying one image.
# Original note: "this is indeed a semipalmated sandpiper".
# NOTE(review): print() below emits only a blank line, so the label for index 57
# is never shown; presumably y[57] was meant to be printed. TODO confirm.

print()
plt.imshow(X[57]);

# Model Time

### Work Flow

1. X, and y defined
- make sure they are arrays!!

2. normalize X values by dividing by 255
3. check images
4. train test split
5. make model Sequential()
6. add input layer
7. add multiple hidden layers
8. ADD FLATTEN LAYER, MUST BE BEFORE OUTPUT
9. add dense layers, which are fully connected layers
10. add the output dense layer, with one unit per label (class)
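11. model.compile(loss = 'sparse_categorical_crossentropy', optimizer= 'adam', metrics=['accuracy']) — use loss = 'categorical_crossentropy' instead when the labels are one-hot encoded, as the compile cell in this notebook does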
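12. model.fit(xtrain, ytrain, epochs) also takes validation_split (the fraction of the training data held out for validation; about 0.1 works well) and batch_size (how many samples are processed per update; larger datasets can use larger batches, roughly in the 20-200 range)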
13. model.evaluate(xtest,ytest) returns the loss and accuracy on the test set  

14. model.save('name') saves the model
- to load: new_model = tf.keras.models.load_model('name')

### Import Libraries

In [None]:
# keras and tensorflow downloads
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPool2D, BatchNormalization # CNN
from tensorflow.keras.callbacks import TensorBoard # graphical visual of loss and accuracy over the epochs of train and test set
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import datetime

# show the installed TensorFlow version as the cell output
tf.__version__

In [None]:
# Hold out 20% of the images for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# display the training image at index 55 as a visual sanity check after the split
# (the trailing ';' suppresses the AxesImage repr in the cell output)
plt.imshow(X_train[55]);

In [None]:
# Report the dimensions of each train/test split
split_arrays = (
    ('X_train shape: ', X_train),
    ('X_test shape: ', X_test),
    ('y_train shape: ', y_train),
    ('y_test shape: ', y_test),
)
for caption, array in split_arrays:
    print(caption, array.shape)

## CNN: Convolutional Neural Network Modelx 1

In [None]:
# Create model
# (previously assigned to `model1`, while every later line uses `modelx1`,
# which raised NameError on the first .add call)
modelx1 = Sequential()

# Convolution block 1: 32 filters; the input shape is taken from one training image
modelx1.add(Conv2D(32, (3,3), activation='relu', input_shape=X_train[0].shape)) # scans with a (3,3) grid
modelx1.add(BatchNormalization())
modelx1.add(MaxPool2D(2,2)) # 2x2 pooling window with stride 2
modelx1.add(Dropout(0.3))

# Convolution block 2: 64 filters
modelx1.add(Conv2D(64, (3,3), activation='relu')) # scans with a (3,3) grid
modelx1.add(BatchNormalization())
modelx1.add(MaxPool2D(2,2)) # 2x2 pooling window with stride 2
modelx1.add(Dropout(0.3))

# Convolution block 3: 128 filters, slightly heavier dropout
modelx1.add(Conv2D(128, (3,3), activation='relu')) # scans with a (3,3) grid
modelx1.add(BatchNormalization())
modelx1.add(MaxPool2D(2,2)) # 2x2 pooling window with stride 2
modelx1.add(Dropout(0.4))

# Must Flatten before entering Dense layers
modelx1.add(Flatten())

# Two fully connected blocks of 128 units each
modelx1.add(Dense(128, activation='relu'))
modelx1.add(BatchNormalization())
modelx1.add(Dropout(0.4))

modelx1.add(Dense(128, activation='relu'))
modelx1.add(BatchNormalization())
modelx1.add(Dropout(0.4))

# Output layer: one softmax unit per column of y_train, so y_train must be 2-D
# with one column per class.
modelx1.add(Dense(y_train.shape[1], activation='softmax'))

In [None]:
# Header for the summary: model name and how many of all images are used for training
print('Modelx 1: CNN')
print(f'Number of Training Images: {X_train.shape[0]}/{X_train.shape[0] + X_test.shape[0]}')
modelx1.summary()  # prints the model's layer summary

In [None]:
# Configure training: categorical cross-entropy loss, Adam optimizer, accuracy as the tracked metric.
modelx1.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Give each training run its own timestamped TensorBoard log folder under logs/fit.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = os.path.join("logs/fit", run_stamp)

# histogram_freq=1 records weight histograms every epoch
tensorboard_callback = TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

In [None]:
# Train for 10 epochs in batches of 100, holding out 10% of the training data
# for validation; metrics are also logged through the TensorBoard callback.
print('Modelx 1: CNN')
print(f'Number of Training Images: {X_train.shape[0]}/{X_train.shape[0] + X_test.shape[0]}')
historyx1 = modelx1.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=100,
    validation_split=0.1,
    callbacks=[tensorboard_callback],
)

In [None]:
# show the metrics recorded by fit; the plotting cell below reads the keys
# 'accuracy', 'val_accuracy', 'loss' and 'val_loss' from this mapping
historyx1.history

In [None]:
# Plot training vs. validation accuracy and loss per epoch, then save the figure.
history = historyx1.history

# Derive the x-axis from the recorded history so it stays correct when the
# number of epochs passed to fit changes (it was hard-coded to 10).
epoch_range = range(1, len(history['loss']) + 1)

fig, axes = plt.subplots(1, 2, figsize=(12, 6))

for ax, metric, label in zip(axes, ('accuracy', 'loss'), ('Accuracy', 'Loss')):
    ax.plot(epoch_range, history[metric])
    ax.plot(epoch_range, history[f'val_{metric}'])
    ax.set_ylabel(label)  # the loss panel was previously mislabelled 'Accuracy'
    ax.set_xlabel('Number of Epochs')
    ax.legend(['Train', 'Val'], loc='upper left')
    ax.set_title(f'Modelx1 {label}')

# savefig does not create missing directories, so make sure the target exists
os.makedirs('graphs', exist_ok=True)
plt.savefig('graphs/modelx1_acc_loss.png')

In [None]:
# %load_ext tensorboard

# %tensorboard --logdir='logs/'

In [None]:
# run the trained model on the held-out test images; the output layer is
# softmax, so presumably each row of pred1 holds per-class probabilities
pred1 = modelx1.predict(X_test)

In [None]:
# Compare the true and predicted class index for a single test image.
# Assumes y_test is one-hot encoded, as the 'categorical_crossentropy' loss and
# the Dense(y_train.shape[1]) output layer require; argmax then gives the class
# index for both the true row and the predicted probability row.
sample_idx = 0  # which test image to inspect

print('CNN Model 1 Prediction Check: ')
print('True label of bird: ', np.argmax(y_test[sample_idx]))
print('Predicted label of bird: ', np.argmax(pred1[sample_idx]))

In [None]:
# modelx1.save('saved_models/conv-3-dense-2-fr32-128.h5')
# keras.models.load_model