In [94]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import cv2

import random
import math
import networkx as nx

import boto3
from PIL import Image

import requests
import json

from tqdm import tqdm
getattr(tqdm, '_instances', {}).clear()  # ⬅ add this line

%matplotlib inline

plt.style.use('ggplot')

pd.set_option('display.max_columns', 50)

In [95]:
import flask
from flask import request

In [96]:
import matplotlib.image as mpimg # show images
from io import BytesIO # reading bytes

import pickle # save images
import time # get time stamp of models trained
# import shap

## Import Images

In [97]:
# grab and resize image from and to s3 bucket

bucket = 'cwbirdsimages'
img_dir = 'new_images' # folder containing all other folders of images
folders = ['ducks', 'finches', 'hawks']

def resize_images_array(img_dir, folders, bucket):
    # arrays of image pixels
    img_arrays = []
    labels = []
    
    # loop through the dataframe that is linked to its label so that all images are in the same order
    for folder in tqdm(folders):
        s3 = boto3.client('s3')
        enter_folder = s3.list_objects_v2(Bucket=bucket, Prefix=f'{img_dir}/{folder}')
        for i in enter_folder['Contents'][2:]:
            try:
                filepath = i['Key']
                obj = s3.get_object(Bucket=bucket, Key=f'{filepath}')
                img_bytes = BytesIO(obj['Body'].read())
                open_img = Image.open(img_bytes)
                arr = np.array(open_img.resize((299,299))) # resize to 200,200. possible to play around with better or worse resolution
                img_arrays.append(arr)
                labels.append(folder)
            except:
                print(filepath) # get file_path of ones that fail to load
                continue

    return np.array(img_arrays), np.array(labels)

In [98]:
X, y = resize_images_array(img_dir, folders, bucket)

100%|██████████| 3/3 [04:37<00:00, 92.51s/it]


In [99]:
print('X shape: ', X.shape)
print('y_shape: ', y.shape)

X shape:  (2986, 299, 299, 3)
y_shape:  (2986,)


In [100]:
img_df = pd.DataFrame(y)

# img_df['img_array'] = [i.flatten() for i in X]

# img_df.info()

# img_df.to_csv('data/img_df.csv')

# imgs = pd.read_csv('data/img_df.csv', index_col=0)

# X = [np.array(i).reshape(299,299,3) for i in imgs['img_array']]

In [105]:
y

array(['ducks', 'ducks', 'ducks', ..., 'hawks', 'hawks', 'hawks'],
      dtype='<U7')

### Look at single image

In [None]:
s3 = boto3.client('s3')

# grab duck image
obj = s3.get_object(Bucket=bucket, Key=f'new_images/ducks/0296/069519c379574fb285d7bb920443ea89.jpg')
img_bytes = BytesIO(obj['Body'].read())
duck1 = Image.open(img_bytes)

# grab hawk image
obj = s3.get_object(Bucket=bucket, Key=f'new_images/hawks/0495/03126240f9974b259e1c0bc142af7edc.jpg')
img_bytes = BytesIO(obj['Body'].read())
hawk1 = Image.open(img_bytes)

# grab finch image
obj = s3.get_object(Bucket=bucket, Key=f'new_images/finches/1001/0edd165e46054dd388dcb9dae4e58f87.jpg')
img_bytes = BytesIO(obj['Body'].read())
finch1 = Image.open(img_bytes)

In [None]:
# By stacking these together into a 3-tensor, we can represent a color image as a single object.

fig, axes = plt.subplots(3, 4, figsize=(20,10));

fig.suptitle('RBG Channels of Images', y=1.1, fontsize=20)

axes[0][0].imshow(duck1);
axes[0][0].set_title('original')
for ax, channel, name in zip(axes[0][1:], duck1.split(), ['red channel', 'green channel', 'blue channel']):
    ax.imshow(channel, cmap=f'{name.split()[0].capitalize()}s_r');
    ax.set_title(name)
    
axes[1][0].imshow(hawk1);
axes[1][0].set_title('original')
for ax, channel, name in zip(axes[1][1:], hawk1.split(), ['red channel', 'green channel', 'blue channel']):
    ax.imshow(channel, cmap=f'{name.split()[0].capitalize()}s_r');
    ax.set_title(name)
    
axes[2][0].imshow(finch1);
axes[2][0].set_title('original')
for ax, channel, name in zip(axes[2][1:], finch1.split(), ['red channel', 'green channel', 'blue channel']):
    ax.imshow(channel, cmap=f'{name.split()[0].capitalize()}s_r');
    ax.set_title(name)

plt.tight_layout()
plt.savefig('graphs/dhf_RBGplot.png')

### Normalize Feature Arrays

In [None]:
# normalize the RBG values
X = X/255.0

### Label and Features

In [106]:
label = y.copy()

In [107]:
label

array(['ducks', 'ducks', 'ducks', ..., 'hawks', 'hawks', 'hawks'],
      dtype='<U7')

In [108]:
print('Number of Ducks: ', np.sum(label == 'ducks'))
print('Number of Finches: ', np.sum(label == 'finches'))
print('Number of Hawks: ', np.sum(label == 'hawks'))

Number of Ducks:  990
Number of Finches:  998
Number of Hawks:  998


In [111]:
np.array(label.reshape(-1,1) == folders)

array([[ True, False, False],
       [ True, False, False],
       [ True, False, False],
       ...,
       [False, False,  True],
       [False, False,  True],
       [False, False,  True]])

In [109]:
y = np.array(label.reshape(-1,1) == folders).astype(float)

In [110]:
y[:5]

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

In [None]:
print('label shape: ', y.shape)
print('features shape: ', X.shape)

# Model Time

### Work Flow

1. X, and y defined
- make sure they are arrays!!

2. normalize X values by dividing by 255
3. check images
4. train test split
5. make model Sequential()
6. add input layer
7. add multiple hidden layers
8. ADD FLATTEN LAYER, MUST BE BEFORE OUTPUT
9. add dense layer, which are fully connected layers
10. add output dense layer, will be the amount of labels there are
11. model.compile(loss = 'sparse_categorical_crossentropy', optimizer= 'adam', metrics=['accuracy'])
12. model.fit(xtrain, ytrain, epochs) also has validation_split (out of sample) do about 0.1, batchsize: how many at a time, more data requires bigger (20-200 range)
13. model.evaluate(xtest,ytest) returns val loss and val accuracy  

14. model.save('name') saves the model
- to load: new_model = tf.keras.models.load_model('name')

### Import Libraries

In [17]:
# keras and tensorflow downloads
import tensorflow as tf

In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, GlobalAveragePooling2D
from tensorflow.keras.layers import Conv2D, MaxPool2D, BatchNormalization # CNN
from tensorflow.keras.models import Model

from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.applications import Xception
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, RMSprop

from tensorflow.keras.callbacks import TensorBoard # graphical visual of loss and accuracy over the epochs of train and test set
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import datetime

tf.__version__

'2.2.0'

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [None]:
# check to make sure the bird images and labels are aligned
# this is indeed a duck

plt.title(f'{folders[y_train[10].argmax()]}')
plt.imshow(X_train[10]);

# plt.savefig('graphs/duck1.png')

In [None]:
print('X_train shape: ', X_train.shape)
print('X_test shape: ', X_test.shape)
print('y_train shape: ', y_train.shape)
print('y_test shape: ', y_test.shape)

## Xception: Transfer Model

In [None]:
log_xcept = os.path.join("logs/xception", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = TensorBoard(log_dir=log_xcept, histogram_freq=1)

In [None]:
# %load_ext tensorboard

# %tensorboard --logdir='logs/'

In [None]:
input_size = (299,299,3)
model = Xception(weights='imagenet',
                          include_top=True,
                          input_shape=input_size)

In [None]:
def print_model_properties(model, indices = 0):
     for i, layer in enumerate(model.layers[indices:]):
        print(f"Layer {i+indices} | Name: {layer.name} | Trainable: {layer.trainable}")

In [None]:
print_model_properties(model)

In [None]:
def create_transfer_model(input_size, n_categories, weights = 'imagenet'):
        # note that the "top" is not included in the weights below
        base_model = Xception(weights=weights,
                          include_top=False,
                          input_shape=input_size)
        
        model = base_model.output
        model = GlobalAveragePooling2D()(model)
        predictions = Dense(n_categories, activation='softmax')(model)
        model = Model(inputs=base_model.input, outputs=predictions)
        
        return model

In [None]:
transfer_model = create_transfer_model((299,299,3),3)

In [None]:
def change_trainable_layers(model, trainable_index):
    for layer in model.layers[:trainable_index]:
        layer.trainable = False
    for layer in model.layers[trainable_index:]:
        layer.trainable = True

In [None]:
_ = change_trainable_layers(transfer_model, 132)

In [None]:
print_model_properties(transfer_model, 130)

In [None]:
transfer_model.compile(optimizer=RMSprop(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
transfer_test = transfer_model.fit(X_train, y_train, batch_size = 32, epochs=6, validation_split=0.1)

In [None]:
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

pred1 = transfer_model.predict(X_test)

y_true = y_test.copy()

y_true = np.array([i.argmax() for i in y_true]).reshape(-1,1)

y_predicted = (pred1 > 0.5).astype(float)

y_predicted = np.array([i.argmax() for i in y_predicted]).reshape(-1,1)

mat = confusion_matrix(y_true, y_predicted)

plot_confusion_matrix(conf_mat=mat, figsize=(8,8), class_names=folders);

# plt.savefig('graphs/modelx_7_conf_mat.png')

In [None]:
print('Transfer Model1: Loss and Accuracy')
evaluate = transfer_model.evaluate(X_test, y_test)

In [None]:
xception_final = transfer_model.fit(X, y, batch_size = 32, epochs=6, validation_split=0.1)

# Load Xception

In [19]:
# transfer_model.save('saved_models/xception_final.h5')
load_xception = tf.keras.models.load_model('saved_models/xception_final.h5')

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
import tensorflow.keras.backend as K
import shap

In [None]:
input_size
n_categories = 3

In [None]:
# load pre-trained model and choose two images to explain
vgg = VGG16(weights='imagenet', include_top=False, input_shape=input_size)
to_explain = X[[39,41]]

In [None]:
vgg_model = vgg.output
vgg_model = GlobalAveragePooling2D()(vgg_model)
predictions = Dense(n_categories, activation='softmax')(vgg_model)
vgg_model = Model(inputs=vgg.input, outputs=predictions)

In [None]:
lay = change_trainable_layers(vgg_model, 19)

In [None]:
print_model_properties(vgg_model, 17)

In [None]:
vgg_model.compile(optimizer=RMSprop(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
vgg_model.summary()

In [None]:
vgg_fit = vgg_model.fit(X_train, y_train, batch_size = 32, epochs=6, validation_split=0.1)

# Load VGG

In [20]:
# vgg_model.save('saved_models/vgg_model.h5')
load_vgg = tf.keras.models.load_model('saved_models/vgg_model.h5')

In [None]:
vgg_eval = vgg_model.evaluate(X_test, y_test)

In [None]:
# plot the explanations
shap.image_plot(shap_values, to_explain, index_names)

In [None]:
s_model.layers[0].input.ref()

In [None]:
# grab duck image
# obj = s3.get_object(Bucket=bucket, Key=f'new_images/ducks/0296/069519c379574fb285d7bb920443ea89.jpg')
# img_bytes = BytesIO(obj['Body'].read())
open_img3 = Image.open('hawk_duck.png')
hawk_duck = np.array(open_img3.resize((299,299)))

open_img2 = Image.open('indoor_duck.png')
indoor_duck = np.array(open_img2.resize((299,299)))

In [None]:
hawk_duck.shape

In [None]:
load_xception.input_shape

In [None]:
load_vgg.predict(indoor_duck.reshape(1,299,299,3))

In [2]:
url = 'http://4.bp.blogspot.com/-b4WNXoiFJh0/UCZ3blXrMPI/AAAAAAAABtM/sL1ocEkITsg/s1600/duck_male_green_head.jpg'

In [10]:
from PIL import Image
import requests
from io import BytesIO
import numpy as np
import matplotlib.pyplot as plt

In [3]:
response = requests.get(url)
print(response.content)
img = Image.open(BytesIO(response.content))
test_img = np.array(img.resize((299,299)))
plt.imshow(img)

b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00\xe6\x00\xe6\x00\x00\xff\xe1\x00lExif\x00\x00II*\x00\x08\x00\x00\x00\x03\x001\x01\x02\x00\x07\x00\x00\x002\x00\x00\x00\x12\x02\x03\x00\x02\x00\x00\x00\x02\x00\x02\x00i\x87\x04\x00\x01\x00\x00\x00:\x00\x00\x00\x00\x00\x00\x00Google\x00\x00\x03\x00\x00\x90\x07\x00\x04\x00\x00\x000220\x02\xa0\x04\x00\x01\x00\x00\x00@\x06\x00\x00\x03\xa0\x04\x00\x01\x00\x00\x00\xb0\x04\x00\x00\x00\x00\x00\x00\xff\xdb\x00C\x00\n\x07\x07\x08\x07\x06\n\x08\x08\x08\x0b\n\n\x0b\x0e\x18\x10\x0e\r\r\x0e\x1d\x15\x16\x11\x18#\x1f%$"\x1f"!&+7/&)4)!"0A149;>>>%.DIC<H7=>;\xff\xdb\x00C\x01\n\x0b\x0b\x0e\r\x0e\x1c\x10\x10\x1c;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\xff\xc2\x00\x11\x08\x04\xb0\x06@\x03\x01"\x00\x02\x11\x01\x03\x11\x01\xff\xc4\x00\x1b\x00\x00\x02\x03\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x01\x02\x05\x00\x06\x07\xff\xc4\x00\x19\x01\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x02\x03\x04\x05\xff\

NameError: name 'plt' is not defined

In [76]:
file = 'tmp/temp_folder/meet_duck.png'

a = Image.open(file)

c = np.array(a.resize((299,299)))
a

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


In [112]:
ans = load_xception.predict(c.reshape(1,299,299,3))

In [117]:
np.round(ans, 3)[0][2]

0.0