In [None]:
# Mount Google Drive at /content/drive; later cells read the image folders
# through relative paths that start with drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# general imports
%matplotlib inline
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# deep learning libraries
from sklearn.model_selection import train_test_split
import keras
from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D

# image manipulation libraries
from skimage.io import imread
from skimage.transform import resize
from PIL import Image
from skimage.feature import hog
from skimage.color import rgb2grey
import cv2

__Project Plan:__
* Data Preprocessing
  * process original images (put into dataframe, resize)
  * artificially create new images (crop, rotate, flip)
  * PCA or NMF potentially? need to look back into
* Training
  * split into train, test, and eval sets (evaluation for end product showcasing- very small)
  * create Sequential keras model with arrangement of layers- figure out which ones in which order
  * train until accuracy isn't improving- activation functions & whatnot need to be figured out
* Showcase
  * create pretty images showing what we've done w/ matplotlib or seaborn (preferably on slideshow, not actually going to code itself)
  * potentially create mock GUI in Figma? May help presentation if we have time

__PReLU vs. ReLU__
* ReLU: if x > 0: x; else: 0
* Parameterized ReLU (PReLU): for negative inputs, uses a linear function with an adjustable coefficient that controls the slope, reducing to ReLU when the coefficient is 0


*from Galaxy Classifications with Deep Learning by Lukic and Bruggen*

__PCA vs. NMF__
* NMF (Non-negative Matrix Factorization) is similar to PCA (Principal Component Analysis) but utilizes only non-negative features of the matrices, allowing it to find the parts of a whole

# Data Preprocessing

In [None]:
"""uses grabCut from OpenCV on an image to isolate the foreground"""
def grab_cut(img, size=(250, 250), rect=(50, 50, 150, 150), iterations=5):
  """Isolate the foreground of an image with OpenCV's grabCut.

  The image is first resized to ``size``; grabCut is then initialised from
  ``rect`` and every pixel it labels as (probable) background is zeroed out.

  Args:
    img: image array as returned by ``cv2.imread``.
    size: ``(width, height)`` passed to ``cv2.resize`` before segmentation.
    rect: ``(x, y, w, h)`` rectangle, in resized-image coordinates, that is
      assumed to contain the foreground object.
    iterations: number of grabCut iterations to run.

  Returns:
    The resized image with background pixels set to 0 (same dtype as the
    resized image, shape ``(size[1], size[0], channels)``).

  Raises:
    ValueError: if ``img`` is None, which is what ``cv2.imread`` returns for
      a file it cannot decode.
  """
  if img is None:
    raise ValueError('grab_cut received None instead of an image (unreadable file?)')

  # resize original image and create a mask to be filled in by grabCut
  resized_img = cv2.resize(img, size)
  mask = np.zeros(resized_img.shape[:2], np.uint8)

  # scratch arrays grabCut uses internally for its background/foreground models
  bgd_model = np.zeros((1, 65), np.float64)
  fgd_model = np.zeros((1, 65), np.float64)

  cv2.grabCut(resized_img, mask, rect, bgd_model, fgd_model, iterations, cv2.GC_INIT_WITH_RECT)

  # binarize: definite + probable background -> 0, everything else -> 1
  is_background = (mask == cv2.GC_BGD) | (mask == cv2.GC_PR_BGD)
  foreground = np.where(is_background, 0, 1).astype('uint8')

  # broadcast the 2-D mask over the colour channels to black out the background
  return resized_img * foreground[:, :, np.newaxis]

In [None]:
size = (50,50,3) # shape of every stored image array; presumably (height, width, channels)

def do_picture_stuff(path,label):
  """Preprocess every image in a folder into augmented, fixed-size arrays.

  Each readable file is passed through ``grab_cut`` once, then rotated by
  0/90/180/270 degrees; every rotation is scaled down to ``size`` and stored
  together with a copy of ``label``.

  Args:
    path: directory whose files are all treated as images.
    label: label (here a one-hot list) attached to every produced image.

  Returns:
    ``(img_data_list, label_list)``: two parallel lists with four entries
    per readable file.
  """
  img_data_list = []
  label_list = []
  for filename in os.listdir(path):
    img_path = os.path.join(path,filename)
    image = cv2.imread(img_path)
    print('working on', img_path)
    if image is None:
      # cv2.imread returns None instead of raising for files it cannot decode
      print('skipping unreadable file', img_path)
      continue

    # grabCut is the expensive step and does not depend on the angle, so run it once per file
    cut_img = Image.fromarray(grab_cut(image)) # use Image.fromarray(image) here to skip grabCut
    for angle in (0,90,180,270):
      rotated = cut_img.rotate(angle)
      # Image.resize really scales the picture; np.resize would only keep the
      # first size-many flattened values (the top few pixel rows).
      scaled = rotated.resize((size[1], size[0])) # PIL expects (width, height)
      img_data_list.append(np.array(scaled))
      label_list.append(label)
  return (img_data_list,label_list)


# for path, label in (('drive/MyDrive/CometHack2021/not_poision_ivy_pictures', [1., 0., 0.]), ('drive/MyDrive/CometHack2021/poison_ivy_pictures', [0., 1.,0.]), ('drive/MyDrive/CometHack2021/poison_oak_pictures', [0., 0.,1.])):


# One-hot labels used below: [1,0,0] = not poisonous, [0,1,0] = poison ivy, [0,0,1] = poison oak.
poison_list, poison_labels = do_picture_stuff('drive/MyDrive/CometHack2021/poison_ivy_pictures', [0., 1.,0.])
tmp = do_picture_stuff('drive/MyDrive/CometHack2021/poison_oak_pictures', [0., 0.,1.])
poison_list += tmp[0]
poison_labels += tmp[1]

not_list, not_labels = do_picture_stuff('drive/MyDrive/CometHack2021/not_poision_ivy_pictures', [1., 0., 0.])

false_list, false_labels = do_picture_stuff('drive/MyDrive/CometHack2021/False', [1., 0., 0.])

iris_list, iris_labels = do_picture_stuff('drive/MyDrive/CometHack2021/Irises', [1., 0., 0.])


# Raw-pixel dataset for the model that skips HOG. img_data_list / label_list
# exist only inside do_picture_stuff, so build the arrays from the returned lists.
# NOTE(review): assumes this dataset is poison + look-alike ("not") images,
# matching the commented-out loop above -- confirm.
plant_imgs = np.array(poison_list + not_list)
plant_labels = np.array(poison_labels + not_labels)
ls = np.array(plant_labels)

working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy1.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy3.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy4.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy2.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy5.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy6.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy7.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy10.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy11.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy12.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy14.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy16.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy18.jpg
working on drive/MyDrive/CometHack2021/poison_ivy_pictures/ivy19.jpg
working on drive/MyDrive/CometHack2021/po

In [None]:
# Disabled scratch cell: the whole body is a string literal, so none of it runs.
# It repeats grab_cut's steps on one sample image and would plot the result.
# The inline "# 5 iterations" note is stale: the call inside passes 10.
'''path = 'drive/MyDrive/CometHack2021/not_poision_ivy_pictures/not1.jpg'
img = cv2.imread(path)
resized_img = cv2.resize(img, (250,250))
mask = np.zeros((resized_img.shape[:2]),np.uint8)

# models used internally to the grabCut
bgdModel = np.zeros((1,65),np.float64)
fgdModel = np.zeros((1,65),np.float64)

rect = (50, 50, 150, 150) # bounding rectangle for foreground
cv2.grabCut(resized_img, mask, rect, bgdModel, fgdModel, 10, cv2.GC_INIT_WITH_RECT) # 5 iterations using rectangle

# binarize background and foreground
mask2 = np.where((mask==2)|(mask==0),0,1).astype('uint8')

# create final image by multiplying by new mask
final_img = resized_img*mask2[:,:,np.newaxis]

plt.imshow(final_img)'''

In [None]:
def hog_it(imgs):
  """Return an array holding one HOG descriptor per image in ``imgs``.

  Every image is described with 9 orientation bins over 2x2-pixel cells.
  """
  descriptors = [
    np.array(hog(picture, orientations=9, pixels_per_cell=(2,2)))
    for picture in imgs
  ]
  return np.array(descriptors)

# HOG features for each image group, computed in the order poison, iris, not, false
poison_hog, iris_hog, not_la_hog, false_pg_hog = map(
    hog_it, (poison_list, iris_list, not_list, false_list)
)

# Model with nots (look-alikes)

In [None]:
# Stack the poison (ivy + oak) HOG features with the look-alike ("not") features.
poison_nots = np.concatenate((poison_hog, not_la_hog))
pn_labels = np.concatenate((poison_labels, not_labels))

# 80% train; the held-out 20% is split again into a test set and a tiny eval set.
# Both splits are seeded so Restart & Run All reproduces the same partitions.
x_train, x_temp, y_train, y_temp = train_test_split(poison_nots, pn_labels, test_size=.2, random_state=5)
x_test, x_eval, y_test, y_eval = train_test_split(x_temp, y_temp, test_size=.03, random_state=5)

print('xtrain:\t',len(x_train))
print('xtest:\t',len(x_test))
print('xeval:\t',len(x_eval))
print(y_eval)

# length of one HOG feature vector (the model's input width)
len(x_train[0])

xtrain:	 323
xtest:	 78
xeval:	 3
[[0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]


42849

In [None]:
# Fully connected classifier on HOG feature vectors: two hidden layers, light
# dropout, and a 3-way softmax over the one-hot classes.
model = keras.models.Sequential()
# input width is read from the training data instead of a hard-coded 42849,
# so the cell keeps working if the image size or HOG settings change
model.add(Dense(50, activation='relu', input_shape=(x_train.shape[1],)))
model.add(Dense(50, activation='relu'))
model.add(Dropout(.1))
model.add(Dense(3, activation='softmax'))

model.summary()

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_42 (Dense)             (None, 50)                2142500   
_________________________________________________________________
dense_43 (Dense)             (None, 50)                2550      
_________________________________________________________________
dropout_1 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_44 (Dense)             (None, 3)                 153       
Total params: 2,145,203
Trainable params: 2,145,203
Non-trainable params: 0
_________________________________________________________________


In [None]:
# The output layer is a 3-way softmax trained on one-hot labels, so the matching
# loss is categorical cross-entropy. binary_crossentropy treats the three outputs
# as independent yes/no problems and, depending on the Keras version, can also
# switch 'accuracy' to a per-output binary accuracy.
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(learning_rate=.001), metrics=['accuracy'])
model.fit(x_train, y_train, epochs=100, validation_data=(x_test, y_test))

# evaluate returns [loss, accuracy] because one metric was requested at compile time
test_eval = model.evaluate(x_test, y_test, verbose=False)
print("Test loss, accuracy: %s, %s" % (test_eval[0], test_eval[1]))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Spot check on the tiny eval split: print the true one-hot labels, then show
# the model's softmax outputs for the same samples as the cell result.
print(y_eval)
model.predict(x_eval)

[[0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]


array([[0.34163994, 0.32818028, 0.33017975],
       [0.34163994, 0.32818028, 0.33017975],
       [0.34163994, 0.32818028, 0.33017975]], dtype=float32)

# Model with look-alikes and no hog used

In [None]:
# Same 80/20 then test/eval split as the HOG models, but on the raw pixel arrays.
# Seeded so the partitions are reproducible across kernel restarts.
x_tra1n, x_t3mp, y_tra1n, y_t3mp = train_test_split(plant_imgs, plant_labels, test_size = .2, random_state=5)
x_t3st, x_3val, y_t3st, y_3val = train_test_split(x_t3mp, y_t3mp, test_size = .03, random_state=5)

In [None]:
# build model
# (Conv2D, MaxPooling2D and Flatten come from keras.layers in the import cell)
m0del = keras.models.Sequential()
# input shape is taken from the training images rather than hard-coded
m0del.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=x_tra1n.shape[1:])) # convolutional networks to make image smaller
m0del.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
m0del.add(MaxPooling2D(pool_size=(2,2))) # pool pixels to get largest value
m0del.add(Flatten()) # flatten model to prepare for hidden layer
m0del.add(Dense(50, activation='relu')) # fully connected, 'hidden' layer
# can add dropout here if model overfits
m0del.add(Dense(3, activation='softmax')) # prediction layer

# show summary of model
m0del.summary()

In [None]:
# 3-way softmax with one-hot labels -> categorical cross-entropy
# (binary_crossentropy would score the three outputs as independent yes/no problems)
m0del.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(learning_rate=.001), metrics=['accuracy'])
m0del.fit(x_tra1n, y_tra1n, epochs=100, validation_data=(x_t3st, y_t3st))

# evaluate returns [loss, accuracy] because one metric was requested at compile time
t3st_eval = m0del.evaluate(x_t3st, y_t3st, verbose=False)
print("Test loss, accuracy: %s, %s" % (t3st_eval[0], t3st_eval[1]))

In [None]:
# Spot check on the tiny eval split of the raw-pixel data: true one-hot labels,
# then the CNN's softmax outputs for the same samples as the cell result.
print(y_3val)
m0del.predict(x_3val)

# Model using falses (pampas grasses)

In [None]:
# Stack the poison (ivy + oak) HOG features with the "False" folder features.
poison_falses = np.concatenate((poison_hog, false_pg_hog))
pf_labels = np.concatenate((poison_labels, false_labels))

# 80% train; the held-out 20% is split again into a test set and a tiny eval set.
# Both splits are seeded so Restart & Run All reproduces the same partitions.
a_train, a_temp, b_train, b_temp = train_test_split(poison_falses, pf_labels, test_size=.2, random_state=5)
a_test, a_eval, b_test, b_eval = train_test_split(a_temp, b_temp, test_size=.03, random_state=5)

print('atrain:\t',len(a_train))
print('atest:\t',len(a_test))
print('aeval:\t',len(a_eval))
print(b_eval)

# length of one HOG feature vector (the model's input width)
len(a_train[0])

atrain:	 313
atest:	 76
aeval:	 3
[[0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


42849

In [None]:
# Fully connected classifier for the poison-vs-"False" data: two hidden layers,
# light dropout, and a 3-way softmax over the one-hot classes.
fmodel = keras.models.Sequential()
# input width is read from the training data instead of a hard-coded 42849
fmodel.add(Dense(50, activation='relu', input_shape=(a_train.shape[1],)))
fmodel.add(Dense(50, activation='relu'))
fmodel.add(Dropout(.1))
fmodel.add(Dense(3, activation='softmax'))

fmodel.summary()

Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_45 (Dense)             (None, 50)                2142500   
_________________________________________________________________
dense_46 (Dense)             (None, 50)                2550      
_________________________________________________________________
dropout_2 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_47 (Dense)             (None, 3)                 153       
Total params: 2,145,203
Trainable params: 2,145,203
Non-trainable params: 0
_________________________________________________________________


In [None]:
# 3-way softmax with one-hot labels -> categorical cross-entropy
# (binary_crossentropy would score the three outputs as independent yes/no problems)
fmodel.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(learning_rate=.001), metrics=['accuracy'])
fmodel.fit(a_train, b_train, epochs=100, validation_data=(a_test, b_test))

# Evaluate the model that was just trained. This previously called `model`
# (the look-alike network), so the printed numbers described the wrong model.
ab_test_eval = fmodel.evaluate(a_test, b_test, verbose=False)
print("Test loss, accuracy: %s, %s" % (ab_test_eval[0], ab_test_eval[1]))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Spot check on the tiny eval split of the poison-vs-"False" data: true one-hot
# labels, then fmodel's softmax outputs for the same samples as the cell result.
print(b_eval)
fmodel.predict(a_eval)

[[0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]


array([[0.3102557 , 0.34494147, 0.3448028 ],
       [0.3102557 , 0.34494147, 0.3448028 ],
       [0.3102557 , 0.34494147, 0.3448028 ]], dtype=float32)

# Model using irises

In [None]:
# Stack the poison (ivy + oak) HOG features with the iris features.
poison_iris = np.concatenate((poison_hog, iris_hog))
pi_labels = np.concatenate((poison_labels, iris_labels))

# 80% train; the held-out 20% is split again into a test set and a tiny eval set.
# Both splits are seeded so Restart & Run All reproduces the same partitions.
i_train, i_temp, j_train, j_temp = train_test_split(poison_iris, pi_labels, test_size=.2, random_state=5)
i_test, i_eval, j_test, j_eval = train_test_split(i_temp, j_temp, test_size=.03, random_state=5)

print('itrain:\t',len(i_train))
print('itest:\t',len(i_test))
print('ieval:\t',len(i_eval))
print(j_eval)

# length of one HOG feature vector (the model's input width)
len(i_train[0])

itrain:	 316
itest:	 77
ieval:	 3
[[0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]]


42849

In [None]:
# Fully connected classifier for the poison-vs-iris data: two hidden layers,
# light dropout, and a 3-way softmax over the one-hot classes.
imodel = keras.models.Sequential()
# input width is read from the training data instead of a hard-coded 42849
imodel.add(Dense(50, activation='relu', input_shape=(i_train.shape[1],)))
imodel.add(Dense(50,activation='relu'))
imodel.add(Dropout(.1))
imodel.add(Dense(3, activation='softmax'))

imodel.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_50 (Dense)             (None, 50)                2142500   
_________________________________________________________________
dense_51 (Dense)             (None, 50)                2550      
_________________________________________________________________
dropout_3 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_52 (Dense)             (None, 3)                 153       
Total params: 2,145,203
Trainable params: 2,145,203
Non-trainable params: 0
_________________________________________________________________


In [None]:

# 3-way softmax with one-hot labels -> categorical cross-entropy
# (binary_crossentropy would score the three outputs as independent yes/no problems)
imodel.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy'])
imodel.fit(i_train, j_train, epochs=100, validation_data=(i_test, j_test))

# Evaluate the model that was just trained. This previously called `model`
# (the look-alike network), so the printed numbers described the wrong model.
ij_test_eval = imodel.evaluate(i_test, j_test, verbose=False)
print(f'Test loss, accuracy: {ij_test_eval[0]}, {ij_test_eval[1]}')


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:

# Spot check on the tiny iris eval split: true one-hot labels, then the softmax
# outputs of the iris model. This previously predicted with `fmodel`, which was
# trained on the poison-vs-"False" data.
print(j_eval)
imodel.predict(i_eval)

[[0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]]


array([[0.33505005, 0.34478676, 0.32016316],
       [0.33505005, 0.34478676, 0.32016316],
       [0.33505005, 0.34478676, 0.32016316]], dtype=float32)