<a href="https://colab.research.google.com/github/bvschwartz/selfie-ml/blob/master/Selfie.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Download** and sort selfie data

In [0]:
# get the extra .py files from git
!rm -rf master.zip selfie-ml-master wget.log
!wget https://github.com/bvschwartz/selfie-ml/archive/master.zip -o wget.log
!unzip -q master.zip
import sys
print(sys.path)
if not '/content/selfie-ml-master' in sys.path:
  print ("adding to path")
  sys.path.insert(0, '/content/selfie-ml-master')

In [0]:
from google.colab import output
def beep():
  """Play a short beep sound via JavaScript; used to signal that a long cell has finished."""
  beep_url = "https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg"
  output.eval_js('new Audio("' + beep_url + '").play()')

In [0]:
# get the selfie dataset
# Downloads the archive from crcv.ucf.edu into the current directory, unpacks it
# there, and then deletes the tarball.
!wget "https://www.crcv.ucf.edu/data/Selfie/Selfie-dataset.tar.gz"
!tar xzf ./Selfie-dataset.tar.gz
!rm ./Selfie-dataset.tar.gz
# audible signal that the download and extraction steps are done
beep()

In [0]:
# break selfie data into labeled directories
# NOTE(review): `segment` is presumably one of the helper .py files fetched from the
# selfie-ml repo, so this import needs the repo directory on sys.path — confirm.
# NOTE(review): copy_all() presumably fills the dataset/training and dataset/testing
# trees that the next cell reads — confirm against segment.py.
import segment
segment.copy_all()

In [0]:
# create testing and training imagedata
from keras.preprocessing.image import ImageDataGenerator

train_dir = 'dataset/training/color'
test_dir = 'dataset/testing/color'

# Options shared by both directory iterators: integer (sparse) labels,
# batches of 32, RGB images resized to 306x306.
selfie_flow_options = dict(
  class_mode='sparse',
  batch_size=32,
  target_size=(306, 306),
  color_mode="rgb",
)

# Every pixel value is multiplied by 1/255 as the images are loaded.
train_datagen = ImageDataGenerator(rescale=1.0/255)
print("loading training data")
train_it = train_datagen.flow_from_directory(train_dir, shuffle=True, **selfie_flow_options)
# Number of classes found under train_dir; later used as the model's output width.
cats = len(train_it.class_indices)
print("categories:", cats)

print("loading testing data")
test_datagen = ImageDataGenerator(rescale=1.0/255)
# The test iterator is left unshuffled.
test_it = test_datagen.flow_from_directory(test_dir, shuffle=False, **selfie_flow_options)


In [0]:
# OR... get the figaro dataset
!rm -r Figaro.zip published_DB __MACOSX
!wget -q "http://projects.i-ctm.eu/sites/default/files/AltroMateriale/207_Michele%20Svanera/Figaro.zip"
!unzip -q ./Figaro.zip
!rm -r __MACOSX

In [0]:
# ... create training set for figaro data
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# One generator serves both subsets: validation_split=0.2 sets aside a fraction of
# the images for the 'validation' subset, and the rest form the 'training' subset.
fig_generator = ImageDataGenerator(rescale=1.0/255, validation_split=0.2)

figaro_dir = 'published_DB/Originals'
# Options shared by both subsets: 306x306 RGB images, integer labels, batches of 32.
figaro_flow_options = dict(
  target_size=(306, 306),
  class_mode='sparse',
  batch_size=32,
  color_mode="rgb",
)

fig_train = fig_generator.flow_from_directory(figaro_dir, subset='training', shuffle=True,
                                              **figaro_flow_options)
fig_test = fig_generator.flow_from_directory(figaro_dir, subset='validation', shuffle=False,
                                             **figaro_flow_options)
cats = len(fig_test.class_indices)
print(cats)

# Expose the Figaro iterators under the names the model cells use.
train_it, test_it = fig_train, fig_test


In [0]:
import tensorflow as tf
import numpy as np

# Convolutional front end: three Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with
# 16, 32 and 64 filters. Only the very first layer declares the 306x306 RGB input.
model_layers = []
for filters in (16, 32, 64):
  first_layer_args = {} if model_layers else {'input_shape': (306, 306, 3)}
  model_layers.append(
      tf.keras.layers.Conv2D(filters, (3,3), activation='relu', **first_layer_args))
  model_layers.append(tf.keras.layers.MaxPooling2D(2,2))

# Classifier head: flatten, one hidden dense layer with dropout, then a softmax
# over the `cats` classes counted by the dataset cell.
model_layers += [
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(cats, activation='softmax'),
]

model = tf.keras.models.Sequential(model_layers)

# The sparse loss pairs with the integer labels produced by class_mode='sparse'.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()

In [0]:
# Train for 10 epochs on whichever iterator is currently bound to train_it
# (both the selfie cell and the figaro cell assign that name).
model.fit(train_it, epochs=10)
# audible signal that training is done
beep()

In [0]:
# Score the trained model on test_it; the return value of evaluate() is shown as
# the cell output.
model.evaluate(test_it)

In [0]:
# scratch: look at training data
# Pull one batch with the built-in next(): it works on old and new Keras alike,
# whereas some newer releases no longer provide an iterator .next() method.
(x, y) = next(train_it)
# number of images in the batch, followed by their integer labels
print(len(x), y)

In [0]:
# scratch: display some of the images and labels
# Imports come first so this cell does not rely on `np` from an earlier cell.
import numpy as np
import matplotlib.pyplot as plt

# Restart the iterator, then pull one batch. The built-in next() is used because
# some newer Keras releases no longer provide an iterator .next() method.
train_it.reset()
(images, indexes) = next(train_it)

# standalone numpy checks on literal data (not related to the image batch)
print(np.histogram([0, 0, 0, 1, 1, 2, 2, 2, 2], bins=3, range=[0, 3]))
print(np.sum([[1, 2, 3, 4], [4, 3, 2, 1], [10, 9, 8, 7]], axis=0))

# build the index -> category lookup by inverting class_indices (label -> index)
index_to_label = [None] * len(train_it.class_indices)
for label, index in train_it.class_indices.items():
  index_to_label[index] = label
print(index_to_label)

# display every image of the batch, preceded by its category name
print(images[0].shape)
for image, class_index in zip(images, indexes):
  # labels come back as floats with class_mode='sparse', hence the int() cast
  label = index_to_label[int(class_index)]
  print(label)
  plt.figure()
  plt.imshow(image)
  plt.grid(False)
  plt.show()


In [0]:
# scratch: histogram of predictions
# p holds one row of class probabilities per test image.
p = model.predict(test_it, verbose=1)
# Count how often each class index 0..cats-1 is the most probable one.
predicted_classes = np.argmax(p, axis=1)
print(np.bincount(predicted_classes, minlength=cats))

In [0]:
# scratch: compare prediction to ground truth for a batch of test data
# n is one (images, labels) batch. The built-in next() is used because some newer
# Keras releases no longer provide an iterator .next() method.
n = next(test_it)
p = model.predict(n[0])
# one line per image: true label, predicted class, and the raw probabilities
for truth, probabilities in zip(n[1], p):
  print("correct:", truth, "argmax:", np.argmax(probabilities), "raw:", probabilities)
#print(n[1])
#print(p)

In [0]:
# scratch: display an image
import numpy as np
import matplotlib.pyplot as plt

# Pull one (images, labels) batch. The built-in next() is used because some newer
# Keras releases no longer provide an iterator .next() method.
n = next(train_it)
print(n[0].shape)
# first image of the batch
x = n[0][0]
print(n[0])
plt.figure()
print(x.shape)
plt.imshow(x)
plt.colorbar()
plt.grid(False)
plt.show()
