# Setup Environment and Prep Images


## Install TF 2.0 w/ GPU

In [0]:
# Install the TensorFlow 2.0 nightly GPU preview build (-qq keeps pip quiet).
# NOTE(review): no version is pinned, so a rerun may install a different nightly.
!pip install -qq tf-nightly-gpu-2.0-preview

[K     |████████████████████████████████| 378.8MB 76kB/s 
[K     |████████████████████████████████| 4.1MB 25.4MB/s 
[K     |████████████████████████████████| 61kB 21.2MB/s 
[K     |████████████████████████████████| 450kB 44.6MB/s 
[?25h  Building wheel for opt-einsum (setup.py) ... [?25l[?25hdone


In [0]:
# Abort early when the runtime has no GPU attached.
# To enable one: Runtime (menu at the top) -> "Change Runtime Type" -> GPU.
import tensorflow as tf

device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [0]:
# Import tools
import numpy as np
import pandas as pd
import itertools as IT
import matplotlib.pyplot as plt
import csv
# Use matplotlib's dark background style for every figure in the notebook.
plt.style.use(['dark_background'])
from PIL import Image
import requests
from io import BytesIO

# PyDrive Configs
# Authenticate the Colab user, then build a PyDrive client from the
# application-default credentials.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Gcloud Configs
from google.cloud import storage
# NOTE(review): `auth` is imported and authenticate_user() is called a second
# time here; both already happened in the PyDrive section above.
from google.colab import auth
auth.authenticate_user()
from oauth2client.service_account import ServiceAccountCredentials
import os

%matplotlib inline
# NOTE(review): `ply` looks like a typo for `plt`, which is already imported above.
import matplotlib.pyplot as ply

# NOTE(review): `os` was already imported a few lines earlier.
import os, json
from glob import glob

# TF Configs
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model,load_model,Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout,Flatten, Input
from tensorflow.keras import backend as K

import tensorflow as tf

# Select the GCP project for the gcloud CLI and for the Cloud Storage client,
# and remember the bucket that holds the image archives.
!gcloud config set project 'transit-insurance-analytics'
client = storage.Client(project='transit-insurance-analytics')
bucket_name = 'bukalapak-gadget-images'

[?25l[K     |▎                               | 10kB 19.5MB/s eta 0:00:01[K     |▋                               | 20kB 1.8MB/s eta 0:00:01[K     |█                               | 30kB 2.6MB/s eta 0:00:01[K     |█▎                              | 40kB 1.7MB/s eta 0:00:01[K     |█▋                              | 51kB 2.1MB/s eta 0:00:01[K     |██                              | 61kB 2.5MB/s eta 0:00:01[K     |██▎                             | 71kB 2.9MB/s eta 0:00:01[K     |██▋                             | 81kB 3.3MB/s eta 0:00:01[K     |███                             | 92kB 3.7MB/s eta 0:00:01[K     |███▎                            | 102kB 2.8MB/s eta 0:00:01[K     |███▋                            | 112kB 2.8MB/s eta 0:00:01[K     |████                            | 122kB 2.8MB/s eta 0:00:01[K     |████▎                           | 133kB 2.8MB/s eta 0:00:01[K     |████▋                           | 143kB 2.8MB/s eta 0:00:01[K     |█████                     

W0730 02:25:41.127802 140577716066176 _default.py:280] No project ID could be determined. Consider running `gcloud config set project` or setting the GOOGLE_CLOUD_PROJECT environment variable


## Import Images from GCS

In [0]:
!gsutil -m cp -r gs://bukalapak-gadget-images/phone.zip /tmp/
!gsutil -m cp -r gs://bukalapak-gadget-images/laptop.zip /tmp/
!gsutil -m cp -r gs://bukalapak-gadget-images/smartwatch.zip /tmp/
!gsutil -m cp -r gs://bukalapak-gadget-images/cable.zip /tmp/
!gsutil -m cp -r gs://bukalapak-gadget-images/charger.zip /tmp/
!gsutil -m cp -r gs://bukalapak-gadget-images/keyboard.zip /tmp/
!gsutil -m cp -r gs://bukalapak-gadget-images/tablet.zip /tmp/
!gsutil -m cp -r gs://bukalapak-gadget-images/screenguard.zip /tmp/
!gsutil -m cp -r gs://bukalapak-gadget-images/other.zip /tmp/

Copying gs://bukalapak-gadget-images/phone.zip...
\ [1/1 files][ 29.1 MiB/ 29.1 MiB] 100% Done                                    
Operation completed over 1 objects/29.1 MiB.                                     
Copying gs://bukalapak-gadget-images/laptop.zip...
\ [1/1 files][ 29.7 MiB/ 29.7 MiB] 100% Done                                    
Operation completed over 1 objects/29.7 MiB.                                     
Traceback (most recent call last):
  File "/tools/google-cloud-sdk/bin/bootstrapping/gsutil.py", line 13, in <module>
    import bootstrapping
  File "/tools/google-cloud-sdk/bin/bootstrapping/bootstrapping.py", line 46, in <module>
    from googlecloudsdk.core.updater import update_manager
  File "/tools/google-cloud-sdk/lib/googlecloudsdk/core/updater/update_manager.py", line 39, in <module>
    from googlecloudsdk.core.resource import resource_printer
  File "/tools/google-cloud-sdk/lib/googlecloudsdk/core/resource/resource_printer.py", line 42, in <module>
    fr

In [0]:
!unzip -qq /tmp/phone.zip -d /content/phone/
!unzip -qq /tmp/laptop.zip -d /content/laptop/
!unzip -qq /tmp/smartwatch.zip -d /content/smartwatch/
!unzip -qq /tmp/cable.zip -d /content/cable/
!unzip -qq /tmp/charger.zip -d /content/charger/
!unzip -qq /tmp/keyboard.zip -d /content/keyboard/
!unzip -qq /tmp/tablet.zip -d /content/tablet/
!unzip -qq /tmp/screenguard.zip -d /content/screenguard/
!unzip -qq /tmp/other.zip -d /content/other/

## Converting raw files in folders into something we can feed into tf.data

We walk each category folder with `os.walk` to list its files, then collect the paths into a dataframe and add integer class numbers.

We then split them so that 10% is held out as a validation set and the rest is used for training.

Finally, we randomly shuffle the rows.


In [0]:
import glob
import pandas as pd

# (absolute file path, class name) records collected by extract().
data = []
categories = ["phone", "laptop", "smartwatch", "cable", "charger", "other", "screenguard", "keyboard", "tablet"]

def extract(category):
  """Append one (absolute file path, class name) record to `data` for every
  file found anywhere below ./<category>.

  Each file is labelled with `category` itself, not with the name of the
  folder that directly contains it, so nested or stray sub-folders inside an
  archive cannot introduce classes beyond the entries of `categories`.
  """
  path = "./" + category
  for root, _dirs, files in os.walk(path, topdown=True):
    for name in files:
      filename = os.path.abspath(os.path.join(root, name))
      data.append((filename, category))

for category in categories:
  extract(category)

df = pd.DataFrame(data, columns=['filename', 'class_name'])

# Fix the category set to the sorted list of known categories so that each
# class keeps the same integer code even when a folder turns out to be empty.
df['class_name'] = pd.Categorical(df['class_name'], categories=sorted(categories))
df['class'] = df['class_name'].cat.codes

# Shuffle with a fixed seed so that a rerun produces the same row order.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

print(df.head())
print(len(df))
df.dtypes

In [0]:
# Split into train and validation sets
train_set_percentage = .9

# Compute the cut point once and take the validation rows BEFORE `df` is
# narrowed to the training rows. Slicing `df_val` out of the already
# truncated frame would make every validation row a training row as well.
split_index = int(len(df) * train_set_percentage)
df_val = df[split_index:]
df = df[:split_index]

# shuffle
df = df.sample(frac=1).reset_index(drop=True)
df_val = df_val.sample(frac=1).reset_index(drop=True)

## Make the pipeline for loading and resizing the images

In [0]:
# Reads an image from a file, decodes it into a tensor, and resizes it
# to a fixed shape.
img_rows, img_cols, channels = 224,224,3
num_classes = 9
batch_size = 32

def _parse_function(filename, label):
  """Load one JPEG, apply random augmentations and resize it.

  Args:
    filename: scalar string tensor holding the path of a JPEG file.
    label: scalar integer tensor holding the class index.

  Returns:
    A pair (image, one_hot_label): the image resized to
    (img_rows, img_cols, channels) and the label one-hot encoded over
    num_classes entries.
  """
  image_string = tf.io.read_file(filename)
  # Ask the decoder for `channels` colour planes so a grayscale JPEG is
  # expanded to RGB; random_hue and the shape check below need 3 channels.
  image_decoded = tf.image.decode_jpeg(image_string, channels=channels)
  image_decoded = tf.image.random_flip_left_right(image_decoded)
  image_decoded = tf.image.random_flip_up_down(image_decoded)
  image_decoded = tf.image.random_brightness(image_decoded, 0.5)
  image_decoded = tf.image.random_hue(image_decoded, 0.5)
  image_decoded = tf.image.random_saturation(image_decoded, 0, 0.5)
  image_resized = tf.image.resize(image_decoded, [img_rows, img_cols])
  # ensure_shape already pins the static shape, so no separate set_shape call
  # is needed.
  image_resized = tf.ensure_shape(image_resized, shape=(img_rows, img_cols, channels))
  label = tf.one_hot(label, num_classes)
  return image_resized, label



# CNN Pipeline

## Assembling the Data pipeline using tf.data

In [0]:
# Training input pipeline: (path, class id) pairs are repeated five times,
# parsed into (image, one-hot label), shuffled through a 5000-element buffer,
# repeated indefinitely and grouped into full batches.
train_filenames = tf.cast(df['filename'].values, tf.string)
train_labels = tf.cast(df['class'].values, tf.int32)
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_filenames, train_labels))
    .repeat(5)
    .map(_parse_function)
    .shuffle(5000)
    .repeat()
    .batch(batch_size, drop_remainder=True)
)

In [0]:
# Validation input pipeline, built with the same steps as the training one
# but fed from the df_val rows.
valid_filenames = tf.cast(df_val['filename'].values, tf.string)
valid_labels = tf.cast(df_val['class'].values, tf.int32)
valid_dataset = (
    tf.data.Dataset.from_tensor_slices((valid_filenames, valid_labels))
    .repeat(5)
    .map(_parse_function)
    .shuffle(5000)
    .repeat()
    .batch(batch_size, drop_remainder=True)
)

This will download the pre-trained *MobileNet* (ImageNet) weights.

In [0]:
# Build the convolutional base: MobileNet with ImageNet weights and without
# its classification head (include_top=False).
base_model = tf.keras.applications.MobileNet(include_top=False, weights='imagenet')

In [0]:
# Print a layer-by-layer summary of the pre-trained base model.
base_model.summary()

In [0]:
# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# one fully connected layer, followed by dropout
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)

# softmax output with one unit per class; sized from num_classes so it stays
# in step with the one-hot labels produced by the input pipeline
predictions = Dense(num_classes, activation='softmax')(x)


In [0]:
# this is the model we will train
# It maps the base model's input tensor to the `predictions` output stacked on top of it.
model = Model(inputs=base_model.input, outputs=predictions)

In [0]:
# First stage: train mainly the newly added (randomly initialized) top layers.
# Every MobileNet layer is frozen except those whose name ends in "bn",
# which keep their current trainable setting.
for layer in base_model.layers:
    print(layer.name)
    if not layer.name.endswith("bn"):
      layer.trainable = False


In [0]:
# List every layer of the assembled model together with its trainable flag.
for layer in model.layers:
    print(layer.name,' Trainable =',layer.trainable)

In [0]:
# Print a layer-by-layer summary of the full model (base plus new head).
model.summary()

In [0]:
# `learning_rate` is the current argument name; `lr` is only a deprecated alias.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)

# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [0]:
# `df` already holds only the training rows and `df_val` only the validation
# rows, so the number of full batches comes straight from their lengths.
# Scaling len(df) by the split fraction again would undercount the training
# steps and tie the validation steps to the wrong frame.
train_steps = len(df) // batch_size
val_steps = len(df_val) // batch_size
epochs = 1

print('train steps:',train_steps)
print('val steps:',val_steps)

In [0]:
# Train the model, validating at the end of each epoch
history = model.fit(
    train_dataset,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=valid_dataset,
    validation_steps=val_steps,
)

In [0]:
# Continue training for four more epochs, again with validation
epochs = 4

history = model.fit(
    train_dataset,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=valid_dataset,
    validation_steps=val_steps,
)

In [0]:
# Evaluate on val_steps batches of the validation pipeline; index 1 of the
# result is the accuracy metric requested at compile time.
metrics = model.evaluate(valid_dataset, steps=val_steps)
print("model accuracy:",metrics[1])

## Test an Image

In [0]:
#@title Input URL of image to test with:
im_url = "https://www.androidcentral.com/sites/androidcentral.com/files/styles/xlarge_wm_brw/public/article_images/2018/12/totallee-pixel-3-case-review-5.jpg?itok=icPqHcW2" #@param {type:"string"}

from PIL import Image

def make_square(im, min_size=256, fill_color=(0, 0, 0)):
    """Return a new square RGB image with `im` pasted at its centre.

    The side of the square is the largest of `min_size` and the two
    dimensions of `im`; the area not covered by `im` is `fill_color`.
    """
    width, height = im.size
    side = max(min_size, width, height)
    canvas = Image.new('RGB', (side, side), fill_color)
    offset = ((side - width) // 2, (side - height) // 2)
    canvas.paste(im, offset)
    return canvas

response = requests.get(im_url)
# Stop on an HTTP error instead of handing an error page to PIL.
response.raise_for_status()
img = make_square(Image.open(BytesIO(response.content)))

# Use the tf.keras helpers: the model is a tf.keras model, so the standalone
# `keras` package should not be mixed in.
from tensorflow.keras.preprocessing import image
import numpy as np

size = 224, 224
# LANCZOS is the filter that the deprecated ANTIALIAS name referred to.
img.thumbnail(size, Image.LANCZOS)

x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
# No preprocess_input here: the training pipeline feeds the resized pixel
# values to the network without any further normalisation, so inference has
# to do the same.

preds = model.predict(x)
print('Predicted:', preds)

# Index and score of the most likely class.
index = int(np.argmax(preds[0]))
value = preds[0][index]

# Class index -> class name.
prediction = {
    0 : "cable",
    1 : "charger",
    2 : "keyboard",
    3 : "laptop",
    4 : "other",
    5 : "phone",
    6 : "screenguard",
    7 : "smartwatch",
    8 : "tablet"
}

print("Prediction:", prediction[index] + ", Confidence:", value, "\n")

# Per-class scores.
for key in prediction:
  print(str(prediction[key]) + ": " + str(preds[0][key]))

img

In [0]:
# Print the layer-by-layer summary of the model again.
model.summary()

### Get the weights

In [0]:
# weights = model.get_weights()

In [0]:
# model.set_weights(weights)

## Fine-tuning

In [0]:
model.trainable = True

# Keep every layer frozen up to the first one whose name contains
# 'conv_dw_13'; that layer and all layers after it become trainable.
set_trainable = False
for layer in model.layers:
    if 'conv_dw_13' in layer.name:
        set_trainable = True
    layer.trainable = set_trainable

# Changed `trainable` flags only take effect once the model is compiled
# again. A learning rate ten times smaller than in the first stage keeps the
# updates to the pre-trained weights small.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [0]:
# Show each layer's name next to its trainable flag after the change above.
for layer in model.layers:
    print(layer.name,layer.trainable)

In [0]:
# model.set_weights(weights)

In [0]:
# Fine-tuning run: 15 epochs, with validation
history = model.fit(
    train_dataset,
    steps_per_epoch=train_steps,
    epochs=15,
    validation_data=valid_dataset,
    validation_steps=val_steps,
)

In [0]:
# Evaluate the fine-tuned model on val_steps batches of the validation
# pipeline; index 1 of the result is the accuracy metric.
metrics = model.evaluate(valid_dataset, steps=val_steps)
print("model accuracy:",metrics[1])

## Results

In [0]:
# One ordered pass over the validation rows: no shuffle and no repeat, so the
# predictions line up with the rows of df_val.
test_dataset = tf.data.Dataset.from_tensor_slices((tf.cast(df_val['filename'].values, tf.string),
                                                    tf.cast(df_val['class'].values, tf.int32) ))
test_dataset = test_dataset.map(_parse_function)
test_dataset = test_dataset.batch(batch_size, drop_remainder=True)

#Confusion Matrix and Classification Report
from sklearn.metrics import classification_report, confusion_matrix
# predict() runs until the dataset is exhausted, so no step count is needed.
Y_pred = model.predict(test_dataset)
y_pred = np.argmax(Y_pred, axis=1)
# drop_remainder=True discards the last partial batch, so trim the ground
# truth to the number of predictions actually made.
y_true = df_val['class'].tolist()[:len(y_pred)]

# Take the class names from the categorical column so they match the integer
# codes used as labels; a hand-written list can drift out of step with the
# data and make classification_report fail.
target_names = list(df_val['class_name'].cat.categories)
labels = list(range(len(target_names)))

cm = confusion_matrix(y_true, y_pred, labels=labels)
print('Confusion Matrix')
print(cm, "\n")
print('Classification Report')
print(classification_report(y_true, y_pred, labels=labels, target_names=target_names), "\n")

plt.matshow(cm)
plt.colorbar()

## Save Model

In [0]:
# Save the trained model as an HDF5 file in the current working directory.
model.save("./gadget_classifier_v4_mobile.h5")

In [0]:
# Upload to Gdrive
from google.colab import drive
drive.mount('drive')
!cp gadget_classifier_v4_mobile.h5 /content/drive/My\ Drive/Bukalapak\ Gadget/Bukalapak\ Gadget\ Images/models/

In [0]:
# Show the model object's repr as the cell output.
model