<a href="https://colab.research.google.com/github/zerotodeeplearning/ztdl-masterclasses/blob/master/solutions_do_not_open/Image_Classification_solution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Learn with us: www.zerotodeeplearning.com

Copyright © 2021: Zero to Deep Learning ® Catalit LLC.

In [None]:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Image Classification

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [None]:
from sklearn.datasets import load_digits
from tensorflow.keras.datasets import mnist, fashion_mnist, cifar10

### Helper functions
Let's define a few helper functions.

In [None]:
def describe_dataset(name, X_train, X_test, y_train, y_test):
  """Print a human-readable summary of a train/test image dataset.

  The report covers the number of axes of the feature tensors, the shape of
  each split, the share of samples held out for testing, the per-image shape,
  the dtype and value range of the training features, and the number of
  distinct labels found in `y_train`.

  Args:
    name: Title printed at the top of the report.
    X_train: Training features; the first axis indexes samples.
    X_test: Test features; the first axis indexes samples.
    y_train: Training labels.
    y_test: Test labels.

  Returns:
    None. The report is written to stdout.
  """
  xtrs = X_train.shape
  xtes = X_test.shape
  # Share of the test split relative to all samples, as a percentage.
  percent = np.round(100 * xtes[0] / (xtes[0] + xtrs[0]), 2)
  dtype = X_train.dtype
  # The value range is measured on the training split only.
  m = X_train.min()
  M = X_train.max()
  print("""
\033[1mDataset: {name}
==========================================\033[0m

The feature tensors X_train and X_test have \033[1m{axes} axes\033[0m.
  
X_train.shape:\t {xtrs}
There are {trimg} images in the training set.

X_test.shape:\t {xtes}
There are {teimg} images in the test set.

Test size is {percent}% of total.

Each image has {pix} pixels.

Pixels are \033[1m{dtype} values\033[0m  between {m} and {M}

y_train.shape:\t {ytrs}
y_test.shape:\t {ytes}

There are \033[1m{cls} classes\033[0m in the dataset.
  
  """.format(name=name,
             axes=len(xtrs),
             xtrs=xtrs,
             trimg=xtrs[0],
             xtes=xtes,
             teimg=xtes[0],
             percent=percent,
             pix=xtrs[1:],
             dtype=dtype,
             m=m,
             M=M,
             ytrs=y_train.shape,
             ytes=y_test.shape,
             cls=len(np.unique(y_train))
             ))

def flatten_images(X_train, X_test):
  """Reshape both splits so that every sample becomes a single 1-D row.

  The first axis (number of samples) is kept; all remaining axes are merged.

  Returns:
    A `(X_train_flat, X_test_flat)` tuple.
  """
  flat_train, flat_test = (
      split.reshape(len(split), -1) for split in (X_train, X_test)
  )
  return flat_train, flat_test

def load_images_dataset(option='scikit_digits', flatten=False):
  """Load one of the supported image datasets, describe it and return it.

  Args:
    option: One of 'scikit_digits', 'keras_digits', 'keras_fashion' or
      'keras_cifar'.
    flatten: When true, each image is reshaped to a 1-D vector via
      `flatten_images` before the description is printed.

  Returns:
    The tuple `(X_train, X_test, y_train, y_test)`.

  Raises:
    ValueError: If `option` is not one of the supported names.
  """
  if option == 'scikit_digits':
    # This source comes as a single (X, y) pair, so it is split here.
    features, targets = load_digits(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.15, random_state=0)
    name = "Scikit Learn digits"
  else:
    # The Keras sources already ship with a train/test split.
    if option == 'keras_digits':
      source = mnist
      name = "Keras MNIST digits"
    elif option == 'keras_fashion':
      source = fashion_mnist
      name = "Keras Fashion MNIST clothes.\nclass_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']"
    elif option == 'keras_cifar':
      source = cifar10
      name = "Keras Cifar10 objects"
    else:
      raise ValueError(f"{option} is not a valid option.")
    (X_train, y_train), (X_test, y_test) = source.load_data()

  if flatten:
    X_train, X_test = flatten_images(X_train, X_test)

  describe_dataset(name, X_train, X_test, y_train, y_test)
  return X_train, X_test, y_train, y_test

def display_few_images(X, y, cmap=None, asarray=False, n_classes=10, n_rows=4):
  """Show up to `n_rows` samples for each of the classes 0..n_classes-1.

  Samples are laid out on an `n_rows` x `n_classes` grid with one column per
  class; the class id is used as the title of each column.

  Args:
    X: Array of images. Each image is either already 2-D/3-D or a flat vector
      whose length is a perfect square (it is then reshaped to a square).
    y: Integer labels aligned with `X`; both `(n,)` and `(n, 1)` work.
    cmap: Colormap forwarded to `plt.imshow`.
    asarray: When true, plot each image as a 1-D curve of its raveled values
      instead of rendering it as an image.
    n_classes: Number of classes (grid columns) to display.
    n_rows: Maximum number of samples (grid rows) per class.
  """
  print(f"Displaying {n_classes*n_rows} images", end='')
  if asarray:
    print(" as flattened arrays")
  else:
    print()

  # Flatten the labels once so (n, 1) label arrays can be used as a mask.
  flat_labels = np.ravel(y)

  plt.figure(figsize=(14, 5))
  for i in range(n_classes):
    imgs = X[flat_labels == i][:n_rows]
    # Iterate over what is actually available: a class may have fewer than
    # n_rows samples (or none at all).
    for j, img in enumerate(imgs):
      plt.subplot(n_rows, n_classes, j*n_classes + i + 1)
      if asarray:
        plt.plot(img.ravel())
      else:
        if img.ndim == 1:
          # Flat vector: restore the square image. The side is derived from
          # the scalar size rather than from the shape tuple.
          side = int(round(np.sqrt(img.size)))
          img = img.reshape(side, side)
        plt.imshow(img, cmap=cmap)
      plt.axis('off')
      if j == 0:
        plt.title(str(i))
  # Adjust the layout once, after every subplot has been created.
  plt.tight_layout()

### Digits dataset

In [None]:
X_train, X_test, y_train, y_test = load_images_dataset('scikit_digits')

In [None]:
display_few_images(X_train, y_train, cmap='gray')

In [None]:
display_few_images(X_train, y_train, asarray=True)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
def train_eval_scikit(model):
  """Fit `model` on the current training split and print both scores.

  Reads the notebook-level `X_train`, `y_train`, `X_test` and `y_test`, so it
  always operates on whichever dataset was loaded last.

  Args:
    model: Estimator exposing `fit(X, y)` and `score(X, y)`.
  """
  model.fit(X_train, y_train)
  # Both scores are computed before anything is printed.
  scores = [model.score(features, targets)
            for features, targets in ((X_train, y_train), (X_test, y_test))]
  templates = ("Train score:\t {:0.3}", "Test score:\t {:0.3}")
  for template, value in zip(templates, scores):
    print(template.format(value))


def train_eval_tf(model, epochs=5, batch_size=32):
  """Train `model`, print train/test scores and plot the training history.

  Reads the notebook-level `X_train`, `y_train`, `X_test` and `y_test`. Ten
  percent of the training data is held out for validation during fitting.
  The result of `model.evaluate` is unpacked into two values, so the model is
  expected to report exactly a loss and one metric.

  Args:
    model: Compiled model exposing `fit` and `evaluate`.
    epochs: Number of training epochs.
    batch_size: Mini-batch size used during fitting.
  """
  fit_kwargs = dict(epochs=epochs, batch_size=batch_size, validation_split=0.1)
  history = model.fit(X_train, y_train, **fit_kwargs)

  _train_loss, train_score = model.evaluate(X_train, y_train, verbose=0)
  _test_loss, test_score = model.evaluate(X_test, y_test, verbose=0)
  print("Train score:\t {:0.3}".format(train_score))
  print("Test score:\t {:0.3}".format(test_score))

  # One curve per quantity recorded during training, clipped to [0, 1].
  history_frame = pd.DataFrame(history.history)
  history_frame.plot()
  plt.ylim(0, 1)

In [None]:
train_eval_scikit(LogisticRegression(solver='liblinear'))

In [None]:
train_eval_scikit(GradientBoostingClassifier())

### Exercise 1
Load the dataset `'keras_digits'` using the `load_images_dataset` function and train a model.

- Read the description printed by the `load_images_dataset` function and make sure you understand all of it.
- Compare the description in the cases of `flatten=True` and `flatten=False`. Which of the two are you going to use?
- Display a few images using the `display_few_images` function and make sure you understand how they are displayed.
- Display a few plots using the option `asarray=True`. Do plots with the same label look similar?
- Use the `train_eval_scikit` function to evaluate the performance of a `LogisticRegression` model on this dataset.

In [None]:
X_train, X_test, y_train, y_test = load_images_dataset('keras_digits', flatten=True)

In [None]:
display_few_images(X_train, y_train, cmap='gray')

In [None]:
display_few_images(X_train, y_train, asarray=True)

In [None]:
train_eval_scikit(LogisticRegression())

### Exercise 2: Neural network model

Design a simple fully connected neural network model using the `Sequential` API and use the `train_eval_tf` function to assess its performance on the MNIST dataset

- The model architecture should include:
  - An initial `Rescaling` layer to scale the pixel values by `1/255.` so that they are values in the interval `[0, 1]`.
  - Any number of hidden layers. Make sure to use activation functions
  - A final `Dense` layer with 10 nodes and a softmax
- Compile the model, use an optimizer of your choice and the `sparse_categorical_crossentropy` loss

Your code should look like:

```python
model = Sequential([
  # YOUR CODE HERE
  # ...
])

model.compile(# YOUR CODE HERE)
```



In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Prefer the stable location of `Rescaling`; fall back to the experimental
# preprocessing namespace for TensorFlow releases that only ship it there.
try:
  from tensorflow.keras.layers import Rescaling
except ImportError:
  from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras.optimizers import Adam, RMSprop

In [None]:
# Fully connected classifier: rescale pixels, one hidden layer, softmax output.
model = Sequential()
model.add(Rescaling(scale=1/255.))
model.add(Dense(512, activation='relu'))
model.add(Dense(10, activation='softmax'))

# Integer class labels are used directly, hence the sparse loss.
compile_options = dict(optimizer='adam',
                       loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])
model.compile(**compile_options)

In [None]:
train_eval_tf(model)

### Fashion MNIST & Cifar10 Datasets

The MNIST dataset is quite easy. Let's look at a couple of more interesting datasets: Fashion MNIST and Cifar10

In [None]:
X_train, X_test, y_train, y_test = load_images_dataset('keras_fashion', flatten=True)

In [None]:
display_few_images(X_train, y_train, cmap='gray')

In [None]:
display_few_images(X_train, y_train, asarray=True)

In [None]:
# NOTE(review): `model` is the same object that was already fitted on MNIST
# above, so this call continues training from those weights rather than from
# a fresh initialization. Rebuild and recompile it first if a from-scratch
# run on Fashion MNIST is intended.
train_eval_tf(model)

In [None]:
X_train, X_test, y_train, y_test = load_images_dataset('keras_cifar')

In [None]:
display_few_images(X_train, y_train)

In [None]:
display_few_images(X_train, y_train, asarray=True)

### Exercise 3: Neural network for 3D data with functional API

Define a new neural network using the functional API and make it work with 3D input data. 

- The architecture will be similar to the previous model
- You will need to introduce a `Flatten` layer at the beginning of the network so that the images are flattened to arrays before being passed to the inner layers
- Introduce some additional inner layers to give the network enough freedom to learn
- Bonus point if you define an auxiliary model that has the second-to-last layer as output for inspection. Set the size of this layer to 256
- Double bonus points if you use a dimensionality reduction technique to reduce the 256 outputs to 3 dimensions and visualize the results on a scatter plot.


You will notice that training proceeds quite slowly with this dataset. Try the following things:

- Increase the `batch_size` in the `train_eval_tf` function
- Switch the backend of the notebook from CPU to GPU using the `Edit->Notebook Settings` menu and re-run the whole notebook. You should see a speed increase

Your code should look like:

```python
inputs = ...
# YOUR CODE HERE
#...
outputs = ...

model = Model(inputs=inputs, outputs=outputs)
model1 = Model(inputs=inputs, outputs=x)

model.compile(# YOUR CODE HERE)
```



In [None]:
from tensorflow.keras.layers import Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import TensorBoard
from sklearn.decomposition import KernelPCA
import plotly.express as px

In [None]:
# Functional-API classifier for 32x32 RGB images.
inputs = Input(shape=(32, 32, 3))
x = Flatten()(inputs)
x = Rescaling(scale=1/255.)(x)
# Stack of progressively narrower hidden layers; after the loop `x` is the
# 256-unit activation that the auxiliary model exposes.
for width in (2048, 1024, 512, 256):
  x = Dense(width, activation='relu')(x)
outputs = Dense(10, activation='softmax')(x)

# `model` is the classifier; `model1` shares its layers but stops at `x`.
model = Model(inputs=inputs, outputs=outputs)
model1 = Model(inputs=inputs, outputs=x)

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
train_eval_tf(model, batch_size=128, epochs=16)

In [None]:
n_points = 1000

In [None]:
# Use `predict` so the activations come back as a NumPy array: the following
# cells hand them to scikit-learn, pandas and `np.savetxt`.
bottlenecks = model1.predict(X_test[:n_points], verbose=0)

In [None]:
# Project the bottleneck activations onto 3 components for plotting.
reducer = KernelPCA(n_components=3)
H = reducer.fit_transform(bottlenecks)

# One row per test sample: the three components plus the true label.
X_pca = pd.DataFrame(H, columns=['c1', 'c2', 'c3']).assign(
    label=y_test[:n_points])

In [None]:
px.scatter_3d(X_pca, x='c1', y='c2',z='c3', color='label')

In [None]:
# Save test embeddings for visualization in projector
np.savetxt("vecs.tsv", bottlenecks, delimiter='\t')

# Write one label per line, in the same order as the rows of vecs.tsv.
# Flattening first makes this work for both (n,) and (n, 1) label arrays.
with open('meta.tsv', 'w', encoding='utf-8') as out_m:
  for label in np.ravel(y_test[:n_points]):
    out_m.write(str(label) + "\n")

# Downloading is best-effort: it only works inside Google Colab. Catch
# `Exception` (not a bare except) so interrupts still propagate, and report
# the reason instead of hiding it.
try:
  from google.colab import files
  files.download('vecs.tsv')
  files.download('meta.tsv')
  print("Now go to https://projector.tensorflow.org/ and upload your files")
except Exception as err:
  print(f"Couldn't download files: {err}")