[View in Colaboratory](https://colab.research.google.com/github/davidvela/testColabGH/blob/master/KaggTUT.ipynb)

# KAGGLE TUT
https://www.kaggle.com/learn/deep-learning

**created: 20.08.2018**

Dan Becker - Data Scientist
Dan has done data science consulting for 6 companies in the Fortune 100 and contributed to the Keras library for deep learning. He has a PhD in Econometrics.

**TODO:** the code below reads its inputs from Kaggle paths. To run it elsewhere, I need to either download the utils and run the notebook locally, or load the models in Colab.

# Intro - dog breed prediction
https://www.kaggle.com/dansbecker/programming-in-tensorflow-and-keras/notebook 

In [0]:
# Imports: 
# 1 read data 
from os.path import join

# 2 pre-processing
import numpy as np
from tensorflow.python.keras.applications.resnet50 import preprocess_input
from tensorflow.python.keras.preprocessing.image import load_img, img_to_array

# 3 model 
from tensorflow.python.keras.applications import ResNet50

# 4 vis
import sys
from decode_predictions import decode_predictions
from IPython.display import Image, display
import json


In [0]:
# Kaggle input locations used by the dog-prediction cells of this notebook.
# ImageNet class-index JSON; passed to decode_predictions as class_list_path.
class_dir = '../input/resnet50/imagenet_class_index.json'
# Directory appended to sys.path in the visualisation cell (utility code).
utils_dir = '/kaggle/input/python-utility-code-for-deep-learning-exercises/utils'
# Folder that holds the dog photos listed in img_paths.
image_dir = '../input/dog-breed-identification/train/'
# Weights file handed to ResNet50(weights=...).
model_weights = '../input/resnet50/resnet50_weights_tf_dim_ordering_tf_kernels.h5'

In [0]:
# Full paths of the sample dog photos, each one located inside image_dir.
img_paths = [
    join(image_dir, jpg_name)
    for jpg_name in (
        '0246f44bb123ce3f91c939861eb97fb7.jpg',
        '84728e78632c0910a69d33f82e62638c.jpg',
        '8825e914555803f4c67b26593c9d5aff.jpg',
        '91a5e8db15bccfb6cfa2df5e8b95ec03.jpg',
    )
]


In [0]:
# Helper that loads image files and turns them into one preprocessed batch
image_size = 224
def read_and_prep_images(img_paths, img_height=image_size, img_width=image_size):
    """Read the given image files and return them as one preprocessed array.

    Arguments:
        img_paths: iterable of image file paths.
        img_height: target height passed to load_img (defaults to image_size).
        img_width: target width passed to load_img (defaults to image_size).
    Returns:
        The result of preprocess_input applied to the stacked image arrays.
    """
    target = (img_height, img_width)
    # First load every file, then convert, mirroring a two-pass pipeline.
    loaded = []
    for path in img_paths:
        loaded.append(load_img(path, target_size=target))
    stacked = np.array(list(map(img_to_array, loaded)))
    return preprocess_input(stacked)

In [0]:
# 3 create model 
# Build ResNet50 and load the weights file referenced by model_weights.
my_model = ResNet50(weights=model_weights)

# Load and preprocess the sample images, then run them through the model.
test_data = read_and_prep_images(img_paths)
# preds is later passed to decode_predictions, which requires shape (samples, 1000).
preds = my_model.predict(test_data)

In [0]:

def decode_predictions(preds, top=5, class_list_path=None):
  """Decodes the prediction of an ImageNet model.

  Arguments:
      preds: Numpy tensor encoding a batch of predictions,
          shape `(samples, 1000)`.
      top: integer, how many top-guesses to return. `top=0` yields an
          empty list for every sample.
      class_list_path: Path to the canonical imagenet_class_index.json file.
          Despite the `None` default a real path must be supplied, because
          the file is opened unconditionally.
  Returns:
      A list of lists of top class prediction tuples
      `(class_name, class_description, score)`.
      One list of tuples per sample in batch input, best score first.
  Raises:
      ValueError: in case of invalid shape of the `pred` array
          (must be 2D with 1000 columns).
  """
  if len(preds.shape) != 2 or preds.shape[1] != 1000:
    raise ValueError('`decode_predictions` expects '
                     'a batch of predictions '
                     '(i.e. a 2D array of shape (samples, 1000)). '
                     'Found array with shape: ' + str(preds.shape))
  # The context manager closes the file as soon as it has been parsed,
  # instead of leaving the handle open until garbage collection.
  with open(class_list_path) as class_index_file:
    class_index = json.load(class_index_file)
  results = []
  for pred in preds:
    # Reverse first, then slice: same indices as `[-top:][::-1]` for every
    # non-zero `top`, but `top=0` now selects nothing (`[-0:]` selected all).
    top_indices = pred.argsort()[::-1][:top]
    result = [tuple(class_index[str(i)]) + (pred[i],) for i in top_indices]
    result.sort(key=lambda x: x[2], reverse=True)
    results.append(result)
  return results


In [0]:
# visualize predictions

# Add directory holding utility functions to path to allow importing.
# Guarded so that re-running this cell does not append duplicate entries.
if utils_dir not in sys.path:
    sys.path.append(utils_dir)

# Top-3 (class_name, class_description, score) tuples for each image.
most_likely_labels = decode_predictions(preds, top=3, class_list_path=class_dir)

# Show every picture followed by its predicted labels.
for img_path, labels in zip(img_paths, most_likely_labels):
    display(Image(img_path))
    print(labels)

# Transfer Learning


In [0]:
# 1 - model specification
from tensorflow.python.keras.applications import ResNet50
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Flatten, GlobalAveragePooling2D

num_classes = 2
resnet_weights_path = '../input/resnet50/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'

# Stack: ResNet50 without its top classifier (average-pooled output),
# followed by a new softmax layer with one unit per class.
my_new_model = Sequential([
    ResNet50(include_top=False, pooling='avg', weights=resnet_weights_path),
    Dense(num_classes, activation='softmax'),
])

# Freeze the ResNet base (first layer of the stack): it is already trained.
my_new_model.layers[0].trainable = False

# 2 - compile model
my_new_model.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# 3 - fit model
from tensorflow.python.keras.applications.resnet50 import preprocess_input
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator

image_size = 224
# Every image is run through ResNet50's preprocess_input by the generator.
data_generator = ImageDataGenerator(preprocessing_function=preprocess_input)

# Training images, served in batches of 24.
train_generator = data_generator.flow_from_directory(
    directory='../input/urban-and-rural-photos/rural_and_urban_photos/train',
    target_size=(image_size, image_size),
    batch_size=24,
    class_mode='categorical',
)

# Validation images; batch_size is left at the library default here.
validation_generator = data_generator.flow_from_directory(
    directory='../input/urban-and-rural-photos/rural_and_urban_photos/val',
    target_size=(image_size, image_size),
    class_mode='categorical',
)

# Three training steps, then a single validation step.
my_new_model.fit_generator(
    generator=train_generator,
    steps_per_epoch=3,
    validation_data=validation_generator,
    validation_steps=1,
)

# output
# Found 72 images belonging to 2 classes.
# Found 20 images belonging to 2 classes.
# Epoch 1/1
# 3/3 [==============================] - 29s 10s/step - loss: 0.5130 - acc: 0.8056 - val_loss: 0.3568 - val_acc: 0.9000
# <tensorflow.python.keras._impl.keras.callbacks.History at 0x7f9f5bc56a20>

# Data Augmentation 
+ TODO: apply data augmentation (no code has been added for this section yet)


# Deeper understanding of DL 

# From scratch - Digit Recognizer

In [0]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.python import keras
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Flatten, Conv2D, Dropout


# Image geometry and number of digit classes
img_rows, img_cols = 28, 28
num_classes = 10

def data_prep(raw):
    """Split the raw digit table into image tensors and one-hot labels.

    Arguments:
        raw: DataFrame exposing a `label` column; every column after the
            first one is read as pixel data.
    Returns:
        (out_x, out_y): pixels reshaped to
        (num_images, img_rows, img_cols, 1) and divided by 255, and the
        labels encoded with to_categorical over num_classes.
    """
    pixel_columns = raw.values[:, 1:]
    image_count = raw.shape[0]
    out_x = pixel_columns.reshape(image_count, img_rows, img_cols, 1) / 255
    out_y = keras.utils.to_categorical(raw.label, num_classes)
    return out_x, out_y

# CSV file with the training digits, read into a DataFrame.
train_file = "../input/digit-recognizer/train.csv"
raw_data = pd.read_csv(train_file)

# x: image array, y: one-hot labels, both produced by data_prep.
x, y = data_prep(raw_data)

# Two 3x3 convolutions (20 filters each), flattened into a 128-unit dense
# layer and a softmax output with one unit per class.
model = Sequential([
    Conv2D(20, kernel_size=(3, 3), activation='relu',
           input_shape=(img_rows, img_cols, 1)),
    Conv2D(20, kernel_size=(3, 3), activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)
# Train for 2 epochs, holding out 20% of the data for validation.
model.fit(
    x,
    y,
    batch_size=128,
    epochs=2,
    validation_split=0.2,
)

# /opt/conda/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
#   from ._conv import register_converters as _register_converters
# Train on 33600 samples, validate on 8400 samples
# Epoch 1/2
# 33600/33600 [==============================] - 56s 2ms/step - loss: 0.2454 - acc: 0.9292 - val_loss: 0.0823 - val_acc: 0.9749
# Epoch 2/2
# 33600/33600 [==============================] - 58s 2ms/step - loss: 0.0650 - acc: 0.9810 - val_loss: 0.0590 - val_acc: 0.9820
# <tensorflow.python.keras._impl.keras.callbacks.History at 0x7fa843394978>

# strides and dropout

In [0]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.python import keras
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Flatten, Conv2D, Dropout

# Image geometry and number of digit classes
img_rows, img_cols = 28, 28
num_classes = 10

def data_prep(raw):
    """Split the raw digit table into image tensors and one-hot labels.

    Arguments:
        raw: DataFrame exposing a `label` column; every column after the
            first one is read as pixel data.
    Returns:
        (out_x, out_y): pixels reshaped to
        (num_images, img_rows, img_cols, 1) and divided by 255, and the
        labels encoded with to_categorical over num_classes.
    """
    pixel_columns = raw.values[:, 1:]
    image_count = raw.shape[0]
    out_x = pixel_columns.reshape(image_count, img_rows, img_cols, 1) / 255
    out_y = keras.utils.to_categorical(raw.label, num_classes)
    return out_x, out_y

# NOTE(review): train_size is not referenced anywhere in the visible notebook.
train_size = 30000
# CSV file with the training digits, read into a DataFrame.
train_file = "../input/digit-recognizer/train.csv"
raw_data = pd.read_csv(train_file)

# x: image array, y: one-hot labels, both produced by data_prep.
x, y = data_prep(raw_data)

# Two stride-2 3x3 convolutions (30 filters each), each followed by 50%
# dropout, then a 128-unit dense layer and a softmax output.
model = Sequential([
    Conv2D(30, kernel_size=(3, 3), strides=2, activation='relu',
           input_shape=(img_rows, img_cols, 1)),
    Dropout(0.5),
    Conv2D(30, kernel_size=(3, 3), strides=2, activation='relu'),
    Dropout(0.5),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)
# Train for 2 epochs, holding out 20% of the data for validation.
model.fit(
    x,
    y,
    batch_size=128,
    epochs=2,
    validation_split=0.2,
)
# /opt/conda/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
#   from ._conv import register_converters as _register_converters
# Train on 33600 samples, validate on 8400 samples
# Epoch 1/2
# 33600/33600 [==============================] - 14s 408us/step - loss: 0.6243 - acc: 0.8026 - val_loss: 0.2235 - val_acc: 0.9354
# Epoch 2/2
# 33600/33600 [==============================] - 14s 417us/step - loss: 0.2664 - acc: 0.9176 - val_loss: 0.1315 - val_acc: 0.9630

# .END