## Convert time series to image 
Based on https://gist.github.com/oguiza/c9c373aec07b96047d1ba484f23b7b47
and https://www.tensorflow.org/tutorials/load_data/images
and https://www.tensorflow.org/tutorials/images/transfer_learning

In [1]:
%config IPCompleter.greedy=True
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from sklearn.preprocessing import minmax_scale

df_train = pd.read_csv('train_kaggle.csv')
df_test = pd.read_csv('sample_solution.csv')
Y = df_train['Label'].values

## Save as real image

In [15]:
import pathlib
data_dir = pathlib.Path('image/train')
image_count = len(list(data_dir.glob('*/*.png')))
split_count = int(0.2 * image_count)
split_count

18

In [2]:
import tensorflow as tf
train_ds = tf.data.Dataset.list_files(str('image/train/*'))
test_ds = tf.data.Dataset.list_files(str('image/test/*'))

In [16]:
valid_ds = train_ds.take(split_count)
train_ds = train_ds.skip(split_count)

In [17]:
for f in train_ds.take(5):
  print(f.numpy())

b'image\\train\\1\\65.png'
b'image\\train\\1\\69.png'
b'image\\train\\0\\32.png'
b'image\\train\\0\\79.png'
b'image\\train\\0\\90.png'


In [18]:
CLASS_NAMES = ['0','1']

def get_label(file_path):
    # convert the path to a list of path components
    parts = tf.strings.split(file_path, '\\')
    # The second to last is the class-directory
    return parts[-2] == CLASS_NAMES

def decode_img(img):
    # convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)
    # Use `convert_image_dtype` to convert to floats in the [0,1] range.
    img = tf.image.convert_image_dtype(img, tf.float32)
    return img

def process_path_image(file_path):
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    return img

def process_path(file_path):
    label = get_label(file_path)
    # load the raw data from the file as a string
    img = process_path_image(file_path)
    return img, label

In [5]:
get_label('image\\train\\0\\2120.png')

<tf.Tensor: id=129, shape=(2,), dtype=bool, numpy=array([ True, False])>

In [19]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

train_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)
valid_ds = valid_ds.map(process_path, num_parallel_calls=AUTOTUNE)
test_ds = test_ds.map(process_path_image, num_parallel_calls=AUTOTUNE)

In [23]:
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000

In [25]:
train_batches = train_ds.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
valid_batches = valid_ds.batch(BATCH_SIZE)
test_batches = test_ds.batch(BATCH_SIZE)

In [28]:
for image_batch, label_batch in train_batches.take(1):
    pass

image_batch.shape

UnknownError: {{function_node __inference_Dataset_map_process_path_395}} NewRandomAccessFile failed to Create/Open: image\train\1 : Access is denied.
; Input/output error
	 [[{{node ReadFile}}]] [Op:IteratorGetNextSync]

In [29]:
IMG_SIZE = image_batch.shape[1]

NameError: name 'image_batch' is not defined

In [30]:
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

# Create the base model from the pre-trained model MobileNet V2
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

NameError: name 'IMG_SIZE' is not defined

In [None]:
# Let's take a look at the base model architecture
base_model.summary()

In [None]:
feature_batch = base_model(image_batch)
print(feature_batch.shape)

In [None]:
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)

In [None]:
prediction_layer = tf.keras.layers.Dense(2, activation='softmax')
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)

In [None]:
YTest = model.predict(XTest)
df_test['Predicted'] = YTest[:, 0]
df_test.to_csv('test.csv', index=False)

## Feature extraction

In [None]:
base_model.trainable = False


In [None]:
METRICS = [
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.AUC(name='auc'),
]

model = tf.keras.Sequential([
  base_model,
  global_average_layer,
  prediction_layer
])

base_learning_rate = 0.0001
model.compile(loss='categorical_crossentropy',
              optimizer = tf.keras.optimizers.RMSprop(lr=base_learning_rate/10),
              metrics=METRICS)

model.summary()

In [None]:
num_train, num_val, num_test = (
  metadata.splits['train'].num_examples*weight/10
  for weight in SPLIT_WEIGHTS
)

In [None]:
initial_epochs = 10
steps_per_epoch = round(num_train)//BATCH_SIZE
validation_steps = 20

loss0,accuracy0 = model.evaluate(validation_batches, steps = validation_steps)

In [None]:
print("initial loss: {:.2f}".format(loss0))
print("initial accuracy: {:.2f}".format(accuracy0))

In [None]:
history = model.fit(train_batches,
                    epochs=initial_epochs,
                    validation_data=valid_batches)

In [None]:
YTest = model.predict(XTest)
df_test['Predicted'] = YTest[:, 0]
df_test.to_csv('test.csv', index=False)

## Fine Tuning

In [88]:
base_model.trainable = True
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))

# Fine tune from this layer onwards
fine_tune_at = 100

# Freeze all the layers before the `fine_tune_at` layer
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable =  False

Number of layers in the base model:  155


In [101]:
len(model.trainable_variables)

58

In [102]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_auc', 
    verbose=1,
    patience=10,
    mode='max',
    restore_best_weights=True)


In [None]:
EPOCHS = 100
baseline_history = model.fit(
        train_batches,
        epochs=EPOCHS,
        callbacks = [early_stopping],
        validation_data=valid_batches)

Epoch 1/100
     12/Unknown - 41s 41s/step - loss: 0.8197 - accuracy: 0.4375 - auc: 0.42 - 81s 40s/step - loss: 0.7096 - accuracy: 0.6094 - auc: 0.55 - 116s 39s/step - loss: 0.6777 - accuracy: 0.6458 - auc: 0.605 - 150s 37s/step - loss: 0.6427 - accuracy: 0.6719 - auc: 0.674 - 185s 37s/step - loss: 0.6431 - accuracy: 0.6687 - auc: 0.675 - 221s 37s/step - loss: 0.6601 - accuracy: 0.6458 - auc: 0.647 - 256s 37s/step - loss: 0.6619 - accuracy: 0.6384 - auc: 0.644 - 291s 36s/step - loss: 0.6494 - accuracy: 0.6719 - auc: 0.665 - 325s 36s/step - loss: 0.6429 - accuracy: 0.6806 - auc: 0.676 - 361s 36s/step - loss: 0.6444 - accuracy: 0.6719 - auc: 0.674 - 396s 36s/step - loss: 0.6381 - accuracy: 0.6818 - auc: 0.687 - 431s 36s/step - loss: 0.6356 - accuracy: 0.6875 - auc: 0.6920

In [None]:
YTest = model.predict(XTest)
df_test['Predicted'] = YTest[:, 0]
df_test.to_csv('test.csv', index=False)