#### Setup

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import os
import sys
from datetime import datetime
from pathlib import Path
from sklearn.model_selection import train_test_split

In [2]:
import tensorflow as tf
# Eager mode is required by later cells, which call .numpy() on tensors and
# iterate tf.data datasets directly with next(iter(...)).
tf.enable_eager_execution()

# NOTE(review): ImageDataGenerator is not referenced anywhere else in the visible notebook.
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Display the installed TensorFlow version.
tf.__version__

'1.13.1'

In [3]:
# Passed as num_parallel_calls / prefetch buffer_size by the dataset helpers in this notebook.
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [4]:
# Print the devices TensorFlow reports as available on this machine.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 5241143534584657102
]


In [5]:
def _LoadRailwayCSVs(locations, frac=None, random_state=None):
    '''
    Read one label CSV per railway folder and tag every row with its folder name.

    locations    : dict mapping folder name -> CSV base name (without '.csv').
    frac         : if not None, fraction of rows sampled from each CSV.
    random_state : forwarded to DataFrame.sample for reproducible sampling.

    Returns a list of DataFrames. A folder whose CSV cannot be read is
    reported with print() and skipped (best effort), so the list may be empty.
    '''
    frames = []
    for folder, csv_name in locations.items():
        try:
            filename = img_folder + folder + '/' + csv_name + '.csv'
            frame = pd.read_csv(filename, header=0)
            frame['Railway'] = folder
            if frac is not None:
                frame = frame.sample(frac=frac, random_state=random_state).reset_index(drop=True)
            frames.append(frame)
        except Exception as e:
            # Deliberately best effort: a missing/unreadable CSV must not abort the load.
            print(e)
    return frames


def _CleanLabels(frames):
    '''Concatenate frames, drop rows with missing values and cast Catenary to int.'''
    df = pd.concat(frames).dropna()
    df['Catenary'] = df['Catenary'].astype(int)
    return df


def GetCSVs(sample_size, random_state=None):
    '''
    Loads csv only, no images.

    Reads the label CSV of every railway folder under the module-level
    `img_folder`, samples `sample_size` (a fraction) of each, and merges them
    into one DataFrame with an added 'Railway' column.

    If there are more Catenary == 1 rows than Catenary == 0 rows, rows from the
    Amtrak_non_cat_* CSVs are added to make up the difference (or as many as
    are available).
    NOTE(review): those extra rows are assumed to be Catenary == 0; they are
    not filtered here.

    sample_size  : fraction of each railway CSV to keep (passed to sample(frac=...)).
    random_state : optional seed forwarded to every DataFrame.sample call.

    Returns the combined DataFrame with a fresh 0..n-1 index.
    Raises ValueError if none of the railway CSVs could be loaded.
    '''
    # Folder name -> csv base name
    railways = {
        'Australia': 'AUS',
        'China': 'CHN',
        'Germany': 'GRM',
        'NewarkLR': 'NEW',
        'Switzerland': 'SWZ',
        'Amtrak': 'AMT',
        'BostonMTBA': 'BOS',
        'DenverRTD': 'DEN',
        'LosAngelesMR': 'LAA',
        'SeattleLLR': 'SEA',
        'Netherlands': 'NET',
    }

    # Known non-catenary lines, used only to top up the 0 class
    non_catenary = {
        'Amtrak_non_cat_1': 'ANC',
        'Amtrak_non_cat_2': 'ANC2',
        'Amtrak_non_cat_3': 'ANC3',
    }

    frames = _LoadRailwayCSVs(railways, frac=sample_size, random_state=random_state)
    if not frames:
        raise ValueError('No railway CSVs could be loaded from ' + img_folder)
    df = _CleanLabels(frames)

    # .get() avoids a KeyError when one of the two classes is absent from the sample.
    counts = df['Catenary'].value_counts()
    diff = int(counts.get(1, 0) - counts.get(0, 0))

    if diff > 0:
        dud_frames = _LoadRailwayCSVs(non_catenary)
        if dud_frames:
            duds = _CleanLabels(dud_frames)
            # Never request more rows than exist; sample(n=...) would raise otherwise.
            n_extra = min(diff, len(duds))
            if n_extra < diff:
                print('Only %d non-catenary rows available, %d needed to balance' % (n_extra, diff))
            duds = duds.sample(n=n_extra, random_state=random_state)
            df = pd.concat([df, duds])
        else:
            print('No non-catenary CSVs could be loaded; classes left unbalanced')

    # Always return a frame, balanced or not, with a clean positional index.
    return df.reset_index(drop=True)

In [6]:
def GetPaths(df):
    '''
    Get image paths and labels as lists

    Builds one absolute PNG path per row as
    <abs img_folder>/<Railway>/<Name>.png and collects the matching integer
    Catenary label. Rows are walked in positional order, so the result does
    not depend on the DataFrame's index values.

    df : DataFrame with 'Railway', 'Name' and 'Catenary' columns.

    Prints the number of paths and returns (img_paths, labels).
    '''
    root = GetABSPath(img_folder)

    img_paths = [
        root + '/' + railway + '/' + name + '.png'
        for railway, name in zip(df['Railway'], df['Name'])
    ]
    labels = [int(value) for value in df['Catenary']]

    print(len(img_paths))

    return img_paths,labels

In [7]:
def GetABSPath(folder):
    '''Return the absolute form of `folder` as computed by os.path.abspath.'''
    absolute = os.path.abspath(folder)
    return absolute

In [8]:
def PreprocessImage(img_path):
    '''
    Read a PNG file and return it as a 192x192, 3-channel tensor divided by 255.

    The shape of the resized tensor is printed as a side effect.
    '''
    raw_bytes = tf.io.read_file(img_path)
    decoded = tf.io.decode_png(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [192,192])
    print(resized.shape)

    # normalize to [0,1] range
    return resized / 255.0

In [9]:
def SplitDataSet(img_paths, labels):
    '''
    Split paths/labels 80/20 (fixed seed 42) and wrap each part in a tf.data
    dataset of (preprocessed image, int64 label) pairs.

    Returns (train_dataset, test_dataset).
    '''
    X_train, X_test, y_train, y_test = train_test_split(
        img_paths, labels, test_size = .2, random_state=42)

    def build_pairs(paths, targets):
        # Images are loaded lazily from their paths; labels come straight from the list.
        image_ds = tf.data.Dataset.from_tensor_slices(paths).map(
            PreprocessImage, num_parallel_calls=AUTOTUNE)
        label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(targets, tf.int64))
        return tf.data.Dataset.zip((image_ds, label_ds))

    # Train is built before test, as a tuple is evaluated left to right.
    return build_pairs(X_train, y_train), build_pairs(X_test, y_test)

In [10]:
def ClassifyImages(train_image_label_ds, test_image_label_ds,
                   steps_per_epoch=None, eval_steps=None, epochs=10, num_classes=2):
    '''
    Train and evaluate a small fully-connected baseline on 192x192x3 images.

    train_image_label_ds : batched dataset of (image, label) used for fitting.
    test_image_label_ds  : batched dataset of (image, label) used for evaluation.
    steps_per_epoch      : batches per training epoch. When None it falls back
                           to ceil(len(img_paths) / batch_size) using the
                           notebook-level `img_paths` and `batch_size`.
    eval_steps           : batches drawn for evaluation; defaults to
                           steps_per_epoch.
    epochs               : number of training epochs.
    num_classes          : width of the softmax output layer (labels here are 0/1).

    Returns whatever model.evaluate returns for the test dataset.
    '''
    model = tf.keras.models.Sequential([
        # Flatten must come first: input_shape is only honoured on the first
        # layer, and Dense on an un-flattened image would act per pixel.
        tf.keras.layers.Flatten(input_shape=(192,192,3)),
        tf.keras.layers.Dense(32, activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.2),
        # Softmax yields the class distribution sparse_categorical_crossentropy expects.
        tf.keras.layers.Dense(num_classes, activation=tf.nn.softmax)
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    if steps_per_epoch is None:
        steps_per_epoch = int(np.ceil(len(img_paths) / batch_size))
    if eval_steps is None:
        eval_steps = steps_per_epoch

    model.fit(train_image_label_ds, steps_per_epoch=steps_per_epoch, epochs=epochs)

    return model.evaluate(test_image_label_ds, steps=eval_steps)

In [11]:
def ShuffleBatch(ds_dict,buff,BATCH_SIZE = 32):
    '''
    Shuffle/batch/prefetch/Set Range

    ds_dict    : dataset of (image, label) pairs.
    buff       : shuffle buffer size.
    BATCH_SIZE : number of pairs per batch.

    Returns a dataset that is shuffled, repeated indefinitely, batched, has
    its images mapped through 2*image-1, and is prefetched. Because of the
    repeat, consumers must bound iteration themselves (e.g. steps_per_epoch).
    '''
    def change_range(image,label):
        return 2*image-1, label

    ds = ds_dict.shuffle(buffer_size = buff)
    ds = ds.repeat()
    ds = ds.batch(BATCH_SIZE)
    ds = ds.map(change_range)

    # `prefetch` goes last so every preceding step, including the range
    # change, is prepared in the background while the model is training.
    keras_ds = ds.prefetch(buffer_size=AUTOTUNE)

    return keras_ds

## Start program

### Load from csv

In [12]:
# Root folder; GetCSVs/GetPaths expect one sub-folder per railway beneath it.
img_folder = '../data/output_images/'

# Fraction of each railway CSV's rows to sample (passed to GetCSVs).
sample_size = 0.5

In [13]:
# Build the combined label table and show how many rows each Catenary class has.
df = GetCSVs(sample_size)
print(df['Catenary'].value_counts())

[Errno 2] File b'../data/output_images/China/CHN.csv' does not exist: b'../data/output_images/China/CHN.csv'
1    163
0    163
Name: Catenary, dtype: int64


In [14]:
# Parallel lists: absolute image file paths and their integer Catenary labels.
img_paths,labels = GetPaths(df)

326


### Create Model

In [15]:
'''
Split into train/test
'''

# Two datasets of (image, label) pairs; SplitDataSet holds out 20% for testing.
train_image_label_ds, test_image_label_ds = SplitDataSet(img_paths, labels)

(192, 192, 3)
(192, 192, 3)


In [16]:
'''
Shuffle & batch
'''

batch_size = 32

# Both splits use a shuffle buffer sized to the full path list.
# ShuffleBatch also repeats the data indefinitely, so fit/evaluate need explicit step counts.
train_ds = ShuffleBatch(train_image_label_ds,len(img_paths),BATCH_SIZE = batch_size) 
test_ds = ShuffleBatch(test_image_label_ds,len(img_paths),BATCH_SIZE = batch_size)

#### Send ds to model

In [17]:
# MobileNetV2 without its classification top (weights argument left at its default).
mobile_net = tf.keras.applications.MobileNetV2(input_shape=(192, 192, 3), include_top=False)
# Freeze the base network so only layers added on top of it are trained.
mobile_net.trainable=False

Instructions for updating:
Colocations handled automatically by placer.


In [18]:
# Frozen MobileNetV2 -> global average pooling -> softmax over the two classes ('0', '1').
model = tf.keras.Sequential([
  mobile_net,
  tf.keras.layers.GlobalAveragePooling2D(),
  tf.keras.layers.Dense(len(['0','1']), activation = 'softmax')
])

In [19]:
# The dataset may take a few seconds to start, as it fills its shuffle buffer.
image_batch, label_batch = next(iter(train_ds))

# Run one batch through the base network alone to inspect the feature-map shape.
feature_map_batch = mobile_net(image_batch)
print(feature_map_batch.shape)

(32, 6, 6, 1280)


In [20]:
# NOTE: the model's last layer applies softmax, so despite the "logit" naming
# these values are per-class probabilities, not raw logits.
logit_batch = model(image_batch).numpy()

print("min logit:", logit_batch.min())
print("max logit:", logit_batch.max())
print()

print("Shape:", logit_batch.shape)

min logit: 0.06212671
max logit: 0.9378733

Shape: (32, 2)


In [21]:
# Labels are integer class ids (cast to int64 in SplitDataSet), hence the sparse variant of the loss.
model.compile(optimizer=tf.train.AdamOptimizer(),
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])

In [22]:
# Layer overview; with mobile_net frozen, only the Dense head should count as trainable.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_192 (Model) (None, 6, 6, 1280)        2257984   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1280)              0         
_________________________________________________________________
dense (Dense)                (None, 2)                 2562      
Total params: 2,260,546
Trainable params: 2,562
Non-trainable params: 2,257,984
_________________________________________________________________


In [None]:
# Batches needed to cover the full path list (train + test together) once.
# NOTE(review): train_ds holds only the training split, so an "epoch" here
# spans more samples than the training set contains — confirm intended.
steps_per_epoch = int(tf.ceil(len(img_paths)/batch_size).numpy())

# train_ds repeats indefinitely, so steps_per_epoch is what ends each epoch.
model.fit(train_ds, epochs=10, steps_per_epoch=steps_per_epoch)

# NOTE(review): evaluation reuses the training step count on a shuffled,
# repeating test_ds, so test samples are likely drawn more than once.
model.evaluate(test_ds,steps=steps_per_epoch)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [None]:
# Train and evaluate the small dense model defined in ClassifyImages on the same datasets.
ClassifyImages(train_ds, test_ds)

### Single Image Walkthrough

In [None]:
# Get filename and preview

# img_paths is the list returned by GetPaths; `image_paths` was never defined.
img_path = img_paths[0]
plt.imshow(Image.open(img_path))
print(img_path)

In [None]:
# img_path is already a single path string; indexing it would pass only its first character.
img = PreprocessImage(img_path)
img

In [None]:
# Show the preprocessed (resized, rescaled) image.
plt.imshow(img)

In [None]:
# Same preview in one step: preprocess straight from the path and display.
plt.imshow(PreprocessImage(img_path))

##### Build tf dataset

In [None]:
# Dataset of path strings, built from the img_paths list returned by GetPaths
# (`all_image_paths` was never defined in this notebook).
path_ds = tf.data.Dataset.from_tensor_slices(img_paths)

In [None]:
# Inspect the element shape and dtype of the path dataset.
print('shape: ', repr(path_ds.output_shapes))
print('type: ', path_ds.output_types)
print()
print(path_ds)

In [None]:
# Map every path to its preprocessed image tensor.
image_ds = path_ds.map(PreprocessImage, num_parallel_calls=AUTOTUNE)

In [None]:
# Preview the first four images in a 2x2 grid, captioned with their file names.
# Assumes image_ds was built from img_paths in the same order — TODO confirm.
plt.figure(figsize=(8,8))
for n,image in enumerate(image_ds.take(4)):
    plt.subplot(2,2,n+1)
    plt.imshow(image)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel(os.path.basename(img_paths[n]))
# Show once, after all subplots exist, so the grid is drawn as a single figure.
plt.show()

In [None]:
# Dataset of int64 labels, built from the labels list returned by GetPaths
# (`all_image_labels` was never defined in this notebook).
label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(labels, tf.int64))

In [None]:
# Display the label dataset's repr.
label_ds

In [None]:
# Pair each image with its label; relies on both datasets being in the same order.
image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))

In [None]:
# Print the zipped dataset's description.
print(image_label_ds)

### Basic Example

In [None]:
# Handle to the MNIST dataset loader bundled with Keras.
mnist = tf.keras.datasets.mnist

In [None]:
# Load the train and test splits as (images, labels) pairs.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
# Rescale pixel values by 1/255 (presumably 8-bit input, giving a [0, 1] range).
# Re-running this cell divides again, because it overwrites its own inputs.
x_train, x_test = x_train / 255.0, x_test / 255.0

In [None]:
# Display the shape of the training image array.
x_train.shape

In [None]:
# NOTE: this rebinds `model`, replacing the MobileNetV2-based model defined earlier in the notebook.
# Flatten 28x28 inputs -> 512-unit ReLU layer -> dropout -> 10-way softmax.
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(512, activation=tf.nn.relu),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train on the training split for 5 epochs.
model.fit(x_train, y_train, epochs=5)

# Report loss and accuracy on the held-out test split.
model.evaluate(x_test, y_test)