#### Setup

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import sys
from datetime import datetime

import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [5]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 1790661120769104968
]


### Split images into folders

In [2]:
'''
Parameters 
----------
Set for each test. 


img_folder: Root folder of image collection

results_file: JSON file for output of results and metadata

description: String for labeling/notes

sample_size: Sample size to pull from each csv, 0-1

img_size: Native resolution is 1280x1280

'''

img_folder = '../data/output_images/'

train_folder = '../data/output_images/train/'
test_folder = '../data/output_images/test/'
validation_folder = '../data/output_images/validation/'

sample_size = .1

img_size = (320,320)

In [3]:
'''
Loads csv only, no images.
'''

# Name of folder
names = [
    'Australia',
    'China',
    'Germany',
    'NewarkLR',
    'Switzerland',
    'Amtrak',
    'BostonMTBA',
    'DenverRTD',
    'LosAngelesMR',
    'SeattleLLR',
    'Netherlands'
]

# Name of csv
abbr = [
    'AUS',
    'CHN',
    'GRM',
    'NEW',
    'SWZ',
    'AMT',
    'BOS',
    'DEN',
    'LAA',
    'SEA',
    'NET'
]
locations = dict(zip(names,abbr))

# Collect each csv into one df adding railway name
frames = []
for key,value in locations.items():
    try:
        filename = img_folder+key+'/'+value+'.csv'
        tmp = pd.read_csv(filename,header=0)
        tmp['Railway'] = key
        
        # Take sample from each folder 
        tmp = tmp.sample(frac=sample_size).reset_index(drop=True)
        frames.append(tmp)
    except Exception as e:
        print(e)

df = pd.concat(frames)

df = df.dropna()
df['Catenary'] = df['Catenary'].astype(int)

df.head()

[Errno 2] File b'../data/output_images/China/CHN.csv' does not exist: b'../data/output_images/China/CHN.csv'


Unnamed: 0,Name,Longitude,Latitude,Catenary,Railway
0,153.03352180000002_-27.443969300000003,153.033522,-27.443969,1,Australia
1,172.6570924_-43.419457200000004,172.657092,-43.419457,1,Australia
2,151.19101840000002_-33.9324425,151.191018,-33.932443,1,Australia
3,151.2950169_-33.4968704,151.295017,-33.49687,0,Australia
4,151.1964198_-33.868837,151.19642,-33.868837,0,Australia


In [4]:
'''
Open known non-catenary lines and add differntial to df
'''

zeros = df.Catenary.value_counts()[0]
ones = df.Catenary.value_counts()[1]

names = [
    'Amtrak_non_cat_1',
    'Amtrak_non_cat_2',
    'Amtrak_non_cat_3'
]

abbr = [
    'ANC',
    'ANC2',
    'ANC3'
]
locations = dict(zip(names,abbr))

diff = ones - zeros

if diff > 0:
    frames = []
    for key,value in locations.items():
        try:
            filename = img_folder+key+'/'+value+'.csv'
            tmp = pd.read_csv(filename,header=0)
            tmp['Railway'] = key
            frames.append(tmp)
        except Exception as e:
            print(e)

    try:
        duds = pd.concat(frames)
        duds = duds.dropna()
        duds['Catenary'] = duds['Catenary'].astype(int) 
        
        duds = duds.sample(n=diff).reset_index(drop=True)
        df = pd.concat([df,duds]).reset_index(drop=True)
    except Exception as e:
        print(e)
        duds = duds.sample(len(duds.index.tolist())).reset_index(drop=True)
        df = pd.concat([df,duds]).reset_index(drop=True)
        
df.shape

(66, 5)

In [None]:
ones = df[df['Catenary']==1]
zeros = df[df['Catenary']==0]

In [None]:
'''
Load images into df
'''
rows = zeros.index.tolist()

images = []
for row in rows:
    img_path = img_folder+df.iloc[row]['Railway']+'/'+df.iloc[row]['Name']+'.png'
    img = Image.open(img_path).convert('RGBA')
    img.thumbnail(img_size, Image.ANTIALIAS)
#     data = np.asarray(img)
#     data = data.flatten()
    images.append(img)
    
zeros['Image'] = images

cols = ['Catenary','Image']
zeros = zeros[cols]

zeros.head()

In [None]:
len(zeros.index.tolist())

In [None]:
images = zeros['Image'].tolist()

i = 32
for image in images[288:]:
    image.save(validation_folder+str(i)+'.png')
    i += 1

### Loading images into tf

In [None]:
def preprocess_image(image):
    image = tf.image.decode_jpeg(image, channels=3)
    img_final = tf.image.resize_images(
        img_tensor,
        [192, 192],
        align_corners=False,
        preserve_aspect_ratio=False,
    )
    image /= 255.0  # normalize to [0,1] range

    return image

In [None]:
def load_and_preprocess_image(path):
    image = tf.read_file(path)
    return preprocess_image(image)

In [18]:
img_path = train_folder+'1/0.png'
print(img_path)

../data/output_images/train/1/0.png


In [19]:
img_raw = tf.io.read_file(img_path)
print(repr(img_raw)[:100]+"...")

<tf.Tensor 'ReadFile:0' shape=() dtype=string>...


In [20]:
img_tensor = tf.image.decode_image(img_raw)

print(img_tensor.shape)
print(img_tensor.dtype)

<unknown>
<dtype: 'uint8'>


In [None]:
img_path = all_image_paths[0]
label = all_image_labels[0]

plt.imshow(load_and_preprocess_image(img_path))
plt.grid(False)
plt.xlabel(caption_image(img_path).encode('utf-8'))
plt.title(label_names[label].title())
print()

In [21]:
img_final = tf.image.resize_images(
    img_tensor,
    [192, 192],
    align_corners=False,
    preserve_aspect_ratio=False,
)

ValueError: 'images' contains no shape.

In [None]:
labels = np.asarray(df.Catenary.tolist())
features = np.asarray(df.Image.tolist())

In [None]:
features = features.reshape(len(features),-1)

In [None]:
features.shape

In [None]:
X_train, X_test, y_train, y_test = train_test_split(features,labels,test_size = 0.20, random_state=42)

X_train, X_test = X_train / 255.0, X_test / 255.0

In [None]:
len(X_test)

In [None]:
with tf.device('/gpu:0'):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(32, activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Flatten(input_shape=(28,28)),
        tf.keras.layers.Dense(10, activation=tf.nn.sigmoid)
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=5)

    model.evaluate(X_test, y_test)

In [None]:
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        '../data/output_images/train/',
        target_size=(66, 66),
        batch_size=32,
        class_mode='binary')

# validation_generator = test_datagen.flow_from_directory(
#         '../data/output_images/test',
#         target_size=(20, 20),
#         batch_size=32,
#         class_mode='binary')


# model = tf.keras.models.Sequential([
#         tf.keras.layers.Dense(32, activation=tf.nn.relu),
#         tf.keras.layers.Dropout(0.2),
#         tf.keras.layers.Flatten(input_shape=(28,28)),
#         tf.keras.layers.Dense(10, activation=tf.nn.sigmoid)
#     ])

# model.compile(optimizer='adam',
#               loss='sparse_categorical_crossentropy',
#               metrics=['accuracy'])

# model.fit_generator(
#         train_generator,
#         steps_per_epoch=2000,
#         epochs=50,
#         validation_data=validation_generator,
#         validation_steps=800)

In [None]:
with tf.device('/gpu:0'):
    rows = df.index.tolist()

    images = []
    for row in rows:
        img_path = img_folder+df.iloc[row]['Railway']+'/'+df.iloc[row]['Name']+'.png'
        img = Image.open(img_path).convert('RGBA')
        img.thumbnail(img_size, Image.ANTIALIAS)
        data = np.asarray(img)
    #     data = data.flatten()
        # Append img instead of data if you want as image       
        images.append(data)

    df['Image'] = images

    cols = ['Catenary','Image']
    df = df[cols]

    labels = np.asarray(df.Catenary.tolist())
    features = np.asarray(df.Image.tolist())
    
    features = features.reshape(len(features),-1)
    
    X_train, X_test, y_train, y_test = train_test_split(features,labels,test_size = 0.20, random_state=42)

    X_train, X_test = X_train / 255.0, X_test / 255.0
    
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(32, activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Flatten(input_shape=(28,28)),
        tf.keras.layers.Dense(10, activation=tf.nn.sigmoid)
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=5)

    model.evaluate(X_test, y_test)

### Basic Example

In [9]:
mnist = tf.keras.datasets.mnist

In [7]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [8]:
x_train, x_test = x_train / 255.0, x_test / 255.0

In [11]:
x_train.shape

(60000, 28, 28)

In [16]:
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(512, activation=tf.nn.relu),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [17]:
model.fit(x_train, y_train, epochs=5)

model.evaluate(x_test, y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.06793879783731537, 0.9786]