#### Setup

In [6]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import sys
from datetime import datetime

import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


### Start Program

In [7]:
'''
Parameters 
----------
Set for each test. 


img_folder: Root folder of image collection

results_file: JSON file for output of results and metadata

description: String for labeling/notes

sample_size: Sample size to pull from each csv, 0-1

img_size: Native resolution is 1280x1280

'''

img_folder = '../data/output_images/'

train_folder = '../data/output_images/train/'
test_folder = '../data/output_images/test/'
validation_folder = '../data/output_images/validation/'

sample_size = 1

img_size = (1280,1280)

In [10]:
'''
Loads csv only, no images.
'''

# Name of folder
names = [
    'Australia',
    'China',
    'Germany',
    'NewarkLR',
    'Switzerland',
    'Amtrak',
    'BostonMTBA',
    'DenverRTD',
    'LosAngelesMR',
    'SeattleLLR',
    'Netherlands'
]

# Name of csv
abbr = [
    'AUS',
    'CHN',
    'GRM',
    'NEW',
    'SWZ',
    'AMT',
    'BOS',
    'DEN',
    'LAA',
    'SEA',
    'NET'
]
locations = dict(zip(names,abbr))

# Collect each csv into one df adding railway name
frames = []
for key,value in locations.items():
    try:
        filename = img_folder+key+'/'+value+'.csv'
        tmp = pd.read_csv(filename,header=0)
        tmp['Railway'] = key
        
        # Take sample from each folder 
        tmp = tmp.sample(frac=sample_size).reset_index(drop=True)
        frames.append(tmp)
    except Exception as e:
        print(e)

df = pd.concat(frames)

df = df.dropna()
df['Catenary'] = df['Catenary'].astype(int)

df.head()

[Errno 2] File b'../data/output_images/China/CHN.csv' does not exist: b'../data/output_images/China/CHN.csv'


Unnamed: 0,Name,Longitude,Latitude,Catenary,Railway
0,153.00315669999998_-27.5259447,153.003157,-27.525945,1,Australia
1,145.08054040000002_-37.851580299999995,145.08054,-37.85158,1,Australia
2,145.1196571_-37.875116999999996,145.119657,-37.875117,1,Australia
3,151.1451506_-33.8837061,151.145151,-33.883706,1,Australia
4,151.14737230000003_-33.879178499999995,151.147372,-33.879178,1,Australia


In [11]:
'''
Open known non-catenary lines and add differntial to df
'''

zeros = df.Catenary.value_counts()[0]
ones = df.Catenary.value_counts()[1]

names = [
    'Amtrak_non_cat_1',
    'Amtrak_non_cat_2',
    'Amtrak_non_cat_3'
]

abbr = [
    'ANC',
    'ANC2',
    'ANC3'
]
locations = dict(zip(names,abbr))

diff = ones - zeros

if diff > 0:
    frames = []
    for key,value in locations.items():
        try:
            filename = img_folder+key+'/'+value+'.csv'
            tmp = pd.read_csv(filename,header=0)
            tmp['Railway'] = key
            frames.append(tmp)
        except Exception as e:
            print(e)

    try:
        duds = pd.concat(frames)
        duds = duds.dropna()
        duds['Catenary'] = duds['Catenary'].astype(int) 
        
        duds = duds.sample(n=diff).reset_index(drop=True)
        df = pd.concat([df,duds]).reset_index(drop=True)
    except Exception as e:
        print(e)
        duds = duds.sample(len(duds.index.tolist())).reset_index(drop=True)
        df = pd.concat([df,duds]).reset_index(drop=True)
        
df.shape

(640, 5)

In [13]:
ones = df[df['Catenary']==1]
zeros = df[df['Catenary']==0]

In [22]:
'''
Load images into df
'''
rows = zeros.index.tolist()

images = []
for row in rows:
    img_path = img_folder+df.iloc[row]['Railway']+'/'+df.iloc[row]['Name']+'.png'
    img = Image.open(img_path).convert('RGBA')
    img.thumbnail(img_size, Image.ANTIALIAS)
#     data = np.asarray(img)
#     data = data.flatten()
    images.append(img)
    
zeros['Image'] = images

cols = ['Catenary','Image']
zeros = zeros[cols]

zeros.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Unnamed: 0,Catenary,Image
19,0,<PIL.Image.Image image mode=RGBA size=1280x128...
22,0,<PIL.Image.Image image mode=RGBA size=1280x128...
24,0,<PIL.Image.Image image mode=RGBA size=1280x128...
28,0,<PIL.Image.Image image mode=RGBA size=1280x128...
43,0,<PIL.Image.Image image mode=RGBA size=1280x128...


In [23]:
len(zeros.index.tolist())

320

In [26]:
images = zeros['Image'].tolist()

i = 32
for image in images[288:]:
    image.save(validation_folder+str(i)+'.png')
    i += 1

### Loading images into tf

In [None]:
labels = np.asarray(df.Catenary.tolist())
features = np.asarray(df.Image.tolist())

In [None]:
features = features.reshape(len(features),-1)

In [None]:
features.shape

In [None]:
X_train, X_test, y_train, y_test = train_test_split(features,labels,test_size = 0.20, random_state=42)

X_train, X_test = X_train / 255.0, X_test / 255.0

In [None]:
len(X_test)

In [None]:
with tf.device('/gpu:0'):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(32, activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Flatten(input_shape=(28,28)),
        tf.keras.layers.Dense(10, activation=tf.nn.sigmoid)
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=5)

    model.evaluate(X_test, y_test)

In [None]:
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        '../data/output_images/train/',
        target_size=(66, 66),
        batch_size=32,
        class_mode='binary')

# validation_generator = test_datagen.flow_from_directory(
#         '../data/output_images/test',
#         target_size=(20, 20),
#         batch_size=32,
#         class_mode='binary')


# model = tf.keras.models.Sequential([
#         tf.keras.layers.Dense(32, activation=tf.nn.relu),
#         tf.keras.layers.Dropout(0.2),
#         tf.keras.layers.Flatten(input_shape=(28,28)),
#         tf.keras.layers.Dense(10, activation=tf.nn.sigmoid)
#     ])

# model.compile(optimizer='adam',
#               loss='sparse_categorical_crossentropy',
#               metrics=['accuracy'])

# model.fit_generator(
#         train_generator,
#         steps_per_epoch=2000,
#         epochs=50,
#         validation_data=validation_generator,
#         validation_steps=800)

In [None]:
with tf.device('/gpu:0'):
    rows = df.index.tolist()

    images = []
    for row in rows:
        img_path = img_folder+df.iloc[row]['Railway']+'/'+df.iloc[row]['Name']+'.png'
        img = Image.open(img_path).convert('RGBA')
        img.thumbnail(img_size, Image.ANTIALIAS)
        data = np.asarray(img)
    #     data = data.flatten()
        # Append img instead of data if you want as image       
        images.append(data)

    df['Image'] = images

    cols = ['Catenary','Image']
    df = df[cols]

    labels = np.asarray(df.Catenary.tolist())
    features = np.asarray(df.Image.tolist())
    
    features = features.reshape(len(features),-1)
    
    X_train, X_test, y_train, y_test = train_test_split(features,labels,test_size = 0.20, random_state=42)

    X_train, X_test = X_train / 255.0, X_test / 255.0
    
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(32, activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Flatten(input_shape=(28,28)),
        tf.keras.layers.Dense(10, activation=tf.nn.sigmoid)
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=5)

    model.evaluate(X_test, y_test)

### Basic Example

In [None]:
mnist = tf.keras.datasets.mnist

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
x_train, x_test = x_train / 255.0, x_test / 255.0

In [None]:
len(x_train)

In [None]:
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(512, activation=tf.nn.relu),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
model.fit(x_train, y_train, epochs=5)

model.evaluate(x_test, y_test)

### Loading data example

In [None]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

In [None]:
with tf.device('/gpu:0'):
    a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
    b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
    c = tf.matmul(a, b)

with tf.Session() as sess:
    print (sess.run(c))