In [1]:
import os
import time
import shutil

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix

import itertools

import matplotlib.pyplot as plt

import tensorflow.keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Flatten, Dense, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator #, array_to_img, img_to_array, load_img
from tensorflow.keras.layers import BatchNormalization
# from tensorflow.keras.convolutional import *
%matplotlib inline

Constants

In [2]:
# Working directory of the kernel; the tag CSV is resolved relative to it.
SELF_PATH = os.getcwd()

# Root of the shared image drive. Defaults to the original Windows mount point;
# set the P_SICK_FILE_STREAM environment variable to point somewhere else.
PATH_TO_FILE_STREAM = os.environ.get('P_SICK_FILE_STREAM', 'G:\\Shared drives\\P-Sick')

# Join component by component so the platform's separator is used throughout
# (on Windows this yields the same string as the old backslash literal).
PATH_TO_IMAGES = os.path.join(PATH_TO_FILE_STREAM, 'small', 'Florence', '20180917a_jpgs', 'jpgs')
PATH_TO_TAG_CSV = os.path.join(SELF_PATH, '../tagging_data.csv')

# Fraction of rows held out: passed as test_size to train_test_split and as
# validation_split to model.fit.
TEST_TO_TRAIN_RATIO = 0.3

# Destination folders for the copied training / testing images.
TRAINING_IMAGE_FOLDER = "../_training_images"
TESTING_IMAGE_FOLDER = "../_testing_images"

These are the ids for each impact class:
```
NoneId:0  
SwashId:1  
CollisionId:2  
OverwashId:3  
InundationId:4
```

# Getting the images

1. We need to load the data from the csv
2. Split the images up into a training set and a test set, and then place them in separate folders.

First, let's load the CSV that holds the tags for all of the completely tagged images.

In [3]:
# Read every tag column, then keep only the image file name and its impact id.
impact_columns = ['image_id', 'impact']
df_image_tags = pd.read_csv(PATH_TO_TAG_CSV)
df_impact_images = df_image_tags[impact_columns]

In [4]:
# Fixed random_state keeps the train/test partition reproducible across runs.
split_frames = train_test_split(
    df_impact_images,
    test_size=TEST_TO_TRAIN_RATIO,
    random_state=420,
)
df_training_images, df_testing_images = split_frames
df_testing_images.head()

Unnamed: 0,image_id,impact
195,P26059362.jpg,1
122,P26056609.jpg,3
24,P26056111.jpg,3
108,P26048120.jpg,0
80,P26056256.jpg,2


Before we copy the images, remove all files within these folders

In [5]:
# Empty both destination folders so images from a previous split do not linger.
for folder in (TRAINING_IMAGE_FOLDER, TESTING_IMAGE_FOLDER):
    # Create the folder on a fresh checkout instead of failing in os.listdir.
    os.makedirs(folder, exist_ok=True)
    for file_name in os.listdir(folder):
        file_path = os.path.join(folder, file_name)
        # os.remove raises on directories, so only regular files are deleted.
        if os.path.isfile(file_path):
            os.remove(file_path)

Copy our training and test images into their respective folders, while keeping metadata with copy2

In [6]:
# Copy each split into its own folder. copy2 preserves file metadata.
# The training folder must be filled from the training frame; it was
# previously filled from the testing frame, so both folders held test images.
for image in df_training_images['image_id']:
    shutil.copy2(os.path.join(PATH_TO_IMAGES, image), os.path.join(TRAINING_IMAGE_FOLDER, image))

for image in df_testing_images['image_id']:
    shutil.copy2(os.path.join(PATH_TO_IMAGES, image), os.path.join(TESTING_IMAGE_FOLDER, image))

print("Moved training and test images to their respective folders")

print("Moved training and test images to their respective folders")

Moved training and test images to their respective folders


# Preprocessing the images

1. For the training df, separate the `image_id` and the `impact` into different np arrays for Keras

In [7]:
training_impacts = df_training_images['impact']
# NOTE: the samples are just row positions 0..n-1, not image data.
image_samples = np.array(range(len(training_impacts)))
# Integer impact ids used as the training targets.
image_labels = training_impacts.to_numpy()

# Create Model

In [8]:
# Small fully connected classifier that takes one scalar feature per sample.
model = Sequential()
model.add(Dense(16, input_shape=(1,), activation='relu'))
model.add(Dense(32, activation='relu'))
# The output width (5) has to equal the number of impact classes.
model.add(Dense(5, activation='softmax'))

# Train Model

In [9]:
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
# The sparse loss is used because the labels are integer class ids, not one-hot.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Hold out a fraction of the training arrays for validation during fitting.
model.fit(
    image_samples,
    image_labels,
    batch_size=5,
    epochs=5,
    shuffle=True,
    verbose=2,
    validation_split=TEST_TO_TRAIN_RATIO,
)

Train on 121 samples, validate on 52 samples
Epoch 1/5
121/121 - 0s - loss: 8.2012 - accuracy: 0.2562 - val_loss: 23.8097 - val_accuracy: 0.0962
Epoch 2/5
121/121 - 0s - loss: 7.3669 - accuracy: 0.2562 - val_loss: 21.2493 - val_accuracy: 0.0962
Epoch 3/5
121/121 - 0s - loss: 6.5535 - accuracy: 0.2562 - val_loss: 18.5886 - val_accuracy: 0.0962
Epoch 4/5
121/121 - 0s - loss: 5.7964 - accuracy: 0.2562 - val_loss: 15.9653 - val_accuracy: 0.0962
Epoch 5/5
121/121 - 0s - loss: 5.0643 - accuracy: 0.2562 - val_loss: 13.5220 - val_accuracy: 0.0962


<tensorflow.python.keras.callbacks.History at 0x2d7ddbb8668>

# Validate Model

In [10]:
# TODO: no validation step has been implemented yet.

# Predicting

In [11]:
# NOTE(review): despite the name, `test_labels` holds the model inputs (row
# positions), and it is sized from the training frame — confirm whether the
# testing frame was intended here.
num_samples = len(df_training_images['impact'])
test_labels = np.array(range(num_samples))
# One row of five class probabilities per input sample.
predictions = model.predict(test_labels, batch_size=5, verbose=2)

173/1 - 0s


In [12]:
# Show the class-probability vectors for the first five samples.
first_rows = predictions[:5]
for class_probabilities in first_rows:
    print(class_probabilities)

[0.20356572 0.19951431 0.20196655 0.1952965  0.1996569 ]
[0.19356059 0.20665938 0.19735663 0.20239978 0.2000237 ]
[0.1823911  0.21323806 0.19124639 0.21257997 0.20054445]
[0.17159909 0.21968426 0.18503731 0.22292523 0.20075409]
[0.16119759 0.22597758 0.17875478 0.23341475 0.2006552 ]


In [13]:

# Sequential.predict_classes was removed in TensorFlow 2.6. For a softmax
# output it was equivalent to taking the argmax over the class axis.
class_probabilities = model.predict(test_labels, batch_size=5, verbose=2)
predictions_rounded = np.argmax(class_probabilities, axis=-1)
# Show the predicted impact id for the first five samples.
for predicted_class in predictions_rounded[:5]:
    print(predicted_class)

173/1 - 0s
0
1
1
3
3
