///////////////////////////////////////////////////////////////////////// // // © University of Southampton IT Innovation Centre, 2023 // // Copyright in this software belongs to University of Southampton // IT Innovation Centre of University Road, Southampton, GB SO17 1BJ, UK. // // This software may not be used, sold, licensed, transferred, copied // or reproduced in whole or in part in any manner or form or in or // on any media by any person other than in accordance with the terms // of the Licence Agreement supplied with the software, or otherwise // without the prior written consent of the copyright owners. // // This software is distributed WITHOUT ANY WARRANTY, without even the // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // PURPOSE, except where stated in the Licence Agreement supplied with // the software. // //      Created By :          Ioannis Matthaiou //      Created Date :        22/02/2023 //      Created for Project : GEODYNAMICS // /////////////////////////////////////////////////////////////////////////

RUN CODE FROM HERE. Press Ctrl + Enter in each cell to run its code and observe the output below it.

In [None]:
# Install required libraries
# Installs the third-party packages imported by the next cell (numpy is not
# listed explicitly; presumably it is pulled in as a dependency - confirm).
# NOTE(review): `!pip` runs whichever pip is first on the shell PATH, which may
# not belong to the Python environment of the running kernel; consider
# `%pip install ...` instead - TODO confirm for the target setup.
!pip install tensorflow matplotlib opencv-python pandas

In [5]:
# Import required libraries
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import os
import cv2 as cv
import shutil
import numpy as np
%matplotlib inline

# Show the directory the notebook runs from; the relative paths used in the
# cells below ('./test_images/', './predictions_file.csv', ...) depend on it.
print(f"Current working directory: {os.getcwd()}")
# The current working directory should look like: C:\Users\<FULL_PATH>\imgclass_test
# If it does not, change it before continuing, for instance (raw string, so the
# backslashes are not interpreted as escape sequences):
# os.chdir(r'C:\Users\<FULL_PATH>\imgclass_test')

Current working directory: C:\Users\yianniswork\Desktop


The following is a fixed set of parameters for loading the images. IMPORTANT: batch_size, img_width and img_height must never be changed.
I have included a sample of 10 images in the test_images folder. You can remove them if you don't need them. Any images that you want to use to test the model must be added to the "test_images" folder. Run the code in the cell below to process the images that are in the test_images folder. A new folder, test_images_prep, will be created and the processed images will be stored in it.

In [6]:
# Fixed image-loading configuration (the notebook text states that
# batch_size, img_width and img_height must never change).
img_width = 256
img_height = 256
batch_size = 64
# Cropping switch read by cropimg: 1 crops the images, any other value
# leaves them uncropped.
crop_imgs = 0
# Folder holding the raw images, and folder receiving the processed copies.
stored_imgs_dir = './test_images/'
new_stored_imgs_dir = './test_images_prep/'

def cropimg(stored_imgs_dir,new_stored_imgs_dir,crop_imgs):
    """Convert every image in a folder to a grayscale JPEG, optionally cropped.

    Each entry of ``stored_imgs_dir`` is read as a grayscale image and written
    to ``new_stored_imgs_dir`` under the same base name with a ``.jpg``
    extension. The destination folder is created when it does not exist yet.

    Parameters
    ----------
    stored_imgs_dir : str
        Folder containing the original images.
    new_stored_imgs_dir : str
        Folder that receives the processed images.
    crop_imgs : int
        1 to crop each image to the fixed window defined below; any other
        value writes the image uncropped.

    Notes
    -----
    Entries that OpenCV cannot decode (non-image files, sub-folders) are
    reported and skipped instead of aborting the whole run.
    """
    # List the source entries first, so a missing source folder fails before
    # anything is created.
    list_of_original_images = os.listdir(stored_imgs_dir)

    # Create the destination folder for the processed images if needed.
    if not os.path.exists(new_stored_imgs_dir):
        print('Folder created with absolute path: '
              + os.path.abspath(new_stored_imgs_dir))
        os.mkdir(new_stored_imgs_dir)
    else:
        print('Folder already exists so any additional images will be added')

    # Cropping window in pixels: rows ycropmin..mainheight, columns
    # xcropmin..mainwidth of the source image.
    xcropmin = 150
    ycropmin = 265
    mainwidth = 1000
    mainheight = 800

    for image_name in list_of_original_images:
        image_array = cv.imread(
            os.path.join(stored_imgs_dir, image_name),
            cv.IMREAD_GRAYSCALE)
        # cv.imread signals an unreadable file by returning None rather than
        # raising, so it has to be checked explicitly.
        if image_array is None:
            print('Skipping file that could not be read as an image: '
                  + image_name)
            continue

        if crop_imgs == 1:
            # Array slicing keeps only the region of interest.
            cropped_image = image_array[ycropmin:mainheight, xcropmin:mainwidth]
        else:
            cropped_image = image_array  # No cropping

        # Swap the real extension for '.jpg'; slicing off the last three
        # characters breaks for extensions such as '.jpeg' or '.tiff'.
        base_name = os.path.splitext(image_name)[0]
        cv.imwrite(
            os.path.join(new_stored_imgs_dir, base_name + '.jpg'),
            cropped_image)

# Preprocess the raw images with cropimg; the processed copies are loaded
# through the tensorflow utilities in the following cell.
cropimg(
    stored_imgs_dir=stored_imgs_dir,
    new_stored_imgs_dir=new_stored_imgs_dir,
    crop_imgs=crop_imgs,
)

Folder created with absolute path: C:\Users\yianniswork\Desktop\test_images_prep


Run the following cell to format images into an appropriate tensorflow pipeline

In [7]:
# Load every processed image as a grayscale array resized to
# (img_height, img_width) and stack them into one batch array.
listofimgs = os.listdir(new_stored_imgs_dir)
Nimages = len(listofimgs)
images = []
for img in listofimgs:
    print(img)
    img_file = os.path.join(new_stored_imgs_dir, img)
    loaded_img = tf.keras.utils.load_img(
        img_file,
        color_mode="grayscale",
        target_size=(img_height, img_width),
        interpolation='nearest',
        keep_aspect_ratio=False,
    )
    # Prepend a batch axis so the per-image arrays can be stacked below.
    img = tf.keras.utils.img_to_array(loaded_img)[np.newaxis, ...]
    images.append(img)
images = np.vstack(images)

plt_2022-02-04_02.36.22.116360.jpg
plt_2022-02-11_13.59.57.283316.jpg
plt_2022-02-12_14.00.28.444328.jpg
plt_2022-02-14_01.21.59.849543.jpg
plt_2022-02-15_05.46.24.982109.jpg
plt_2022-02-15_08.46.27.412065.jpg
plt_2022-02-15_10.07.28.529347.jpg
plt_2022-02-22_10.54.54.011347.jpg
plt_2022-03-22_01.55.26.747745.jpg
plt_2022-03-22_06.43.30.728986.jpg


Run the following cell to load the model from the saved_model folder and make predictions for each image. The code will generate a .csv file with the prediction results. The .csv file (named predictions_file.csv) will be located in the current working directory. There will be 4 columns in predictions_file.csv. The first three are the predicted probabilities of the three classes (geophysical, not event and not geophysical); the fourth (fileID) identifies the image by the part of its file name that follows 'plt'. The classes are:

'geophysical': Geophysical events, i.e. tremors, earthquakes, etc.
'not geophysical': Non geophysical events, i.e. whales, air guns, etc.
'not event': Only background noise
Remember: The .csv file must be closed before running the following cell.

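The following cell will also create 4 different folders inside a new 'predictions_categories' folder (any existing 'predictions_categories' folder is deleted first):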

'geophysical': includes images predicted with high probability as being geophysical
'nongeophysical': includes images predicted with high probability as being not geophysical (whales, ships, etc.)
'nonevents': includes images predicted with high probability as being noise
'lowprobability': includes images NOT predicted with high probability as being one of the above three classes. These are images that may have multiple events in them and the algorithm cannot assign high probability to a single class for it to be classified.
The 'high probability' threshold can be changed through the abs_prob variable. It is currently set to 0.75.

In [8]:
# Load the trained CNN from disk and score every image of the `images` batch.
model_cnn = tf.keras.models.load_model('./10-04_18-14')
test_predictions = model_cnn.predict(images)
# Softmax turns the model outputs into per-image class probabilities
# (assumes the model emits unnormalised scores - TODO confirm).
pred_prob = tf.nn.softmax(test_predictions)
pred_prob = pred_prob.numpy()

# One identifier per image: the part of the file name after the first 'plt'.
# Names without 'plt' (or with nothing after it) keep the full file name;
# indexing the split result unconditionally raised IndexError for such files.
separator = 'plt'
fileID = []
for img_name in listofimgs[:len(pred_prob)]:
    name_parts = img_name.split(separator, 1)
    if len(name_parts) == 2 and name_parts[1]:
        fileID.append(name_parts[1])
    else:
        fileID.append(img_name)

# Table of class probabilities plus the image identifier, shown and saved.
df = pd.DataFrame(pred_prob,
                  columns = ['geophysical','not event','not geophysical'])
df['fileID'] = fileID
print(df)
print(type(df))
df.to_csv(
    './predictions_file.csv',
    float_format='%.2f',
    index=False) # Comma-separated (pandas default), without the row index

# Start from an empty output tree so results of earlier runs cannot mix in.
predictions_folder = './predictions_categories'
if os.path.exists(predictions_folder):
    shutil.rmtree(os.path.abspath(predictions_folder))
    print('Existing directory '+ predictions_folder +' has been deleted')
os.mkdir(predictions_folder)

# One sub-folder per predicted class, plus one for low-confidence images.
path_geoph = predictions_folder+'/geophysical'
path_nongeoph = predictions_folder+'/nongeophysical'
path_nonevent = predictions_folder+'/nonevents'
path_lowprob = predictions_folder+'/lowprobability'
for category_path in (path_geoph, path_nongeoph, path_nonevent, path_lowprob):
    if not os.path.exists(category_path):
        os.mkdir(category_path)

# Minimum probability for an image to be filed under a single class.
abs_prob = 0.75

# Destination folders in the same order as the probability columns:
# geophysical, not event, not geophysical.
class_paths = (path_geoph, path_nonevent, path_nongeoph)
for class_probs, file_id, image in zip(pred_prob, fileID, images):
    # Default to the low-probability folder; the first class whose
    # probability exceeds the threshold overrides it.
    target_path = path_lowprob
    for class_prob, class_path in zip(class_probs, class_paths):
        if class_prob > abs_prob:
            target_path = class_path
            break
    cv.imwrite(target_path+'/'+file_id, image)

   geophysical  not event  not geophysical                           fileID
0          1.0        0.0              0.0  _2022-02-04_02.36.22.116360.jpg
1          1.0        0.0              0.0  _2022-02-11_13.59.57.283316.jpg
2          0.0        0.0              1.0  _2022-02-12_14.00.28.444328.jpg
3          1.0        0.0              0.0  _2022-02-14_01.21.59.849543.jpg
4          1.0        0.0              0.0  _2022-02-15_05.46.24.982109.jpg
5          0.0        0.0              1.0  _2022-02-15_08.46.27.412065.jpg
6          1.0        0.0              0.0  _2022-02-15_10.07.28.529347.jpg
7          0.0        1.0              0.0  _2022-02-22_10.54.54.011347.jpg
8          0.0        0.0              1.0  _2022-03-22_01.55.26.747745.jpg
9          0.0        0.0              1.0  _2022-03-22_06.43.30.728986.jpg
<class 'pandas.core.frame.DataFrame'>


The following code may be used to make a prediction for a single image (located in the test_images_prep folder) by changing img_fileID.

In [None]:
# Predict the class of one processed image; edit img_fileID to pick another
# file (keep the leading '/', it is concatenated directly onto test_data_dir).
test_data_dir = './test_images_prep'
img_fileID = '/plt_2022-02-04_02.36.22.116360.jpg'
imgtest_or = tf.keras.utils.load_img(
    test_data_dir+img_fileID,
    color_mode="grayscale",
    # Keras expects target_size as (height, width); this also matches the
    # order used when the batch of images is loaded earlier in the notebook.
    target_size=(img_height,img_width),
    interpolation='nearest',
    keep_aspect_ratio=False
    )
plt.imshow(imgtest_or)
plt.show()
# The model is fed batches, so add a leading batch axis of size 1.
x = tf.keras.utils.img_to_array(imgtest_or)
x = np.expand_dims(x, axis=0)
test_prediction = model_cnn.predict(x, batch_size=batch_size)
# Softmax over the single output row gives the three class probabilities,
# in the order geophysical, not event, not geophysical.
pred_prob_sngl = tf.nn.softmax(test_prediction[0,:])
pred_prob_sngl = pred_prob_sngl.numpy()
print(f"Model predicts image with file name: {img_fileID[1:]} as being: ")
print(f"geophysical event with probability = {100 * (pred_prob_sngl[0]):.2f}%")
print(f"non event / noise with probability = {100 * (pred_prob_sngl[1]):.2f}%")
print(f"non geophysical event with probability = {100 * (pred_prob_sngl[2]):.2f}%")