## Predict on directories of images and save the results as a dataframe

In [1]:
#load up the basics
import os, shutil
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#Choose which GPU this notebook may use.
#CUDA_DEVICE_ORDER=PCI_BUS_ID numbers the devices by PCI bus position, and
#CUDA_VISIBLE_DEVICES="1" exposes only device 1 under that numbering.
#Keep this ahead of the TensorFlow import so the settings are in place when the GPU is initialised.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
})

#import TF stuff
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

#Report which TensorFlow build is running
print('TF version: {}'.format(tf.__version__))

#Report the GPU TensorFlow can see; an empty device name means none was found
gpu_name = tf.test.gpu_device_name()
print(' GPU: {}'.format(gpu_name) if gpu_name else "No GPU")
    
#help from: https://stackoverflow.com/questions/57516673/how-to-perform-prediction-using-predict-generator-on-unlabeled-test-data-in-kera

TF version: 2.3.2
 GPU: /device:GPU:0


In [2]:
#Restore the trained model from disk

#location of the saved model
model_path = './models/VGG16_model_3_8020split_512px'

#read the saved model back into TensorFlow (compile=True is passed explicitly)
model = tf.keras.models.load_model(filepath=model_path, compile=True)

In [None]:
#flow from dataframe
#https://stackoverflow.com/questions/58116359/is-there-a-simple-way-to-use-only-half-of-the-images-in-underlying-directories-u
#https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator

In [36]:
# build the generator
# Image counts per storm directory, kept for reference only; the count that
# is actually used is read from the generator further down.
# Barry, 8900
# Delta, 9785, broken
# Dorian, 24749
# Florence, 31434, broken
# Gordon, 2019
# Isaias, 8938, broken
# Laura, 24165
# Michael, 9600
# Sally, 6390
# Zeta, 8644

#directory
pred_data_dir = '/data/NOAA_ERI/Zeta'

batch_size = 16

#set Image size based on the model(RGB so imshape is 3)
pix_dim = 512
imsize = (pix_dim,pix_dim)
imshape = (pix_dim,pix_dim,3)

#rescale the images
pred_datagen = ImageDataGenerator(rescale =1./255.)

# Flow images in batches
# class_mode=None: yield images only (no labels), as needed for prediction.
# shuffle=False: keep a fixed order so predictions can be matched back to
# pred_generator.filenames afterwards.
pred_generator = pred_datagen.flow_from_directory(pred_data_dir,
                                                    batch_size = batch_size,
                                                    class_mode = None,
                                                    target_size = imsize,
                                                    shuffle = False)

#number of images the generator found in pred_data_dir.
#Taken from the generator instead of being typed in by hand, so it can never
#disagree with the directory when pred_data_dir is changed.
numberofimages = len(pred_generator.filenames)

#reset the generator counter
pred_generator.reset()


Found 8644 images belonging to 5 classes.


In [37]:
#make the predictions
#steps must be a whole number of batches. len(pred_generator) is the number of
#batches in one full pass over the generator (the last, possibly smaller, batch
#included), so every image is predicted exactly once and the result lines up
#with pred_generator.filenames. A hand-computed count/batch_size ratio is a
#float and silently goes wrong if the count does not match the directory.
prediction = model.predict(pred_generator,
                           verbose=1,
                           steps=len(pred_generator))



In [38]:
#turn the prediction array into nested Python lists, one inner list per row
PredList = [row.tolist() for row in prediction]

In [39]:
#image paths as listed by the generator (it was built with shuffle=False)
filenames = pred_generator.filenames

#pair every filename with its prediction in a two-column dataframe
pred_columns = {'Filename': filenames, 'Prediction': PredList}
PredDF = pd.DataFrame(data=pred_columns)

In [40]:
#preview the first rows; at this point each Prediction entry is still a one-element list
PredDF.head()

Unnamed: 0,Filename,Prediction
0,20201029a_jpgs/jpgs/C30355621.jpg,[0.5550518035888672]
1,20201029a_jpgs/jpgs/C30355747.jpg,[0.9974620342254639]
2,20201029a_jpgs/jpgs/C30355754.jpg,[0.9374889731407166]
3,20201029a_jpgs/jpgs/C30355761.jpg,[0.9878569841384888]
4,20201029a_jpgs/jpgs/C30355768.jpg,[1.0]


In [41]:
#inspect the column dtypes; Prediction is reported as object because its entries are Python lists
PredDF.dtypes

Filename      object
Prediction    object
dtype: object

In [42]:
#remove the brackets from the prediction list
#This cell is safe to re-run: a plain .str[0] raises AttributeError on a second
#run, because by then the column is already numeric and has no .str accessor.
def _first_element(value):
    """Return the first item of a list-like prediction, or the value itself if it is already a scalar.

    An empty list-like maps to NaN, which is what ``Series.str[0]`` yields for it.
    """
    if isinstance(value, (list, tuple, np.ndarray)):
        return value[0] if len(value) else np.nan
    return value

#Unwrap and convert the prediction column to numeric in one step
PredDF["Prediction"] = pd.to_numeric(PredDF["Prediction"].map(_first_element))

#and check to see it worked
PredDF.head()

Unnamed: 0,Filename,Prediction
0,20201029a_jpgs/jpgs/C30355621.jpg,0.555052
1,20201029a_jpgs/jpgs/C30355747.jpg,0.997462
2,20201029a_jpgs/jpgs/C30355754.jpg,0.937489
3,20201029a_jpgs/jpgs/C30355761.jpg,0.987857
4,20201029a_jpgs/jpgs/C30355768.jpg,1.0


In [43]:
#save the data

#new csv to save
#The file is named after the storm directory that was predicted (the last
#component of pred_data_dir), so changing pred_data_dir cannot write one
#storm's results under another storm's file name.
storm_name = os.path.basename(os.path.normpath(pred_data_dir))
newpredcsv = '../data/tables/Model4All/{}.csv'.format(storm_name)

#make sure the output folder exists; to_csv does not create missing directories
os.makedirs(os.path.dirname(newpredcsv), exist_ok=True)

PredDF.to_csv(newpredcsv, index=False)