## Predict on directories of images and build a DataFrame of the results

In [1]:
#load up the basics
import os, shutil
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#let PIL load truncated image files instead of raising an OSError; see:
#https://stackoverflow.com/questions/48395076/keras-giving-oserror-image-file-is-truncated-26-bytes-not-processed
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True


# Choose which GPU this process may use. These variables are set before
# TensorFlow is imported so they are already in place when TF looks for devices.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # order GPUs by PCI bus id
os.environ["CUDA_VISIBLE_DEVICES"] = "0"        # expose only device 0

# TensorFlow imports
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Report the TensorFlow version in use
print('TF version: {}'.format(tf.__version__))

# Report the GPU TensorFlow found, if any (ask once and reuse the answer)
gpu_name = tf.test.gpu_device_name()
print(' GPU: {}'.format(gpu_name) if gpu_name else "No GPU")
    
#help w/ predict generator from: 
#https://stackoverflow.com/questions/57516673/how-to-perform-prediction-using-predict-generator-on-unlabeled-test-data-in-kera

TF version: 2.3.2
 GPU: /device:GPU:0


In [2]:
# Restore the trained Keras model that will make the predictions.

# Location of the saved model on disk
model_path = './models/VGG16_model_4_8020split_512px'

# Load the model; compile=True asks Keras to compile it after loading
model = tf.keras.models.load_model(filepath=model_path, compile=True)

In [3]:
#flow from dataframe
#https://stackoverflow.com/questions/58116359/is-there-a-simple-way-to-use-only-half-of-the-images-in-underlying-directories-u
#https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator

In [5]:
# Build the prediction generator.

# Image counts per storm, in case we want to break the storms out for
# parallel processing (vs. running the entire directory of all storms):
# Barry, 8900
# Delta, 9785
# Dorian, 24749
# Florence, 31434
# Gordon, 2019
# Isaias, 8938
# Laura, 24165
# Michael, 9600
# Sally, 6390
# Zeta, 8644
# To run a single storm, point pred_data_dir at its folder instead, e.g.
# pred_data_dir = '/data/NOAA_ERI/Florence'
#
# NOTE(review): the recorded run reports 11 classes, but only 10 storms are
# listed above -- confirm whether an extra subdirectory exists under the root.

# Root directory; images are read from its subdirectories
pred_data_dir = '/data/NOAA_ERI'

batch_size = 32

# Image size based on the model (RGB, so 3 channels)
pix_dim = 512
imsize = (pix_dim, pix_dim)
imshape = (pix_dim, pix_dim, 3)

# Rescale the pixel values by 1/255
pred_datagen = ImageDataGenerator(rescale=1./255.)

# Flow images in batches.
# class_mode=None: the generator yields image batches only, without labels.
# shuffle=False: batches follow the order of pred_generator.filenames, so each
# prediction can later be matched to its file.
pred_generator = pred_datagen.flow_from_directory(pred_data_dir,
                                                  batch_size=batch_size,
                                                  class_mode=None,
                                                  target_size=imsize,
                                                  shuffle=False)

# Take the image count from the generator rather than hard-coding it
# (134624 for the full directory in the recorded run), so it stays correct
# when pred_data_dir is switched to a single storm.
numberofimages = len(pred_generator.filenames)

# Reset the generator counter so prediction starts from the first batch
pred_generator.reset()


Found 134624 images belonging to 11 classes.


In [None]:
# Make the predictions.
# steps is the number of batches to draw. len(pred_generator) is the integer
# number of batches needed to cover every image, including a final partial
# batch. The previous numberofimages/batch_size was a float (true division)
# and relied on a hand-maintained image count.
prediction = model.predict(pred_generator,
                           verbose=1,
                           steps=len(pred_generator))

 558/4207 [==>...........................] - ETA: 6:27:18

In [None]:
# Convert the predictions to a plain Python list.
# Presumably `prediction` is the NumPy array returned by model.predict, so each
# list entry is itself a list holding one image's output -- TODO confirm.
PredList = prediction.tolist()

In [None]:
# Pair every image file with its prediction in one DataFrame.

# File names as listed by the generator
filenames = pred_generator.filenames

# One row per image: the file name and its prediction
PredDF = pd.DataFrame(data=dict(Filename=filenames, Prediction=PredList))

#PredDF.head()
#PredDF.dtypes

In [None]:
# Unwrap each prediction from its list and make the column numeric.
#
# The column is overwritten in place, so the unwrapping has to tolerate values
# that are already scalars. Otherwise running this cell a second time fails,
# because the .str accessor is rejected on a numeric column.
def _first_prediction(value):
    """Return the first element of a list/tuple prediction; pass scalars through."""
    if isinstance(value, (list, tuple)):
        # An empty list carries no prediction; use NaN, as .str[0] would
        return value[0] if value else float("nan")
    return value

# Remove the brackets from the prediction list and convert to numeric
PredDF["Prediction"] = pd.to_numeric(PredDF["Prediction"].map(_first_prediction))

# Check that it worked
PredDF.head()

In [None]:
# Save the predictions table.

# CSV file to write
newpredcsv = '../data/tables/Model4AllPreds.csv'

# Make sure the output directory exists so the write cannot fail on a missing
# folder after the long prediction run
os.makedirs(os.path.dirname(newpredcsv), exist_ok=True)

# Write without the row index so the file holds only Filename and Prediction
PredDF.to_csv(newpredcsv, index=False)