## Code to predict directories of images and create dataframe with results

In [3]:
#load up the basics
import os, shutil
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Work around the "image file is truncated" OSError raised while loading
# partially-written JPEGs (see the linked Stack Overflow thread):
#https://stackoverflow.com/questions/48395076/keras-giving-oserror-image-file-is-truncated-26-bytes-not-processed
from PIL import ImageFile, Image
# Let PIL load truncated image files instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# NOTE(review): presumably removes Pillow's maximum-pixel-count guard so that
# very large images can be opened -- confirm against the Pillow docs.
Image.MAX_IMAGE_PIXELS = None


# Pin the GPU before TensorFlow is imported: enumerate devices in PCI bus
# order and expose only device "1" to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
})

#import TF stuff
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Report the TensorFlow version in use
print('TF version: {}'.format(tf.__version__))

# Report which GPU device (if any) TensorFlow can see
gpu_name = tf.test.gpu_device_name()
if not gpu_name:
    print("No GPU")
else:
    print(' GPU: {}'.format(gpu_name))
    
#help w/ predict generator from: 
#https://stackoverflow.com/questions/57516673/how-to-perform-prediction-using-predict-generator-on-unlabeled-test-data-in-kera

TF version: 2.3.2
 GPU: /device:GPU:0


In [16]:
# Load the trained TF model from disk

# location of the saved model
model_path = './models/VGG16_model_6_8020split_512px'

# restore the model, including its compiled state
model = tf.keras.models.load_model(
    model_path,
    compile=True,
)

In [17]:
#flow from dataframe
#https://stackoverflow.com/questions/58116359/is-there-a-simple-way-to-use-only-half-of-the-images-in-underlying-directories-u
#https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator

In [18]:
# Build the prediction generator.

# Image counts for each storm, in case we want to break those out for
# parallel processing (vs. running the entire dir of all storms).
# Barry, 8900
# Delta, 9785
# Dorian, 24749
# Florence, 31434
# Gordon, 2019
# Isaias, 8938
# Laura, 24165
# Michael, 9600
# Sally, 6390
# Zeta, 8644
# pred_data_dir = '/data/NOAA_ERI/Florence'


# directory holding the images to predict on
pred_data_dir = '/data/NOAA_ERI/Ida'

batch_size = 32

# set image size based on the model (RGB, so 3 channels)
pix_dim = 512
imsize = (pix_dim,pix_dim) 
imshape = (pix_dim,pix_dim,3)

# rescale pixel values by 1/255
pred_datagen = ImageDataGenerator(rescale =1./255.)

# Flow images in batches. class_mode=None yields images only (no labels) and
# shuffle=False keeps the batch order aligned with pred_generator.filenames.
pred_generator = pred_datagen.flow_from_directory(pred_data_dir,
                                                    batch_size = batch_size,
                                                    class_mode = None,
                                                    target_size = imsize,
                                                    shuffle = False)
# reset the generator counter
pred_generator.reset()

# Number of images to predict on, taken from the generator itself rather than
# a hand-typed constant, so it cannot drift out of sync with the directory.
numberofimages = len(pred_generator.filenames)


Found 1966 images belonging to 4 classes.


In [19]:
# Make the predictions.
# `steps` has to be a whole number of batches: round up so the final, partial
# batch is included and every image gets exactly one prediction.
prediction = model.predict(pred_generator,
                           verbose=1,
                           steps=int(np.ceil(numberofimages / batch_size)))



In [20]:
# model output as a plain Python list (one inner list per image)
PredList = prediction.tolist()

# file paths, in the same order the generator fed them to the model
filenames = pred_generator.filenames

# pair each filename with its prediction in a dataframe
PredDF = pd.DataFrame(
    {
        'Filename': filenames,
        'Prediction': PredList,
    }
)

#PredDF.head()
#PredDF.dtypes

In [21]:
# Each prediction is stored as a list; take its first element and make the
# column numeric.
first_value = PredDF["Prediction"].str.get(0)
PredDF["Prediction"] = pd.to_numeric(first_value)

# check that it worked
PredDF.head()

Unnamed: 0,Filename,Prediction
0,20210830b_jpgs/jpgs/021501-0830211921267-RGB1.jpg,0.003345
1,20210830b_jpgs/jpgs/021502-0830211921309-RGB1.jpg,0.073469
2,20210830b_jpgs/jpgs/021503-0830211921330-RGB1.jpg,0.018217
3,20210830b_jpgs/jpgs/021504-0830211921370-RGB1.jpg,0.08749
4,20210830b_jpgs/jpgs/021505-0830211921401-RGB1.jpg,0.017578


In [22]:
#save the data

#new csv to save
#newpredcsv = '../data/tables/Model6IdaPreds.csv'

#PredDF.to_csv(newpredcsv, index=False)

In [24]:
# load the master table for this storm
mdf = pd.read_csv('../data/tables/Ida.csv')

# Rename the prediction columns so they line up with the master table:
# 'Filename' becomes the join key 'file', 'Prediction' gets a numbered name.
# NOTE(review): later cells in this notebook reference 'Prediction5' while this
# cell produces 'Prediction4' -- confirm which column name is intended.
PredDF = PredDF.rename(columns={'Prediction': 'Prediction4', 'Filename': 'file'})

# left-join the predictions onto the master table, keeping every master row
New_mdf = mdf.merge(PredDF, on="file", how="left")
New_mdf.head()

Unnamed: 0.1,Unnamed: 0,file,storm_id,archive,image,size,date,geom_checksum,lr_lat,ur_lat,lr_lon,ul_lon,ll_lat,ll_lon,ul_lat,ur_lon,Prediction4
0,0,20210902a_jpgs/jpgs/022207-0902211527418-RGB1.jpg,ida,20210902a_jpgs,022207-0902211527418-RGB1.jpg,37556223,2021/09/02,4f80fe478cd2abc6f622e6d36ff140a7,29.765383,29.756738,-90.138289,-90.132984,29.755715,-90.127919,29.750146,-90.146111,0.09894
1,1,20210902a_jpgs/jpgs/022085-0902211459342-RGB1.jpg,ida,20210902a_jpgs,022085-0902211459342-RGB1.jpg,29031644,2021/09/02,d159c2a55d897b2ae11c7e358a3cf92a,29.851308,29.861502,-90.811931,-90.829799,29.848129,-90.826695,29.854718,-90.816759,0.006127
2,2,20210902a_jpgs/jpgs/023219-0902211806471-RGB1.jpg,ida,20210902a_jpgs,023219-0902211806471-RGB1.jpg,36629896,2021/09/02,23c599ca3a5eb22ba1a71c6d3beb4f05,30.377359,30.369193,-89.405619,-89.394871,30.37535,-89.39451,30.37006,-89.406138,0.027929
3,3,20210902a_jpgs/jpgs/022026-0902211449173-RGB2.jpg,ida,20210902a_jpgs,022026-0902211449173-RGB2.jpg,29907448,2021/09/02,045cf576bc88af6edc94e658e8ce66bc,29.607164,29.617381,-90.678412,-90.696177,29.604067,-90.693127,29.61064,-90.68314,0.066364
4,4,20210902a_jpgs/jpgs/023207-0902211805491-RGB1.jpg,ida,20210902a_jpgs,023207-0902211805491-RGB1.jpg,40854854,2021/09/02,7ef5a5ff35ffdb6971246e122739c5ea,30.336122,30.32792,-89.408586,-89.397793,30.334109,-89.397439,30.328811,-89.409108,0.486647


In [25]:
# Latitude = mean of the four corner latitudes
New_mdf["Latitude"] = (
    New_mdf["lr_lat"]
    .add(New_mdf["ur_lat"])
    .add(New_mdf["ll_lat"])
    .add(New_mdf["ul_lat"])
    .div(4)
)

In [26]:
# Longitude = mean of the four corner longitudes
New_mdf["Longitude"] = (
    New_mdf["lr_lon"]
    .add(New_mdf["ur_lon"])
    .add(New_mdf["ll_lon"])
    .add(New_mdf["ul_lon"])
    .div(4)
)

In [27]:
# destination for the merged table
newmdfcsv = '../data/tables/IdaPred6.csv'

# write it out without the dataframe index
New_mdf.to_csv(path_or_buf=newmdfcsv, index=False)

In [30]:
# Reload the saved table, add an empty 'labeled' column, and prefix every
# path in 'file' with the storm directory.
New_mdf = (
    pd.read_csv('../data/tables/IdaPred6.csv')
    .assign(
        labeled=np.nan,
        file=lambda table: 'Ida/' + table['file'],
    )
)
New_mdf.head()

Unnamed: 0.1,Unnamed: 0,file,storm_id,archive,image,size,date,geom_checksum,lr_lat,ur_lat,lr_lon,ul_lon,ll_lat,ll_lon,ul_lat,ur_lon,Prediction5,Latitude,Longitude,labeled
0,0,Ida/20210902a_jpgs/jpgs/022207-0902211527418-R...,ida,20210902a_jpgs,022207-0902211527418-RGB1.jpg,37556223,2021/09/02,4f80fe478cd2abc6f622e6d36ff140a7,29.765383,29.756738,-90.138289,-90.132984,29.755715,-90.127919,29.750146,-90.146111,0.009178,29.756996,-90.136326,
1,1,Ida/20210902a_jpgs/jpgs/022085-0902211459342-R...,ida,20210902a_jpgs,022085-0902211459342-RGB1.jpg,29031644,2021/09/02,d159c2a55d897b2ae11c7e358a3cf92a,29.851308,29.861502,-90.811931,-90.829799,29.848129,-90.826695,29.854718,-90.816759,0.001248,29.853914,-90.821296,
2,2,Ida/20210902a_jpgs/jpgs/023219-0902211806471-R...,ida,20210902a_jpgs,023219-0902211806471-RGB1.jpg,36629896,2021/09/02,23c599ca3a5eb22ba1a71c6d3beb4f05,30.377359,30.369193,-89.405619,-89.394871,30.37535,-89.39451,30.37006,-89.406138,0.004279,30.37299,-89.400284,
3,3,Ida/20210902a_jpgs/jpgs/022026-0902211449173-R...,ida,20210902a_jpgs,022026-0902211449173-RGB2.jpg,29907448,2021/09/02,045cf576bc88af6edc94e658e8ce66bc,29.607164,29.617381,-90.678412,-90.696177,29.604067,-90.693127,29.61064,-90.68314,0.10744,29.609813,-90.687714,
4,4,Ida/20210902a_jpgs/jpgs/023207-0902211805491-R...,ida,20210902a_jpgs,023207-0902211805491-RGB1.jpg,40854854,2021/09/02,7ef5a5ff35ffdb6971246e122739c5ea,30.336122,30.32792,-89.408586,-89.397793,30.334109,-89.397439,30.328811,-89.409108,0.000537,30.33174,-89.403231,


In [31]:
# rows whose prediction is at or above 0.5, i.e. predicted washover
is_washover = New_mdf["Prediction5"].ge(0.5)
predsWash = New_mdf.loc[is_washover]
predsWash.shape

(13, 20)

In [32]:
def Picker(Storm, roundNumber, digit, df, n_images=50,
           image_root='/data/NOAA_ERI/',
           out_root='../data/pics/modeltoLabeler/'):
    """Pick the least-certain unlabeled images of a storm and copy them out for labeling.

    Certainty is ``abs(prediction - 0.5)``, so 0 is the least certain value.
    The prediction column read is ``"Prediction" + str(digit - 1)``.

    Parameters
    ----------
    Storm : str
        Storm name; rows whose ``file`` value contains this text are considered.
    roundNumber : str
        Name of the labeling round; used as a sub-directory of ``out_root``.
    digit : int
        Round number. It is written into ``labeled`` for the selected rows and
        appended to the storm name to form the output directory name.
    df : pandas.DataFrame
        Table with at least the columns ``file``, ``labeled`` and the
        prediction column described above. It is not modified.
    n_images : int, optional
        How many images to select (default 50).
    image_root : str, optional
        Directory that the ``file`` paths are relative to.
    out_root : str, optional
        Directory under which ``<roundNumber>/<Storm><digit>`` is created.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns as ``df`` in which ``labeled`` is set
        to ``digit`` for the selected rows (existing labels are kept).

    Raises
    ------
    KeyError
        If the expected prediction column is missing from ``df``.
    """
    prediction = "Prediction" + str(digit - 1)
    if prediction not in df.columns:
        raise KeyError(
            "column '{}' (needed for digit={}) not found in df".format(prediction, digit)
        )

    ###### FIND THE IMAGES THAT ARE UNCERTAIN

    # select the storm; match the name literally (not as a regex) and treat
    # missing paths as "no match"
    in_storm = df['file'].str.contains(str(Storm), regex=False, na=False)

    # work on a copy so that adding a column never writes into a view of `df`
    StormSubset = df[in_storm].copy()

    # certainty of the prediction, from 0 to 0.5 ... 0 is the smallest certainty
    StormSubset["certainty"] = (StormSubset[prediction] - 0.5).abs()

    # the n_images least-certain images that have not been labeled yet
    smallest = StormSubset[StormSubset['labeled'].isna()].nsmallest(n_images, 'certainty')

    ###### MARK THE IMAGES THAT ARE UNCERTAIN

    # One row per selected file, tagged with the round that selected it.
    # De-duplicating keeps the left merge below from multiplying rows.
    smolSubset = smallest[["file"]].drop_duplicates().assign(Labeled=digit)

    # merge onto the full dataframe (this creates a new frame; `df` is untouched)
    df = pd.merge(df, smolSubset, on="file", how="left")

    # fold the temporary 'Labeled' marker into 'labeled', keeping existing labels;
    # assign the result instead of using a chained inplace fillna
    df['labeled'] = df['labeled'].fillna(df['Labeled'])
    df = df.drop(columns=['Labeled'])

    ###### COPY THE IMAGES THAT ARE UNCERTAIN

    # make the output dir (and any missing parents); re-running is fine
    newdir = os.path.join(out_root, str(roundNumber), str(Storm) + str(digit))
    os.makedirs(newdir, exist_ok=True)

    # copy (not move) every selected image into the output dir
    for rel_path in smallest['file'].astype(str):
        shutil.copy(os.path.join(image_root, rel_path), newdir)

    return df


In [34]:
# select, mark and copy out the round-six images for Ida
df = Picker(
    Storm='Ida',
    roundNumber='roundSix',
    digit=6,
    df=New_mdf,
)

In [29]:
# preview the first rows of the table returned by Picker
df.head()

Unnamed: 0.1,Unnamed: 0,file,storm_id,archive,image,size,date,geom_checksum,lr_lat,ur_lat,lr_lon,ul_lon,ll_lat,ll_lon,ul_lat,ur_lon,Prediction5,Latitude,Longitude,labeled
0,0,Ida/20210902a_jpgs/jpgs/022207-0902211527418-R...,ida,20210902a_jpgs,022207-0902211527418-RGB1.jpg,37556223,2021/09/02,4f80fe478cd2abc6f622e6d36ff140a7,29.765383,29.756738,-90.138289,-90.132984,29.755715,-90.127919,29.750146,-90.146111,0.009178,29.756996,-90.136326,
1,1,Ida/20210902a_jpgs/jpgs/022085-0902211459342-R...,ida,20210902a_jpgs,022085-0902211459342-RGB1.jpg,29031644,2021/09/02,d159c2a55d897b2ae11c7e358a3cf92a,29.851308,29.861502,-90.811931,-90.829799,29.848129,-90.826695,29.854718,-90.816759,0.001248,29.853914,-90.821296,
2,2,Ida/20210902a_jpgs/jpgs/023219-0902211806471-R...,ida,20210902a_jpgs,023219-0902211806471-RGB1.jpg,36629896,2021/09/02,23c599ca3a5eb22ba1a71c6d3beb4f05,30.377359,30.369193,-89.405619,-89.394871,30.37535,-89.39451,30.37006,-89.406138,0.004279,30.37299,-89.400284,
3,3,Ida/20210902a_jpgs/jpgs/022026-0902211449173-R...,ida,20210902a_jpgs,022026-0902211449173-RGB2.jpg,29907448,2021/09/02,045cf576bc88af6edc94e658e8ce66bc,29.607164,29.617381,-90.678412,-90.696177,29.604067,-90.693127,29.61064,-90.68314,0.10744,29.609813,-90.687714,
4,4,Ida/20210902a_jpgs/jpgs/023207-0902211805491-R...,ida,20210902a_jpgs,023207-0902211805491-RGB1.jpg,40854854,2021/09/02,7ef5a5ff35ffdb6971246e122739c5ea,30.336122,30.32792,-89.408586,-89.397793,30.334109,-89.397439,30.328811,-89.409108,0.000537,30.33174,-89.403231,
