In [None]:
#Import Tensor Flow and Keras
import tensorflow as tf
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
from keras.preprocessing import image

#Import some image loading and image processing stuff.
import PIL 
import os
from PIL import Image
import numpy as np
import random
import photoSort as ps  #Photosort is a custom Python file, To be stored nearby.
import pandas as pd
import time
import csv

In [None]:
#Import the actual model we are going to be using.
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input

#We could also import some other model

#from keras.applications.resnet50 import ResNet50
#from keras.applications.resnet50 import preprocess_input

#from keras.applications.inception_v3 import InceptionV3
#from keras.applications.inception_v3 import preprocess_input

# Other models also available, see:
# https://github.com/fchollet/keras/tree/master/keras/applications

In [None]:
# Load the selected base model.
#   weights='imagenet'  -> use the weights calculated on the ImageNet data set.
#   include_top=False   -> ask for only the convolution layers.
#   pooling='max'       -> request a max pooling layer on top
#                          (as opposed to 'avg' or None).
#
# All three model loading functions take the same parameters, so switching
# architecture only means swapping the constructor (and its matching imports).
baseModel = VGG16(
    weights='imagenet',
    include_top=False,
    pooling='max',
)
#baseModel = InceptionV3(weights='imagenet', include_top=False,pooling='max')
#baseModel = ResNet50(weights='imagenet', include_top=False,pooling='max')


In [None]:
    
## !! SET THESE PARAMETERS !! ##
# Number of images to process. Any value <= 0 (e.g. -1) means "use every row
# of the CSV" instead of drawing a random subset.
NumIm = 45
# Path of the CSV index file describing the photos.
directory = 'CSVliveshere/AsFoundPhotoIndex-Table1.csv'
# When True, the photo link and hazard rating of each image are printed.
verbose = False

# Read the CSV index into a DataFrame.
df = pd.read_csv(directory)

# Highest row number present in the CSV (rows are counted from 0).
maxInt = len(df) - 1

In [None]:
# Choose which CSV rows to process: a random subset of NumIm rows when a
# positive count was requested, otherwise every row in file order
# (that second case is NOT random).
totalRows = maxInt + 1
samples = random.sample(range(totalRows), NumIm) if NumIm > 0 else range(totalRows)

# Pre-allocate the outputs, one row per selected image.
#   outLabels : two columns per image.
#   outData   : as many columns as the base model outputs, so its size
#               scales with the output shape of the chosen base model.
numSamples = len(samples)
outLabels = np.zeros((numSamples, 2))
outData = np.zeros((numSamples, baseModel.output.shape[1]))
# Collects the photo link of every processed image.
listOfFilenames = []
#Here the data scales in size based on output shape of our base model



In [None]:
# Running totals for the extraction loop.
Its = 0          # Iteration number (also the row written in outData/outLabels)
totalSize = 0    # Total size of files downloaded
low_number = 0   # Number of low hazard files downloaded
high_number = 0  # Number of high hazard files downloaded

start = time.time()

# For every selected CSV row: fetch the photo, run it through the base model
# and store the model output together with a label derived from the category
# reported by the download helper. Labels are one-hot encoded:
#   Low  = [1,0]
#   High = [0,1]
for k in samples:
    photoLink = df['PHOTOLINK'][k]
    hazardRating = df['HAZARDMAPRATING'][k]

    # Helper from the custom photoSort module; returns the category and the
    # size of the download. The image is then read back from ./tmp/temp.png
    # below (presumably written there by the helper - confirm in photoSort).
    category, size = ps.download_image_into_tmp(photoLink, hazardRating)
    if verbose:
        print(photoLink)
        print(hazardRating)
        print('\n')

    listOfFilenames.append(photoLink)

    # Load the image resized to 224x224 and add a leading batch axis so the
    # model receives a batch containing a single image.
    img = image.load_img('./tmp/temp.png', target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    # preprocess_input is imported from the same keras.applications module as
    # the model, so it should apply the input preparation that model expects.
    # Resizing is handled above through target_size. How pre-processing is
    # done (for example a different target size) may affect the output, so it
    # is worth considering when interpreting the results.
    x = preprocess_input(x)

    preds = baseModel.predict(x)
    outData[Its, :] = preds[0, :]

    if category == 'low':
        outLabels[Its, 0] = 1
        low_number += 1
    else:
        outLabels[Its, 1] = 1
        high_number += 1
    Its += 1
    totalSize += size


end = time.time()

# Report fractions relative to the number of images actually processed.
# NumIm must not be the denominator: it is -1 (or 0) when every image is
# requested, which produced a negative fraction or a ZeroDivisionError.
if Its > 0:
    fractionHigh = float(high_number) / float(Its)
    fractionLow = float(low_number) / float(Its)
else:
    # Nothing was processed, so there is no meaningful fraction to report.
    fractionHigh = fractionLow = 0.0

# NOTE(review): the 'Fraction Low' text lacks the ':' used after
# 'Fraction High'; left untouched here so the printed output stays the same.
print('\n High Number: '+ str(high_number)+'  Fraction High:'+ str(fractionHigh))
print('\n Low Number: '+ str(low_number)+ '  Fraction Low'+ str(fractionLow))
# NOTE(review): the unit of `size` is whatever photoSort returns - confirm it is Mb.
print('\n Total Download Size: '+str(totalSize)+' Mbs')
print('\n Total Total Time Taken: '+ str(round(end-start))+' seconds')

In [None]:
# Persist the model outputs and their labels as NumPy binary files.
# Because the output is purely numeric, saving as csv is not particularly
# efficient; a numeric numpy file significantly reduces file size.
# The csv alternative would be:
#np.savetxt("SafetyWeights.csv",outData)
#np.savetxt("SafetyLabels.csv",outLabels)
np.save(file="SafetyWeights.npy", arr=outData)
np.save(file="SafetyLabels.npy", arr=outLabels)

# Show the dimensions of the saved data array as the cell output.
outData.shape


In [None]:
# Write the collected photo links to a one-column CSV, one link per row.
# csv.writer.writerows expects an iterable of rows, each row being a sequence
# of fields. Passing the bare list of strings made every string count as a
# row, so each character landed in its own column; wrapping every link in a
# single-element row keeps it intact as one field.
with open('SafetyFilenames.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, delimiter=',')
    wr.writerows([filename] for filename in listOfFilenames)
    
    