In [6]:
import pandas as pd
import numpy as np
import cv2
import os
import rasterio
from tensorflow.keras.models import load_model
import tensorflow as tf
from IPython.display import clear_output
from time import sleep

# Process-wide environment overrides, applied in one call:
#   PROJ_LIB / GDAL_DATA  - data directories for the PROJ and GDAL libraries
#                           (presumably needed by rasterio on this machine; verify).
#   CUDA_VISIBLE_DEVICES  - '-1' exposes no CUDA device, which by the usual CUDA
#                           convention keeps TensorFlow on the CPU.
os.environ.update({
    'PROJ_LIB': '/usr/share/proj',
    'GDAL_DATA': '/usr/include/gdal',
    'CUDA_VISIBLE_DEVICES': '-1',
})

In [8]:
# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
# Name of the bulk-order folder that holds the imagery to scan.
bulk_order_name = 'Bulk Order Noel, MO'

# Everything after the "Bulk Order " prefix is used as the project / output folder name.
projectName = bulk_order_name[len('Bulk Order')+1:]
rawimageDirectory = f'/media/user/c250/bda/{bulk_order_name}/NAIP'
# Only files whose name ends with this suffix are processed.
filename_ending_to_look_for = 'tif'

# Create <projectName>/images_to_check (and <projectName> itself) if missing.
# exist_ok=True makes the cell safe to re-run without listing directories first.
os.makedirs(f'{projectName}/images_to_check', exist_ok=True)

# Class labels in the order of the model's output scores.
class_names = ['cafo', 'notcafo']
model = load_model('CAFO_model_3-29.h5')

# One row per saved image chip; filled in by Operation().
df = pd.DataFrame(columns=['x','y','imageID','cat','epsgCoords', 'lat', 'long'])

# Collect the rasters to scan. os.listdir() returns entries in arbitrary order,
# so the listing is sorted to make the processing order (and therefore the row
# order of the output table) reproducible. endswith() is used instead of a
# fixed 3-character slice so suffixes of any length (e.g. 'tiff') work.
imagesToOperateOn = []
for file in sorted(os.listdir(rawimageDirectory)):
    if file.endswith(filename_ending_to_look_for):
        imagesToOperateOn.append(f'{rawimageDirectory}/{file}')

In [9]:
def _expand_candidates(seed_coords, max_offset=480, stride=80):
    """Return the seed pixels followed by diagonally shifted copies of each seed.

    For every seed ``(row, col)`` and every ``n`` in ``range(0, max_offset, stride)``
    the four points ``(row+n, col+n)``, ``(row-n, col+n)``, ``(row+n, col-n)`` and
    ``(row-n, col-n)`` are appended (in that order), so that an object of interest
    is looked at through several overlapping windows. ``n == 0`` intentionally
    repeats the seed itself, matching the historical candidate list.

    The array is built with one broadcasted operation instead of growing it with
    ``np.append`` inside a loop, which re-copied the whole array on every append.
    """
    seed_coords = np.asarray(seed_coords).reshape(-1, 2)
    offsets = np.arange(0, max_offset, stride).reshape(1, -1, 1, 1)
    signs = np.array([[1, 1], [-1, 1], [1, -1], [-1, -1]]).reshape(1, 1, 4, 2)
    shifted = seed_coords.reshape(-1, 1, 1, 2) + offsets * signs
    return np.concatenate([seed_coords, shifted.reshape(-1, 2)], axis=0)


def _clamp_window_start(start, length, step):
    """Clamp a window start so that ``[start, start + step)`` lies inside ``[0, length)``.

    If the axis is shorter than ``step`` the window starts at 0 and simply covers
    the whole axis.
    """
    return min(max(start, 0), max(length - step, 0))


def _cluster_reduction(coords, radius=299 * 1.5):
    """Thin out detections that lie close together.

    For each coordinate, the set of coordinates closer than ``radius`` (Euclidean
    distance, in pixels) forms a cluster; from each cluster the member with the
    smallest mean distance to the other members is kept. Duplicate survivors are
    removed with ``np.unique``.

    Returns an ``(m, 2)`` integer array (``m`` may be 0).
    """
    coords = np.asarray(coords).reshape(-1, 2)
    if len(coords) == 0:
        # np.unique(..., axis=0) cannot handle an empty list; nothing to reduce anyway.
        return coords

    survivors = []
    for anchor in coords:
        to_anchor = np.sqrt(((anchor - coords) ** 2).sum(axis=1))
        cluster = coords[to_anchor < radius]

        # Pick the most central member; strict '<' keeps the first one on ties.
        best_mean = np.inf
        best_member = None
        for member in cluster:
            mean_distance = np.sqrt(((member - cluster) ** 2).sum(axis=1)).mean()
            if mean_distance < best_mean:
                best_mean = mean_distance
                best_member = member
        survivors.append(best_member)

    return np.unique(survivors, axis=0)


def _classify_patch(patch, closeness=0.15):
    """Run the global ``model`` on one image patch and return a name from ``class_names``.

    The patch is resized to 299x299 (the size the model was trained on, per the
    original notebook comment). When more than one class scores within
    ``closeness`` of the top score, the result errs on the side of caution and
    returns the first such class that is not 'notcafo'.
    """
    resized = cv2.resize(patch, (299, 299), interpolation=cv2.INTER_AREA)
    batch = tf.expand_dims(resized, axis=0)
    predictions = model.predict(batch)

    scores = tf.nn.softmax(predictions[0]).numpy()
    near_top = scores.max() - scores < closeness
    contenders = np.array(class_names)[near_top]
    if near_top.sum() == 1:
        return contenders[0]
    return contenders[contenders != 'notcafo'][0]


def _evaluate_patch(patch, min_pixels, bright_level=170, min_bright_fraction=0.1):
    """Return the predicted category for ``patch``, or ``None`` if it is not worth predicting.

    A patch is skipped when it covers fewer than ``min_pixels`` pixels or when at
    most ``min_bright_fraction`` of its pixels are bright.

    The brightness test is the fraction of pixels whose third channel exceeds
    ``bright_level``. This is exactly what the earlier threshold / mask /
    bitwise_and sequence computed: the mask was derived from the thresholded
    third channel, so every pixel that survived masking had that channel set,
    and the final "white" count therefore equalled the number of pixels with
    ``patch[:, :, 2] > 170``.
    """
    if patch.shape[0] * patch.shape[1] < min_pixels:
        return None

    band = patch[:, :, 2]
    bright_fraction = np.count_nonzero(band > bright_level) / band.size
    if not bright_fraction > min_bright_fraction:
        return None

    return _classify_patch(patch)


def Operation(filename):
    """Scan one raster for sites of interest and save an image chip for each one found.

    Steps:
      1. Read the first three bands of ``filename`` with rasterio.
      2. Find bright seed pixels in the whole image and expand them into a set of
         candidate window positions.
      3. Classify each sufficiently bright candidate window with the global ``model``.
      4. Merge nearby detections and, for every remaining window, write a PNG to
         ``{projectName}/images_to_check`` and append a row to the global ``df``.

    Uses the notebook globals ``model``, ``class_names``, ``projectName`` and ``df``.

    Parameters
    ----------
    filename : str
        Path of the raster file to process.

    Returns
    -------
    The coordinate reference system of the raster (``src.crs``).

    Fixes relative to the previous version:
      * a too-small window now skips that window only (``continue``) instead of
        abandoning every remaining candidate in the image (``break``);
      * windows that overhang the image edge are shifted back inside the image
        instead of being reduced to a 1-pixel slice;
      * the row written to ``df`` puts x in the 'x' column and y in the 'y'
        column (they were swapped) and stores the category predicted for that
        window rather than whatever the last evaluated candidate happened to be;
      * the raster is opened in a ``with`` block so it is closed even on errors;
      * each distinct window is predicted only once.
    """
    step = 299  # window size in pixels; "a good step for jp2 compressed NAIP data"
    min_patch_pixels = 100 * 100  # smaller slices carry too little pixel information

    # Read everything needed from the file up front so the handle can be closed.
    with rasterio.open(filename) as src:
        # (bands, rows, cols) -> (rows, cols, bands), first three bands only
        srcRead = src.read()[:3, :, :].transpose(1, 2, 0)
        srcCRS = src.crs
        srcTransform = src.transform
    srcShape = srcRead.shape

    # --- 1. seed pixels: keep only the bright parts of the image ---------------
    # COLOR_BGR2RGB swaps the first and third channels, so channel 2 of `img`
    # is the first band read from the file.
    img = cv2.cvtColor(srcRead, cv2.COLOR_BGR2RGB)
    _, mask = cv2.threshold(img[:, :, 2], 200, 200, cv2.THRESH_BINARY)
    img = cv2.bitwise_and(img, np.dstack([mask, mask, mask]))

    # Erode away small bright specks, then binarise.
    kernel = np.ones((10, 10), np.uint8)
    img = cv2.erode(img, kernel, iterations=1)
    img = cv2.threshold(img, 140, 255, cv2.THRESH_BINARY)[1]

    # Keep a bright pixel only when both its row and its column index jump by
    # more than 10 to the next bright pixel (in np.nonzero order).
    indicesToCheck = np.array(np.nonzero(img[:, :, 0] > 0))
    filter_ = np.diff(indicesToCheck) > 10
    seeds = indicesToCheck.transpose()[:-1, :][filter_[1] & filter_[0]]

    # --- 2. candidate windows ---------------------------------------------------
    # The offsets should be adjusted to suit the input imagery.
    candidates = _expand_candidates(seeds, max_offset=480, stride=80)

    # --- 3. classify candidates -------------------------------------------------
    decisions = {}  # (y, x) -> predicted category, or None when the window was skipped
    DETECTED_COORDS = []
    for raw_y, raw_x in candidates:
        # Keep the whole window inside the image.
        y = _clamp_window_start(int(raw_y), srcShape[0], step)
        x = _clamp_window_start(int(raw_x), srcShape[1], step)

        window = (y, x)
        if window not in decisions:
            patch = srcRead[y:y + step, x:x + step, :]
            decisions[window] = _evaluate_patch(patch, min_patch_pixels)

        category = decisions[window]
        if category is not None and category != 'notcafo':
            DETECTED_COORDS.append([y, x])

    # --- 4. merge nearby detections --------------------------------------------
    DETECTED_COORDS = np.array(DETECTED_COORDS, dtype=np.int64).reshape(-1, 2)
    for _ in range(4):  # run several times: some coords get caught in multiple clusters
        DETECTED_COORDS = _cluster_reduction(DETECTED_COORDS)

    # --- 5. save one chip + one table row per remaining detection ---------------
    for KEPTCoord in DETECTED_COORDS:
        y = int(KEPTCoord[0])
        x = int(KEPTCoord[1])

        subArray = srcRead[y:y + step, x:x + step, :]
        if subArray.shape[0] * subArray.shape[1] < min_patch_pixels:
            continue
        # Same size as the model input, so the saved chip is what was classified.
        subArray = cv2.resize(subArray, (299, 299), interpolation=cv2.INTER_AREA)

        # Coordinates (in the raster's CRS) of the centre of the saved chip.
        epsgCoords = rasterio.transform.xy(srcTransform, y + int(step / 2), x + int(step / 2))
        lat = epsgCoords[1]
        long = epsgCoords[0]

        # Image id = last 10 characters of the path + window position.
        # NOTE(review): the 10-character tail includes the extension and may not be
        # unique across tiles - confirm ids cannot collide between input files.
        imgID = filename[-10:] + '_' + str(x) + '_' + str(y)
        cv2.imwrite(f'{projectName}/images_to_check/{imgID}.png', cv2.cvtColor(subArray, cv2.COLOR_RGB2BGR))

        # Value order follows df's columns: x, y, imageID, cat, epsgCoords, lat, long.
        cat = decisions[(y, x)]
        df.loc[len(df)] = x, y, imgID, cat, epsgCoords, lat, long
        print('Found')

    return srcCRS

    ####

    ####

# Process every raster in turn, showing the fraction of files already handled.
# enumerate() supplies the position directly; list.index() rescanned the list on
# every iteration and would report the wrong position if a path appeared twice.
# srcCRS keeps the CRS of the last processed image; it is used to name the output CSV.
for position, image in enumerate(imagesToOperateOn):
    clear_output(wait=True)
    print(position/(len(imagesToOperateOn)))
    srcCRS = Operation(image)

0.9929577464788732
Found
Found
Found


In [10]:
# Re-open the last raster handled by the processing loop (`image` is left over from it).
# NOTE(review): `src` is neither used nor closed in the code visible here - confirm
# whether a later cell needs it; otherwise this open can be dropped.
src = rasterio.open(image)

# Write every recorded detection to <projectName>/<projectName>_<CRS>_unfiltered.csv,
# where the CRS is the one returned for the last processed image.
df.to_csv(path_or_buf=f'{projectName}/{projectName}_{srcCRS}_unfiltered.csv')