# PARAMETERS

### Update:

**True** if you only want to add new classifications

**False** if you want to update all classifications 

### Fresh:

**True** if this is a fresh run of bulk autoclass (the classification DB hasn't been created yet)

**False** if this is not a fresh run

In [None]:
# When True, only rows that still contain a null value (not yet classified) are processed;
# when False, every row of the classification table is re-run.
update = True #@param {type:"boolean"}
# When True, the classification table is created empty instead of being read from the saved pickle.
fresh = False #@param {type:"boolean"}

# Installing GDAL

In [None]:
%%capture
!apt-get update
!apt-get install libgdal-dev -y
!apt-get install python-gdal -y
!apt-get install python-numpy python-scipy -y
!pip install rasterio
!pip install fiona
!pip install geopandas
import gdal 

# Importing Libraries

In [None]:
from tqdm.autonotebook import tqdm
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import rasterio
from descartes import PolygonPatch
from rasterio.plot import show
import matplotlib as mpl
import geopandas
import fiona

# Lower the verbosity of TensorFlow's native logging.
# NOTE(review): this is set after `import tensorflow`; confirm it still takes
# effect, or move it above the import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

print("TF version:", tf.__version__)
# tf.test.is_gpu_available() is deprecated; listing the physical GPU devices is
# the supported way to check whether a GPU is visible to TensorFlow.
print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "NOT AVAILABLE")

TF version: 2.3.0
GPU is NOT AVAILABLE


# Loading Data from Drive





In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the shared-drive paths used below live under it.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from tensorflow.keras.models import load_model

!cp "/content/drive/Shared drives/SIO and E4E Mangroves /Data/Machine Learning/Models/mvnmv4_merced_bright.zip" .
!unzip mvnmv4_merced_bright.zip 

#Set model location
model = "/content/mvnmv4_merced/"
model = load_model(model)

cp: cannot stat '/content/drive/Shared drives/SIO and E4E Mangroves /Data/Machine Learning/Models/mvnmv4_merced_bright.zip': No such file or directory
Archive:  mvnmv4_merced_bright.zip
replace mvnmv4_merced/variables/variables.index? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: mvnmv4_merced/variables/variables.index  
  inflating: mvnmv4_merced/variables/variables.data-00001-of-00002  
  inflating: mvnmv4_merced/variables/variables.data-00000-of-00002  
  inflating: mvnmv4_merced/saved_model.pb  


In [None]:
from PIL import Image, ImageFile

# Allow PIL to load image files whose data is truncated instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True


def retile(full_path):
    name = os.path.basename(full_path)
    file_string = "\"" + full_path + "\""
    name_string = "\"" + os.path.basename(full_path) + "\""

    print("Downloading {}".format(name))
    !cp {file_string} .

    !mkdir /content/images
    !mkdir /content/images/images

    print("Retiling {}".format(name))
    call = "gdal_retile.py -ps 256 256 -targetDir /content/images/images/ /content/" + name
    !{call}

    !rm {name}

    img_dir = "/content/images/images/"
    ImageFile.LOAD_TRUNCATED_IMAGES = True

    for filename in tqdm(os.listdir(img_dir)):
        filepath = os.path.join(img_dir, filename)
        if os.path.splitext(filename)[1] == ".tif":
            with Image.open(filepath) as im:
                x, y = im.size
                totalsize = x*y
                totalsum = np.sum(np.array(im))
            if totalsize < (int(256) * (int(256))):
                os.remove(filepath)
            elif np.array_equal(np.unique(np.array(im)), [0, 255]):
                os.remove(filepath)

In [None]:
def sigmoid(x):
    """Apply the logistic function 1 / (1 + e^-x) to ``x``.

    The model returns the values of its last dense layer without a final
    activation, so this function is used to turn those values into predicted
    class-conditional probabilities.
    """
    decay = np.exp(-x)
    return 1 / (decay + 1)

In [None]:
from PIL import Image, ImageFile

# Allow PIL to load image files whose data is truncated instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True

def classify_tiles(image_directory):
    """Run the loaded model over every tile below ``image_directory``.

    Tiles are read in batches of 32, rescaled by 1/255 and resized to 256x256.
    The function relies on the notebook-level ``model`` and ``sigmoid`` names.

    Args:
        image_directory: Folder passed to ``flow_from_directory``; the returned
            file names are the generator's ``filenames`` entries.

    Returns:
        DataFrame with columns ``prediction`` (argmax of the raw model output),
        ``p_0`` and ``p_1`` (sigmoid of output columns 0 and 1) and ``filename``.
    """
    print("Classifying tiles")
    column_names = ["prediction","p_0","p_1","filename"]

    #Read images using keras and split into batches
    image_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
    data_gen = image_generator.flow_from_directory(directory=image_directory,
                                                        batch_size=32,
                                                        shuffle=False,
                                                        target_size=(256, 256))

    #predict raw outputs from the model for all batches
    logits = np.asarray(model.predict(data_gen))
    if logits.size == 0:
        return pd.DataFrame(columns=column_names)

    # Build the table in one step; filling it cell by cell with .loc is very
    # slow for thousands of tiles.
    probabilities = sigmoid(logits)
    result_df = pd.DataFrame(
        {
            "prediction": np.argmax(logits, axis=1),
            "p_0": probabilities[:, 0],
            "p_1": probabilities[:, 1],
            # shuffle=False keeps predictions in the same order as filenames
            "filename": data_gen.filenames[:len(logits)],
        },
        columns=column_names,
    )

    return result_df

In [None]:
def generate_probtiles(result_df):
    """Write a single-band ``prob_<tile>`` GeoTIFF next to every classified tile.

    Each output raster has the size and affine transform of its source tile and
    every pixel holds that tile's ``p_0`` value.

    Args:
        result_df: Table with a ``filename`` column (paths relative to the
            ``images`` folder in the current working directory) and a ``p_0``
            column.
    """
    print("Generating tiles for probability plot")
    for _, sample in tqdm(result_df.iterrows(), total=len(result_df)):
        tile_rel = sample["filename"]

        #loading original image
        original = os.path.abspath(os.path.join("images", tile_rel))

        #output file lives beside the source tile, prefixed with "prob_"
        output = os.path.abspath(
            os.path.join("images", os.path.dirname(tile_rel), "prob_" + os.path.basename(tile_rel))
        )

        # Context managers close both datasets; the source used to stay open for every tile.
        with rasterio.open(original) as img:
            #creating new raster mask with pixel values of conditional probability
            # rasterio writes arrays as (rows, cols), i.e. (height, width)
            mask = sample["p_0"] * np.ones(shape=(img.height, img.width))

            # NOTE(review): the CRS is hard-coded to '+proj=latlong' rather than
            # copied from the source tile (img.crs) — confirm this is intended.
            with rasterio.open(
                output,
                'w',
                driver='GTiff',
                height=img.height,
                width=img.width,
                count=1,
                dtype=mask.dtype,
                crs='+proj=latlong',
                transform=img.transform,
            ) as dst:
                dst.write(mask, 1)

In [None]:
def move_tiles(result_df, image_directory="/content/images"):
    """Move each classified tile into a sub-folder named after its predicted class.

    Args:
        result_df: Table with ``filename`` (tile path relative to
            ``image_directory``) and ``prediction`` (class index) columns.
        image_directory: Root folder that holds the tiles and receives the
            per-class folders. Defaults to the location used by the rest of
            the notebook, so existing ``move_tiles(result_df)`` calls behave
            as before without depending on a global variable.
    """
    print("Moving M/NM Tiles into folders")
    base_dir = os.path.abspath(image_directory)

    #Organize tiles into folders
    for _, row in result_df.iterrows():
        # Tiles are stored as .tif; map a .jpg extension onto it without
        # touching any other "jpg" substring in the path.
        stem, ext = os.path.splitext(row['filename'])
        if ext == ".jpg":
            ext = ".tif"
        src = os.path.join(base_dir, stem + ext)

        #set destination folder, and creates the folder if it doesn't exist
        dest_folder = os.path.join(base_dir, str(int(row['prediction'])))
        os.makedirs(dest_folder, exist_ok=True)

        #moves file
        os.rename(src, os.path.join(dest_folder, os.path.basename(src)))

In [None]:
def fix_shp(filename):
    """Remove every feature whose ``DN`` attribute equals 0 from a shapefile.

    The file is read, filtered and written back to the same path.

    Args:
        filename: Path of the shapefile to rewrite.

    Returns:
        The filtered GeoDataFrame that was written.
    """
    shp = geopandas.read_file(filename)
    # One boolean mask replaces dropping rows one at a time while iterating,
    # which copied the frame for every removed feature.
    shp = shp[shp["DN"] != 0]
    shp.to_file(filename)
    return shp

In [None]:
def create_files(full_path):
    #remove any already existing files in the path

    file = os.path.basename(full_path)
    folder = os.path.dirname(full_path)
    name = os.path.splitext(file)[0]
    dest_folder = os.path.join(folder,name+'_classifications/')

    mtif_name = 'm_' + name + '.tif'
    mshp_name = 'm_' + name + '.shp'

    nmtif_name = 'nm_' + name + '.tif'
    nmshp_name = 'nm_' + name + '.shp'

    ptif_name =  'prob_' + name + '.tif'
    #plot_name =  'plot_' + name + '.png'

    !mkdir {"\"" + dest_folder + "\""}


    print("Creating Orthomosaics")
    #recombines classified tiles for each class
    !gdal_merge.py -o /content/{nmtif_name} /content/images/1/*
    !rm /content/images/1/*.tif
    !gdal_polygonize.py /content/{nmtif_name} -f "ESRI Shapefile" -b 4 {nmshp_name}
    shp = fix_shp(nmshp_name)


    #getting filenames and locations
    nmtif_name = 'nm_' + name + '.tif'
    nmshp_name = 'nm_' + name + '.shp'
    nmshx_name = 'nm_' + name + '.shx'
    nmdbf_name = 'nm_' + name + '.dbf'
    nmprj_name = 'nm_' + name + '.prj'
    nmcpg_name = 'nm_' + name + '.cpg'

    nmtif_dest = os.path.join(dest_folder, nmtif_name)
    nmshp_dest = os.path.join(dest_folder, nmshp_name)
    nmshx_dest = os.path.join(dest_folder, nmshx_name)
    nmdbf_dest = os.path.join(dest_folder, nmdbf_name)
    nmprj_dest = os.path.join(dest_folder, nmprj_name)
    nmcpg_dest = os.path.join(dest_folder, nmcpg_name)

    print("Uploading Non-Mangrove Files")

    #copying over files to the drive
    !cp {"\"" + nmtif_name + "\""} {"\"" + nmtif_dest + "\""}
    !cp {"\"" + nmshp_name + "\""} {"\"" + nmshp_dest + "\""}
    !cp {"\"" + nmshx_name + "\""} {"\"" + nmshx_dest + "\""}
    !cp {"\"" + nmdbf_name + "\""} {"\"" + nmdbf_dest + "\""}
    !cp {"\"" + nmprj_name + "\""} {"\"" + nmprj_dest + "\""}
    !cp {"\"" + nmcpg_name + "\""} {"\"" + nmcpg_dest + "\""}

    !rm {nmtif_name}
    !rm {nmshp_name}
    !rm {nmshx_name}
    !rm {nmdbf_name}
    !rm {nmprj_name}
    !rm {nmcpg_name}


    !gdal_merge.py -o /content/{mtif_name} /content/images/0/*
    !rm /content/images/0/*.tif
    !gdal_polygonize.py /content/{mtif_name} -f "ESRI Shapefile" -b 4 {mshp_name}
    shp = fix_shp(mshp_name)

    mtif_name = 'm_' + name + '.tif'
    mshp_name = 'm_' + name + '.shp'
    mshx_name = 'm_' + name + '.shx'
    mdbf_name = 'm_' + name + '.dbf'
    mprj_name = 'm_' + name + '.prj'
    mcpg_name = 'm_' + name + '.cpg'

    mtif_dest = os.path.join(dest_folder, mtif_name)
    mshp_dest = os.path.join(dest_folder, mshp_name)
    mshx_dest = os.path.join(dest_folder, mshx_name)
    mdbf_dest = os.path.join(dest_folder, mdbf_name)
    mprj_dest = os.path.join(dest_folder, mprj_name)
    mcpg_dest = os.path.join(dest_folder, mcpg_name)

    print("Uploading Mangrove Files")

    !cp {"\"" + mtif_name + "\""} {"\"" + mtif_dest + "\""}
    !cp {"\"" + mshp_name + "\""} {"\"" + mshp_dest + "\""}
    !cp {"\"" + mshx_name + "\""} {"\"" + mshx_dest + "\""}
    !cp {"\"" + mdbf_name + "\""} {"\"" + mdbf_dest + "\""}
    !cp {"\"" + mprj_name + "\""} {"\"" + mprj_dest + "\""}
    !cp {"\"" + mcpg_name + "\""} {"\"" + mcpg_dest + "\""}

    !rm {mtif_name}
    !rm {mshp_name}
    !rm {mshx_name}
    !rm {mdbf_name}
    !rm {mprj_name}
    !rm {mcpg_name}

    #probability tiles remain unmoved, so just get all the leftover tiles
    !gdal_merge.py -o /content/{ptif_name} /content/images/images/*
    !rm -rf /content/images/

    
    ptif_name =  'prob_' + name + '.tif'
    plot_name =  'plot_' + name + '.png'

    ptif_dest = os.path.join(dest_folder, ptif_name)

    print("Uploading Other Files")

    !cp {"\"" + ptif_name + "\""} {"\"" + ptif_dest + "\""}

    !rm {ptif_name}


In [None]:
def run(full_path,image_directory):
    """Run the whole pipeline for one orthomosaic.

    Clears leftovers from earlier runs in the working directory, then tiles the
    image, classifies the tiles, writes the probability tiles, sorts the tiles
    by predicted class and finally builds and uploads the output files.

    Args:
        full_path: Path of the orthomosaic to process.
        image_directory: Folder handed to ``classify_tiles``.
    """
    import glob
    import shutil

    #removing all files just in case
    # Pure-Python cleanup: unlike `rm *.ext`, this stays quiet when nothing matches.
    for pattern in ("*.tif", "*.png", "*.cpg", "*.dbf", "*.prj", "*.shp", "*.shx"):
        for leftover in glob.glob(pattern):
            if os.path.isfile(leftover):
                os.remove(leftover)

    #clear folder and run all functions for final running
    shutil.rmtree("images", ignore_errors=True)
    retile(full_path)
    result_df = classify_tiles(image_directory)
    generate_probtiles(result_df)
    move_tiles(result_df)
    del result_df
    create_files(full_path)

In [None]:
# Disabled maintenance snippet, kept inside a string literal so that it does not run.
# As written, the pd.read_pickle line is commented out, so `class_stats` would
# already have to exist in the kernel before this could be enabled.
'''
#for resetting pkl and db when bugs occur :)

stats_pkl = "/content/drive/Shared drives/SIO and E4E Mangroves /Data/Classification_statistics.pkl"
stats_location = "/content/drive/Shared drives/SIO and E4E Mangroves /Data/Classification_statistics.xlsx"

#class_stats = pd.read_pickle(stats_pkl)
#class_stats = class_stats.iloc[:-27]
class_stats = class_stats.drop(41)

class_stats.to_excel(stats_location)
pd.to_pickle(class_stats, stats_pkl)
class_stats
'''

'\n#for resetting pkl and db when bugs occur :)\n\nstats_pkl = "/content/drive/Shared drives/SIO and E4E Mangroves /Data/Classification_statistics.pkl"\nstats_location = "/content/drive/Shared drives/SIO and E4E Mangroves /Data/Classification_statistics.xlsx"\n\n#class_stats = pd.read_pickle(stats_pkl)\n#class_stats = class_stats.iloc[:-27]\nclass_stats = class_stats.drop(41)\n\nclass_stats.to_excel(stats_location)\npd.to_pickle(class_stats, stats_pkl)\nclass_stats\n'

In [None]:
from datetime import datetime
# Display the current timestamp as text.
str(datetime.now())

'2020-08-13 04:15:40.772139'

In [None]:
from datetime import datetime

columns = ["filename","filesize (GB)","last_updated","full_path"]
stats_pkl = "/content/drive/Shared drives/SIO and E4E Mangroves /Data/Classification_statistics.pkl"
stats_location = "/content/drive/Shared drives/SIO and E4E Mangroves /Data/Classification_statistics.xlsx"
orthomosaic_root = "/content/drive/Shared drives/SIO and E4E Mangroves /Data/Orthomosaics/"
image_directory = "/content/images"


def _is_classifiable(file):
    """Return True if the file name passes the notebook's filter for .tif files to classify."""
    excluded_fragments = ("dem", "DEM", "downsampled", "CNN_prob", "kompsat", "planetscope")
    excluded_prefixes = ("un_", "prob_")
    excluded_suffixes = ("CNN_nm.tif", "CNN_m.tif")

    if not file.endswith(".tif"):
        return False
    if any(fragment in file for fragment in excluded_fragments):
        return False
    if file.startswith(excluded_prefixes) or file.endswith(excluded_suffixes):
        return False
    return True


if fresh:
    class_stats = pd.DataFrame(columns=columns)
else:
    # NOTE(review): unpickling can execute arbitrary code; only load this file
    # from a location whose contents are trusted.
    class_stats = pd.read_pickle(stats_pkl)

#populating the classification db with new images (or all of them if a fresh run)
known_files = set(class_stats["filename"])
new_records = []
for root, dirs, files in os.walk(orthomosaic_root):
    # create_files() writes its outputs into "<name>_classifications" folders
    # below this tree; prune them so generated m_/nm_ mosaics are not queued
    # as new inputs on the next run.
    dirs[:] = [d for d in dirs if not d.endswith("_classifications")]
    for file in files:
        if _is_classifiable(file) and file not in known_files:
            full_path = os.path.join(root,file)
            new_records.append({
                "filename": file,
                "full_path": full_path,
                "filesize (GB)": os.path.getsize(full_path)/(1024*1024*1024),
            })
            known_files.add(file)

if new_records:
    # Continue numbering after the existing labels so every row keeps a unique
    # index; the per-row `.loc[index, ...]` updates below depend on that.
    first_new = 0 if class_stats.empty else class_stats.index.max() + 1
    new_rows = pd.DataFrame(new_records,
                            columns=columns,
                            index=range(first_new, first_new + len(new_records)))
    # DataFrame.append was removed in pandas 2; concatenate once instead of
    # growing the table row by row.
    class_stats = new_rows if class_stats.empty else pd.concat([class_stats, new_rows])

for index, row in class_stats.iterrows():
    full_path = row["full_path"]

    #If the row contains null values, it hasn't been completed before;
    #in update mode completed rows are skipped, otherwise everything is re-run
    if update and not row.isnull().values.any():
        continue

    run(full_path,image_directory)
    class_stats.loc[index, "last_updated"] = datetime.now()
    # Persist after every image so progress survives an interrupted session.
    class_stats.to_excel(stats_location)
    pd.to_pickle(class_stats, stats_pkl)


In [None]:
# Manual one-off run of the pipeline on a single orthomosaic.
full_path = "/content/drive/Shared drives/SIO and E4E Mangroves /Data/Orthomosaics/2019-05 Sian Ka'an Reserve/Site 10/skr_2019-05_site10_flight01_120m_rgb.tif"
image_directory = "/content/images"
run(full_path,image_directory)


rm: cannot remove '*.tif': No such file or directory
rm: cannot remove '*.png': No such file or directory
rm: cannot remove '*.cpg': No such file or directory
rm: cannot remove '*.dbf': No such file or directory
rm: cannot remove '*.prj': No such file or directory
rm: cannot remove '*.shp': No such file or directory
rm: cannot remove '*.shx': No such file or directory
Downloading skr_2019-05_site10_flight01_120m_rgb.tif
Retiling skr_2019-05_site10_flight01_120m_rgb.tif
0...10...20...30...40...50...60...70...80...90...100 - done.


HBox(children=(FloatProgress(value=0.0, max=8288.0), HTML(value='')))


Classifying tiles
Found 3880 images belonging to 1 classes.


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


Generating tiles for probability plot


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


Moving M/NM Tiles into folders
Creating Orthomosaics
0...10...20...30...40...50...60...70...80...90...100 - done.
Creating output nm_skr_2019-05_site10_flight01_120m_rgb.shp of format ESRI Shapefile.
0...10...20...30...40...50...60...70...80...90...100 - done.


NameError: ignored