In [0]:
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt
from skimage.color import rgb2gray
from sklearn.semi_supervised import LabelSpreading,LabelPropagation
import random
from sklearn import svm
from sklearn.kernel_approximation import Nystroem
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from scipy.ndimage import gaussian_filter
from skimage.filters import roberts,sobel, scharr, prewitt
import time

In [0]:
def read_files(tilesDir,labelledTilesDir):
    
    tiles = []
    tile_names=[]
    for r, d, f in os.walk(tilesDir):
        for item in f:
            if '.tif' in item:
                tiles.append(os.path.join(r, item))
                tile_names.append(item)

    X_labelled=[] 
    X_unlabelled=[]
    Y_labelled=[]
    Y_unlabelled=[]
    #Read in the images 
    for index, tile in enumerate(tiles):
    
        print(index,tile)
        labelled_raster_path = os.path.join(labelledTilesDir,"labelled_"+tile_names[index])
        if(os.path.exists(labelled_raster_path)):
            start_time = time.time()
            print("Procesing labelled tile...")
            lim = Image.open(labelled_raster_path)
            label = np.array(lim)
            alpha = label[:,:,3]
            r = label[:,:,0]
            alpha[alpha==255] = 1
            labels = alpha
            Y_labelled.append(labels)
        
            rim = Image.open(tile)
            raster = np.array(rim)[:,:,:3]    #It is a 4096x4096x3 image
            gaussian_i = gaussian_filtering(raster)
            sobel_i = sobel_filtering(raster)
            roberts_i = roberts_filtering(raster)
            prewitt_i = prewitt_filtering(raster)
            scharr_i = scharr_filtering(raster)
            raster = np.dstack((raster,gaussian_i))
            raster = np.dstack((raster,sobel_i))
            raster = np.dstack((raster,roberts_i))
            raster = np.dstack((raster,prewitt_i))
            raster = np.dstack((raster,scharr_i))
            
            #gray = rgb2gray(raster)           #Converting the rgb to grayscale
            X_labelled.append(raster)
            print("Total time for labelled %s"%(time.time() - start_time))
        else:
            start_time = time.time()
            print("Processing unlabelled tile...")
            rim = Image.open(tile)
            raster = np.array(rim)[:,:,:3]    #It is a 4096x4096x3 image
            gaussian_i = gaussian_filtering(raster)
            sobel_i = sobel_filtering(raster)
            roberts_i = roberts_filtering(raster)
            prewitt_i = prewitt_filtering(raster)
            scharr_i = scharr_filtering(raster)
            raster = np.dstack((raster,gaussian_i))
            raster = np.dstack((raster,sobel_i))
            raster = np.dstack((raster,roberts_i))
            raster = np.dstack((raster,prewitt_i))
            raster = np.dstack((raster,scharr_i))
            #gray = rgb2gray(raster)           #Converting the rgb to grayscale
            X_unlabelled.append(raster)
            labels = np.full((4096, 4096), -1)
            Y_unlabelled.append(labels)
            print("Total time for unlabelled %s"%(time.time() - start_time))
        
    X_labelled = np.asarray(X_labelled)
    Y_labelled = np.asarray(Y_labelled)
    X_unlabelled = np.asarray(X_unlabelled)
    Y_unlabelled = np.asarray(Y_unlabelled)

#     X_labelled = X_labelled.reshape(len(X_labelled)*X_labelled[0].shape[0]*X_labelled[0].shape[1],len(['R','G','B']))
#     Y_labelled = Y_labelled.reshape(len(Y_labelled)*Y_labelled[0].shape[0]*Y_labelled[0].shape[1],)
#     X_unlabelled = X_unlabelled.reshape(len(X_unlabelled)*X_unlabelled[0].shape[0]*X_unlabelled[0].shape[1],len(['R','G','B']))
#     Y_unlabelled = Y_unlabelled.reshape(len(Y_unlabelled)*Y_unlabelled[0].shape[0]*Y_unlabelled[0].shape[1],)
    X_labelled = X_labelled.reshape(len(X_labelled)*X_labelled[0].shape[0]*X_labelled[0].shape[1],10)
    Y_labelled = Y_labelled.reshape(len(Y_labelled)*Y_labelled[0].shape[0]*Y_labelled[0].shape[1],)
    X_unlabelled = X_unlabelled.reshape(len(X_unlabelled)*X_unlabelled[0].shape[0]*X_unlabelled[0].shape[1],10)
    Y_unlabelled = Y_unlabelled.reshape(len(Y_unlabelled)*Y_unlabelled[0].shape[0]*Y_unlabelled[0].shape[1],)
    
    return X_labelled,Y_labelled,X_unlabelled,Y_unlabelled

In [0]:
def gaussian_filtering(image):
    start_time = time.time()
    image[:,:,0] = gaussian_filter(image[:,:,0], sigma = 10)
    image[:,:,1] = gaussian_filter(image[:,:,1], sigma = 10)
    image[:,:,2] = gaussian_filter(image[:,:,2], sigma = 10)
    print("time for gaussian %s"%(time.time() - start_time))
    return image

In [0]:
def prewitt_filtering(image):
    start_time = time.time()
    gray = rgb2gray(image)
    gray = prewitt(gray)
    print("time prewitt %s"%(time.time() - start_time))
    return gray

In [0]:
def roberts_filtering(image):
    start_time = time.time()
    gray = rgb2gray(image)
    gray = roberts(gray)
    print("time roberts %s"%(time.time() - start_time))
    return gray

In [0]:
def scharr_filtering(image):
    start_time = time.time()
    gray = rgb2gray(image)
    gray = scharr(gray)
    print("time scharr %s"%(time.time() - start_time))
    return gray

In [0]:
def sobel_filtering(image):
    start_time = time.time()
    gray = rgb2gray(image)
    gray = sobel(gray)
    print("time sobel %s"%(time.time() - start_time))
    return gray

In [0]:
# def check_number_distinct_labels(y):
#     mangroves=0
#     non_mangroves=0
#     mangroves_index=[]
#     non_mangroves_index=[]
#     for index,i in enumerate(y):
#         if i==1:
#             mangroves+=1
#             mangroves_index.append(index)
#         if i==0:
#             non_mangroves+=1
#             non_mangroves_index.append(index)
#     print("Number of mangrove pixels: ",mangroves)
#     print("Number of non mangrove pixels: ",non_mangroves)
#     return mangroves_index,non_mangroves_index


def check_number_distinct_labels(y):
    mangroves_index = np.where(y == 1)
    non_mangroves_index = np.where(y == 0)
    mangroves = len(mangroves_index[0])
    non_mangroves = len(non_mangroves_index[0])
    print("Number of mangrove pixels: ",mangroves)
    print("Number of non mangrove pixels: ",non_mangroves)
    return list(mangroves_index[0]),list(non_mangroves_index[0])

In [0]:
def get_random_points(mangroves_index,non_mangroves_index,length):
    random_mangroves_index = random.sample(mangroves_index, length)
    random_non_mangroves_index = random.sample(non_mangroves_index,length)
    return random_mangroves_index,random_non_mangroves_index

In [0]:
def form_training_sets(X,Y,random_mangroves_index,random_non_mangroves_index):
    X_train=[]
    y_train=[]
    X_t1 = list(X[random_mangroves_index])
    y_t1 = list(Y[random_mangroves_index])
    X_t2 =list(X[random_non_mangroves_index])
    y_t2 = list(Y[random_non_mangroves_index])
    X_train =  X_t1 + X_t2
    y_train =  y_t1 + y_t2
   

    return X_train,y_train

In [0]:

def train_classifier(X,y):
    clf=RandomForestClassifier(max_depth = 2)
    clf.fit(X, y)
    return clf


In [0]:
def create_test_sets(X,Y):
    X_test=[]
    y_test=[]
    for i in range(len(X)-1, -1, -1):
        X_test.append(X[i])
        y_test.append(Y[i])
        if len(X_test) >=30000:
            break
    return X_test,y_test

In [0]:
def predict_labels(clf,X):
    return clf.predict(X)


In [14]:
os.getcwd()

'/content'

In [15]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
if __name__ == "__main__":
    #Read the image files
    start_time = time.time()
    tilesDir  = "/content/drive/My Drive/Colab Notebooks/Tiles" 
    labelledTilesDir = "/content/drive/My Drive/Colab Notebooks/Labelled_Tiles"
    
    print("Reading the original tiles and the available labelled tiles....")
    X_labelled,Y_labelled,X_unlabelled,Y_unlabelled = read_files(tilesDir,labelledTilesDir)
    #print(X_labelled.shape)
    

Reading the original tiles and the available labelled tiles....
0 /content/drive/My Drive/Colab Notebooks/Tiles/clippedtest_2_3.tif
Processing unlabelled tile...
time for gaussian 6.228050231933594
time sobel 1.1302857398986816
time roberts 0.7832095623016357
time prewitt 0.9567339420318604
time scharr 0.9282853603363037
Total time for unlabelled 13.674824476242065
1 /content/drive/My Drive/Colab Notebooks/Tiles/clippedtest_3_3.tif
Processing unlabelled tile...
time for gaussian 6.103715658187866
time sobel 0.9152545928955078
time roberts 0.7532267570495605
time prewitt 0.8907713890075684
time scharr 0.8923568725585938
Total time for unlabelled 12.352570295333862
2 /content/drive/My Drive/Colab Notebooks/Tiles/clippedtest_3_2.tif
Procesing labelled tile...
time for gaussian 6.23469877243042
time sobel 0.8861279487609863
time roberts 0.757068395614624
time prewitt 0.88262939453125
time scharr 0.8867578506469727
Total time for labelled 12.482073545455933
3 /content/drive/My Drive/Colab N

In [17]:
    print("Checking number of mangrove/non-mangrove tiles in labelled data....")
    mangroves_index,non_mangroves_index = check_number_distinct_labels(Y_labelled)

    random_mangroves_index,random_non_mangroves_index = get_random_points(mangroves_index,non_mangroves_index,100000)
    

Checking number of mangrove/non-mangrove tiles in labelled data....
Number of mangrove pixels:  15798759
Number of non mangrove pixels:  17755673


In [18]:
    X_train,y_train = form_training_sets(X_labelled,Y_labelled,random_mangroves_index,random_non_mangroves_index)
   # X_train,y_train = form_training_sets(X_labelled,Y_labelled,mangroves_index,non_mangroves_index)
    # print(len(X_train))
    # print(len(y_train))

    print("Fitting the classifier....")
    clf = train_classifier(X_train,y_train)
    
    print("Predicting the labels for the unlabelled pixels....")
    y_unlabelled_pred = predict_labels(clf,X_unlabelled)
    
    
    print("Retraining the model with using the unlabelled data....")
    mangroves_index,non_mangroves_index = check_number_distinct_labels(y_unlabelled_pred)
    random_mangroves_index,random_non_mangroves_index = get_random_points(mangroves_index,non_mangroves_index,30000)
    X_train_unlabelled,y_train_unlabelled = form_training_sets(X_unlabelled,y_unlabelled_pred,random_mangroves_index,random_non_mangroves_index)
    X_train.extend(X_train_unlabelled)
    y_train.extend(y_train_unlabelled)
    clf = train_classifier(X_train,y_train)
    
    print("Testing the model....")
    X_test,y_test = create_test_sets(X_labelled,Y_labelled)
    print("Performance on train sets...: ",clf.score(X_train, y_train))
    print("Performance of test sets of 20,000 samples:",metrics.accuracy_score(y_test, predict_labels(clf,X_test)))
    print("time %s"%(time.time() - start_time))
    
    

Fitting the classifier....
Predicting the labels for the unlabelled pixels....
Retraining the model with using the unlabelled data....
Number of mangrove pixels:  15077674
Number of non mangrove pixels:  18476758
Testing the model....
Performance on train sets...:  0.8285615384615385
Performance of test sets of 20,000 samples: 0.7160666666666666
time 221.84719133377075
