In [18]:
%matplotlib inline
import matplotlib.image as mpimg
import numpy as np
import matplotlib.pyplot as plt
import os,sys
from PIL import Image
from sklearn.cluster import KMeans

In [None]:
# BLOCKS FROM PREVIOUS SCRIPT

In [None]:
def create_model(img,gt,n_cluster,patch_size):
    """Fit a K-means model on the patches of one image and label each cluster.

    Parameters
    ----------
    img, gt : image and its ground truth, both cut into patches by ``img_crop``.
    n_cluster : number of K-means clusters.
    patch_size : patch width and height handed to ``img_crop``.

    Returns
    -------
    (model, clusters_label)
        ``model`` is the fitted ``KMeans`` instance. ``clusters_label`` is an
        integer array of length ``n_cluster`` holding 1 for every cluster whose
        members have a mean class (as given by ``value_to_class``) of at least
        0.3, and 0 otherwise. A cluster without members is labelled 0.
    """
    # Extract patches from input images
    img_patches = img_crop(img, patch_size, patch_size)
    gt_patches = img_crop(gt, patch_size, patch_size)

    # Linearize list of patches (removes one level of nesting).
    # NOTE(review): this assumes img_crop returns a two-level collection; if it
    # returns a flat list of patches, the flattening iterates over the first
    # axis of each patch instead -- confirm against img_crop.
    img_patches = np.asarray([patch for group in img_patches for patch in group])
    gt_patches = np.asarray([patch for group in gt_patches for patch in group])

    # Create X and Y
    # presumably value_to_class yields a numeric 0/1 class -- verify in its definition
    Y = np.asarray([value_to_class(np.mean(patch)) for patch in gt_patches])
    X = np.asarray([extract_features(patch) for patch in img_patches])

    model = KMeans(n_clusters=n_cluster, random_state=2, init = 'k-means++', n_init = 20).fit(X)

    # Per-cluster sum of the classes and per-cluster member count.
    labels = model.labels_
    clusters_total = np.bincount(labels, weights=Y, minlength=n_cluster)
    tot = np.bincount(labels, minlength=n_cluster)

    # Mean class per cluster; clusters without members keep a ratio of 0
    # instead of triggering a 0/0 warning.
    ratio = np.divide(clusters_total, tot, out=np.zeros(n_cluster), where=tot > 0)
    clusters_label = 1*(ratio >= 0.3)

    return model,clusters_label

In [None]:
def assign_label(patch,models):
    """Label one patch by averaging the votes of several cluster models.

    ``models`` is an iterable of ``(fitted model, cluster labels)`` entries.
    Each entry votes with the label of the cluster its model predicts for the
    patch features. The result is 1 when the mean vote reaches 0.3, else 0.
    """
    features = np.asarray(extract_features(patch)).reshape(1,-1)

    # One vote per entry: the label attached to the predicted cluster.
    votes = [entry[1][entry[0].predict(features)] for entry in models]

    return 1*(np.mean(votes)>=0.3)

In [None]:
def calcul_F1(Y, label):
    """Compute the F1 score of binary predictions.

    Parameters
    ----------
    Y : indexable of true classes (1 is the positive class, 0 the negative).
    label : indexable of predicted classes, at least as long as ``Y``.

    Returns
    -------
    float
        ``2 * precision * recall / (precision + recall)``. Returns 0.0 when
        there is no true positive (including empty input), where the score
        would otherwise require a division by zero.

    Notes
    -----
    Any pair that is not a true negative, false negative or true positive is
    counted as a false positive, including values other than 0 and 1.
    """
    TP = 0
    FN = 0
    FP = 0

    for i in range(len(Y)):
        actual, predicted = Y[i], label[i]
        if actual == 1 and predicted == 1:
            TP += 1
        elif actual == 1 and predicted == 0:
            FN += 1
        elif not (actual == 0 and predicted == 0):
            # Everything except a true negative ends up here.
            FP += 1

    # Without a true positive, precision and recall are 0 or undefined;
    # report 0.0 rather than dividing by zero.
    if TP == 0:
        return 0.0

    precision = TP/(TP+FP)
    recall = TP/(TP+FN)
    F1_score = 2*precision*recall / (precision+recall)
    return F1_score

In [None]:
def calculate_accuracy(img,gt,models,patch_size=None):
    """Score the cluster models on one image against its ground truth.

    Despite its name, this returns an F1 score, not an accuracy.

    Parameters
    ----------
    img, gt : image and its ground truth, both cut into patches by ``img_crop``.
    models : collection of ``(fitted model, cluster labels)`` entries handed
        to ``assign_label``.
    patch_size : patch width and height handed to ``img_crop``. When omitted,
        the notebook-level ``patch_size`` is used, as in earlier versions of
        this function.

    Returns
    -------
    (F1, label)
        ``F1`` is the value of ``calcul_F1`` over all patches and ``label``
        is the list of predicted labels, one per patch.
    """
    if patch_size is None:
        # Backward compatibility: fall back on the notebook-level setting.
        patch_size = globals()["patch_size"]

    # Extract patches from input images
    img_patches = img_crop(img, patch_size, patch_size)
    gt_patches = img_crop(gt, patch_size, patch_size)

    # Linearize list of patches (removes one level of nesting)
    img_patches = np.asarray([patch for group in img_patches for patch in group])
    gt_patches = np.asarray([patch for group in gt_patches for patch in group])

    # True class and predicted label of every patch
    Y = np.asarray([value_to_class(np.mean(patch)) for patch in gt_patches])
    label = [assign_label(patch, models) for patch in img_patches]

    # Calculate F1 score
    F1 = calcul_F1(Y, label)

    return F1,label

In [None]:
# NEW BLOCKS

In [19]:
def k_means_cross_validation(gt_imgs, imgs, k_indices, k , n_cluster,patch_size):
    """
    Return the mean F1 score of the predictions on the k-th fold.

    One model is built with ``create_model`` for every training image (all
    folds but the k-th). Each image of the k-th fold is then scored with
    ``calculate_accuracy`` using the whole list of models.

    Parameters
    ----------
    gt_imgs, imgs : indexable collections of ground truths and images,
        addressed by the indices stored in ``k_indices``.
    k_indices : 2-D array of image indices, one row per fold.
    k : row of ``k_indices`` used as the test fold.
    n_cluster : number of clusters of each model.
    patch_size : patch size used to build the models.

    Returns
    -------
    Mean of the F1 scores over the test images. An empty test fold gives
    ``nan`` (0 divided by 0).
    """
    # Create the test and train samples
    train_indices = np.delete(k_indices , k , 0).reshape((k_indices.shape[0]-1) * k_indices.shape[1])
    imgs_test = [imgs[el] for el in k_indices[k]]
    gts_test = [gt_imgs[el] for el in k_indices[k]]
    imgs_train = [imgs[el] for el in train_indices]
    gts_train = [gt_imgs[el] for el in train_indices]

    # One model per training image
    models = [create_model(img,gt,n_cluster,patch_size) for img,gt in zip(imgs_train,gts_train)]

    # NOTE: calculate_accuracy is not given `patch_size` here, so it relies on
    # the notebook-level `patch_size`; keep that value equal to the argument.
    tot = 0
    F1_score = 0
    for img_test,gt_test in zip(imgs_test,gts_test):
        F1_score_temp,_ = calculate_accuracy(img_test,gt_test,models)
        F1_score = F1_score + F1_score_temp
        tot = tot + 1

    return np.divide(F1_score,tot)

In [20]:
def best_k(list_nclusters,imgs,gt_imgs,k_fold,patch_size,seed=None):
    """Cross-validate every candidate number of clusters.

    Parameters
    ----------
    list_nclusters : sequence of candidate cluster counts.
    imgs, gt_imgs : images and their ground truths.
    k_fold : number of folds.
    patch_size : patch size forwarded to ``k_means_cross_validation``.
    seed : seed of the fold split. When omitted, the notebook-level ``seed``
        is used, as in earlier versions of this function.

    Returns
    -------
    numpy array with one entry per candidate: the F1 score averaged over the
    ``k_fold`` folds.
    """
    if seed is None:
        # Backward compatibility: fall back on the notebook-level setting.
        seed = globals()["seed"]

    # Mean F1 over the folds, one entry per candidate number of clusters
    F1 = np.zeros(len(list_nclusters))

    for ind_clust,n_cluster in enumerate(list_nclusters):
        # Split data in k folds. The same seed yields the same split for every
        # candidate; build_k_indices also re-seeds numpy's global generator.
        k_indices = build_k_indices(imgs, k_fold, seed)

        # F1 score of each fold
        F1_k = [k_means_cross_validation(gt_imgs, imgs, k_indices, k , n_cluster,patch_size)
                for k in range(k_fold)]

        F1[ind_clust] = np.mean(F1_k)

    return F1

In [21]:
def build_k_indices(y, k_fold, seed):
    """Shuffle the row indices of ``y`` and cut them into ``k_fold`` folds.

    Every fold holds ``len(y) // k_fold`` indices; indices left over after the
    last fold are dropped. Seeds numpy's global random generator with ``seed``.
    Returns an array with one row per fold.
    """
    n_rows = len(y)
    fold_size = n_rows // k_fold

    np.random.seed(seed)
    shuffled = np.random.permutation(n_rows)

    folds = []
    for fold in range(k_fold):
        start = fold * fold_size
        folds.append(shuffled[start:start + fold_size])
    return np.array(folds)

In [22]:
# Cross-validation settings.
seed = 1  # seed of the fold split; best_k picks this notebook-level name up by itself
list_nclusters = np.arange(2,5)  # candidate numbers of clusters: 2, 3, 4
k_fold = 5  # number of folds
patch_size = 30  # patch size; calculate_accuracy also picks this notebook-level name up by itself
# NOTE(review): `imgs` and `gt_imgs` are not defined in the cells shown here --
# presumably loaded by an earlier cell; confirm before a Restart & Run All.
# Cell output: mean F1 score for each candidate number of clusters.
best_k(list_nclusters,imgs,gt_imgs,k_fold,patch_size)