In [34]:
import os
import numpy as np
from spectral import *
import matplotlib.pyplot as plt
import math
import cv2
from scipy.io import loadmat
from sklearn.decomposition import PCA
from sklearn import preprocessing

DATASTORE = 'C:\\Datasets\\bacterias'
spectral.settings.envi_support_nonlowercase_params = True


In [41]:
from sklearn.cluster import KMeans

def calibration(arr, I, W, D):
    for w in range(arr.shape[2]):
        if (w % 10 == 0):
            print('CAMADAS {}-{}'.format(w, w+10))

        WW = np.mean(W[:, :, w], axis=0).T
        DD = np.mean(D[:, :, w], axis=0).T
        
        for i in range(arr.shape[0]):
            for j in range(arr.shape[1]):
                den = WW[0, j] - DD[0, j]
                num = I[i, j, w] - DD[0, j]
                if den and num/den > 0:
                    arr[i, j, w] = -math.log10(num / den)
                else:
                    arr[i, j, w] = 0
    return arr

def hsi2matrix(arr):
    if len(arr.shape) != 3:
        raise BaseException('A entrada deve possuir 3 dimensões')

    r, c, w = arr.shape
    return np.reshape(arr, (r*c, w))

def mat2hsi(mat, shape):
    return np.reshape(mat, shape)

def pca_95(x):
    scaled_data = preprocessing.scale(x)
    return PCA(n_components=0.95).fit_transform(scaled_data)

def getClusters(x):
    pca_data = pca_95(x)
    km = KMeans(n_clusters=2).fit(pca_data)
    return km

def getLayer(hsi, layer):
    return hsi[:,:,layer]

def snvFilter(mat):
    nmat = np.copy(mat)
    mean = np.mean(mat, axis=1)
    std = np.std(mat, axis=1)
    for i in range(mat.shape[0]):
        nmat[i] = (nmat[i] - mean[i])/std[i]

    return nmat

def removePixels(mat, side, amount):
    cpy_mat = np.copy(mat)
    if side == 'top':
        for i in range(amount):
            cpy_mat[i, :, :] = 0
    return cpy_mat

def applyMask(km,mat):
    mask1 = np.copy(mat)
    mask2 = np.copy(mat)
    lab = km.labels_
    for i in range(mat.shape[0]):
        if lab[i] == 0:
            mask1[i] = 0
        else:
            mask2[i] = 0
    
    return (mat2hsi(mask1, mat.shape) ,mat2hsi(mask2, mat.shape))


def hsiRemoveBackground(mat):
    matSNVFilter = snvFilter(mat)
    km = getClusters(matSNVFilter)
    return (applyMask(km, mat), km)
    
def whichClusterToMantain(mask1, mask2):
    plt.figure()
    plt.title("FIGURE 1")
    plt.imshow(getLayer(mask1, 1), cmap='gray')
    plt.figure()
    plt.title("FIGURE 2")
    plt.imshow(getLayer(mask2, 2), cmap='gray')

    resp = input('Qual cluster deseja manter? (1/2)')
    if resp != 1 and resp != 2:
        raise BaseException("Selected option not available.")
    
    return resp - 1
    
def getHsiData(path):
    orig_name = [a for a in os.listdir(path) if '.hdr' in a and 'DARK' not in a and 'WHITE' not in a]
    dark_name = [a for a in os.listdir(path) if '.hdr' in a and 'DARK' in a]
    white_name = [a for a in os.listdir(path) if '.hdr' in a and 'WHITE' in a]

    I = open_image(os.path.join(path, orig_name[0]))
    W = open_image(os.path.join(path, white_name[0]))
    D = open_image(os.path.join(path, dark_name[0]))

    arr = I.load()

    return (arr, I, W, D)

def getNoBackgroundPixels(cluster: int, km, mat):
    lab = km
    arr = []
    for i in range(mat.shape[0]):
        if lab[i] == cluster:
            arr.append(mat[i,:])
    return arr

def create_training_data(choose_bac: int):
    """
        choose_bac is the bacteria to process (since takes forever to do all at once)
        returns a calibrated array based on dark and white hdr's, the pixels containing the bacteria (with no background) and the label for that bacteria
    """

    bac_dirs = os.listdir(DATASTORE)

    for ind, bac in enumerate(bac_dirs):
        if (choose_bac == ind):

            individual_bac_dir = os.path.join(DATASTORE, bac)

            arr, I, W, D = getHsiData(individual_bac_dir)

            arr_calib = calibration(arr, I, W, D)

            mat = hsi2matrix(arr_calib)
            mask1, mask2, km = hsiRemoveBackground(arr_calib)
            cluster = whichClusterToMantain(mask1, mask2)
            noBackPixels = getNoBackgroundPixels(cluster, km, mat)
            return (arr_calib, noBackPixels, ind)

In [None]:
# mask1hsi = mat2hsi(mask1, arr.shape)
# mask2hsi = mat2hsi(mask2, arr.shape)

# plt.imshow(mask1hsi[:,:,1], cmap='gray')
# plt.figure
# plt.imshow(mask2hsi[:,:,1], cmap='gray')

# f = loadmat(os.path.join(DATASTORE, 'a.mat'))
# aaaa = np.array(f['data'])
# print(aaaa.shape, mask1hsi.shape)
# plt.imshow(aaaa[:,:,0], cmap='gray')


In [8]:


create_training_data()

NotADirectoryError: [WinError 267] O nome do diretório é inválido: 'C:\\Datasets\\bacterias\\teste\\a.hdr'