In [None]:
import numpy as np
import skimage
import matplotlib.pyplot as plt
import cv2
import os
import pandas as pd
import PIL
import time

from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import cohen_kappa_score, accuracy_score, ConfusionMatrixDisplay
from sklearn import metrics
from sklearn.model_selection import train_test_split
import joblib

In [None]:
# Read the three attribute tables (GR, GFPGRtetra and CORT CSV files), in that order.
df_total_GR, df_total_tetra, df_total_cort = (
    pd.read_csv(csv_file)
    for csv_file in (
        'Celulas_RF/df_final/GR_atributos.csv',
        'Celulas_RF/df_final/GFPGRtetra_GR_atributos.csv',
        'Celulas_RF/df_final/CORT_GR_atributos.csv',
    )
)
# The default working table is the GR one; the alternative below is kept for reference.
df_total = df_total_GR
#df_total = pd.concat([df_total_cort,df_total_tetra])

In [None]:
# Three training tables: GR alone, CORT + tetra ("only with array"), and all three together.
# NOTE(review): pd.concat keeps each source frame's own row labels, so labels repeat
# in the combined frames -- confirm downstream label/position lookups expect that.
df_total_withoutArray = df_total_GR
frames_with_array = [df_total_cort, df_total_tetra]
df_total_withArray = pd.concat(frames_with_array)
df_total_all = pd.concat([*frames_with_array, df_total_GR])

In [None]:
def trainRF(df_total, splitsNumber, nEstimators, clfName, avgName,
            test_size=0.4, random_state=None):
    """Train a random forest with stratified k-fold validation and keep the best fold.

    The table is split into a training part and a held-out test part. The training
    part is then cross-validated; the classifier of the fold with the highest
    Cohen's kappa on its validation rows is the one saved to disk and returned.

    Parameters
    ----------
    df_total : pandas.DataFrame
        Must contain the columns 'Celula_ID', 'Mean', 'Brightness', 'target',
        'contrast', 'dissimilarity', 'homogeneity', 'ASM', 'energy' and
        'correlation'. 'target' is the label; 'Celula_ID' is dropped before training.
    splitsNumber : int
        Number of StratifiedKFold splits.
    nEstimators : int
        Number of trees in each RandomForestClassifier.
    clfName : str
        Suffix for the saved model and confusion-matrix file names.
    avgName : str
        Suffix for the feature-importance file name.
    test_size : float, optional
        Fraction of rows held out as the test set (default 0.4).
    random_state : int or None, optional
        Seed passed to the train/test split, the fold generator and every forest.
        None (the default) leaves all three unseeded.

    Returns
    -------
    clf : RandomForestClassifier
        The classifier from the best-kappa fold (the same one written to disk).
    X_test, y_test : pandas.DataFrame
        Held-out features and labels for use with testRF.

    Raises
    ------
    RuntimeError
        If no fold produced a comparable kappa value (e.g. every score was NaN).

    Side effects
    ------------
    Writes 'RF_clasificador_muestras_gps_<clfName>.sav',
    'ponderacion_atributos_<avgName>.txt' and '_CONF_MATRIX_<clfName>.txt'
    in the working directory and prints the elapsed minutes and the kappa scores.
    """
    df_total = df_total[['Celula_ID', 'Mean', 'Brightness', 'target', 'contrast',
       'dissimilarity', 'homogeneity', 'ASM', 'energy', 'correlation']]
    df_total_sinCellID = df_total.drop('Celula_ID', axis=1)

    X = df_total_sinCellID.drop('target', axis=1)
    y = df_total_sinCellID[['target']]

    # Feature names come from X itself so they always line up with
    # clf.feature_importances_, whatever position 'target' has in the table.
    features = np.array(X.columns.tolist())

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    start = time.time()

    kf = StratifiedKFold(n_splits=splitsNumber, shuffle=True, random_state=random_state)

    lista_cohen_kappa = []
    # Start below any reachable kappa so the first fold is always accepted;
    # starting at 0 left conf_matrix unbound when no fold scored above 0.
    cohen_kappa = -np.inf
    best_clf = None
    conf_matrix = None

    for train_ix, valid_ix in kf.split(X_train, y_train):

        # Select the rows of this fold
        train_X, valid_X = X_train.iloc[train_ix], X_train.iloc[valid_ix]
        train_y, valid_y = y_train.iloc[train_ix], y_train.iloc[valid_ix]

        # Random forest for this (train / validation) pair
        clf = RandomForestClassifier(n_estimators=nEstimators, class_weight='balanced',
                                     n_jobs=5, criterion='gini',
                                     random_state=random_state)
        clf.fit(train_X, train_y.values.ravel())

        pred_y = clf.predict(valid_X)

        # Cohen's kappa on the validation rows
        ck = cohen_kappa_score(valid_y.values.ravel(), pred_y.ravel())
        lista_cohen_kappa.append(ck)

        # Model selection: remember the best fold so far
        if ck > cohen_kappa:
            cohen_kappa = ck
            best_clf = clf
            conf_matrix = confusion_matrix(valid_y, pred_y)

    if best_clf is None:
        raise RuntimeError('trainRF: no fold produced a valid Cohen kappa score')

    # Persist the selected model once, after all folds have been scored
    joblib.dump(best_clf, ('RF_clasificador_muestras_gps_'+clfName+'.sav'))

    # Rank the features by importance (descending) and write them to a text file:
    # row 0 = best kappa, row 1 = mean kappa, remaining rows = (feature, importance)
    importances = best_clf.feature_importances_
    orden = np.flip((np.argsort(importances)), axis=0)
    ponderacion_atributos = np.zeros((features.shape[0]+2, 2), dtype='U25')
    ponderacion_atributos[0] = ('cohen_Kappa', round(cohen_kappa, 4))
    ponderacion_atributos[1] = ('promedio_cohen_Kappa', round(np.average(lista_cohen_kappa), 4))
    ponderacion_atributos[2:] = np.array((features[orden], np.round(importances[orden], 4))).T
    np.savetxt(('ponderacion_atributos_'+avgName+'.txt'), ponderacion_atributos, fmt='%s', delimiter='---')

    # Save the confusion matrix of the selected fold
    np.savetxt(('_CONF_MATRIX_'+clfName+'.txt'), conf_matrix, fmt='%i')

    end = time.time()
    print('ENTRENAMIENTO '+ str((end - start)/60))
    print(np.array(lista_cohen_kappa), cohen_kappa)

    # Return the selected model, i.e. the same one that was saved above
    return best_clf, X_test, y_test

def testRF(clf, X_test, y_test):
    """Score a fitted classifier on held-out data and plot its confusion matrix.

    Prints Cohen's kappa, the accuracy and the confusion matrix, then shows the
    matrix as a plot with the x tick labels rotated by 30 degrees.

    Parameters
    ----------
    clf : fitted classifier exposing predict() and classes_
    X_test : features of the held-out rows
    y_test : labels of the held-out rows (an object with a .values array)

    Returns
    -------
    (kappa, accuracy, confusion matrix, predicted labels)
    """
    truth = y_test.values.ravel()
    pred_y_test = clf.predict(X_test)
    predicted = pred_y_test.ravel()

    cohen_kappa_score_test = cohen_kappa_score(truth, predicted)
    accuracy = accuracy_score(truth, predicted)
    print("cohen kappa :"+str(cohen_kappa_score_test))
    print("accuracy :"+str(accuracy))

    # Rows/columns follow the classifier's own class order
    class_labels = clf.classes_
    conf_matrix_test = confusion_matrix(y_test, pred_y_test, labels=class_labels)
    print(conf_matrix_test)

    ConfusionMatrixDisplay(confusion_matrix=conf_matrix_test,
                           display_labels=class_labels).plot()
    plt.xticks(rotation=30)
    plt.show()

    return cohen_kappa_score_test, accuracy, conf_matrix_test, pred_y_test

In [None]:
# Train on the "without array" table, then score the returned model on its held-out split.
clf_withoutArray, X_test_withoutArray, y_test_withoutArray = trainRF(
    df_total_withoutArray, splitsNumber=40, nEstimators=500,
    clfName="withoutArray", avgName="withoutArray",
)
(cohen_kappa_withoutArray,
 accuracy_withoutArray,
 conf_matrix_withoutArray,
 pred_y_test) = testRF(clf_withoutArray, X_test_withoutArray, y_test_withoutArray)

In [None]:
# Train on the "with array" table, then score the returned model on its held-out split.
clf_withArray, X_test_withArray, y_test_withArray = trainRF(
    df_total_withArray, splitsNumber=40, nEstimators=500,
    clfName="withArray", avgName="withArray",
)
(cohen_kappa_withArray,
 accuracy_withArray,
 conf_matrix_withArray,
 pred_y_test) = testRF(clf_withArray, X_test_withArray, y_test_withArray)

In [None]:
# Train on the combined table (100 trees here, unlike the 500 used in the other runs).
# The model is bound to clf_all: the original assigned it to clf_withoutArray, which
# overwrote the earlier model and left clf_all undefined for the test call below.
clf_all, X_test_all, y_test_all = trainRF(df_total_all, 40, 100, "all", "all")
# TEST
cohen_kappa_all, accuracy_all, conf_matrix_all, pred_y_test = testRF(clf_all, X_test_all,y_test_all)

In [None]:
def visualizePrediction(df_total, cellNameFile, path, trat, target, pred_y_test,
                        test_index=None):
    """Overlay the rows predicted as `target` on the mean image of one cell.

    Parameters
    ----------
    df_total : pandas.DataFrame
        Table the test split was taken from; needs 'Celula_ID' and 'target'.
    cellNameFile : str
        Value of 'Celula_ID' to display. It is also used as the name of the
        sub-folder of `path` that holds the cell's .tif frames.
    path : str
        Folder containing one sub-folder per cell.
    trat : str
        Unused; kept so existing calls keep working.
    target : str
        Predicted class to highlight.
    pred_y_test : array-like
        Predicted labels for the test rows, in the same order as `test_index`.
    test_index : index-like, optional
        Row indexer of the test split. When omitted, the notebook-level
        `X_test.index` is used, as before.

    Raises
    ------
    ValueError
        If fewer than 11 .tif frames are found, so nothing is left after the
        first 10 are discarded.
    """
    if test_index is None:
        # Backward-compatible fallback to the notebook global
        test_index = X_test.index

    # Test rows with their true label replaced by the prediction.
    # NOTE(review): .iloc treats the index labels as positions -- confirm this
    # holds for frames built with pd.concat, whose labels repeat.
    df = df_total.iloc[test_index].drop('target', axis=1)
    df['target'] = pred_y_test

    # Read the frames of the requested cell. The folder comes from cellNameFile
    # rather than a fixed position in os.listdir(), whose order is arbitrary.
    cell_dir = os.path.join(path, cellNameFile)
    tif_names = sorted(name for name in os.listdir(cell_dir) if name.endswith(".tif"))
    img = [cv2.imread(os.path.join(cell_dir, name), cv2.IMREAD_UNCHANGED)
           for name in tif_names]
    # Remove the first 10 images (sorted by name so the choice is deterministic)
    del img[:10]
    if not img:
        raise ValueError('visualizePrediction: no .tif frames left in ' + cell_dir)

    # Per-pixel mean over the remaining frames
    img_mean = np.mean(np.array(img), axis=0)

    df_nucleo = df[(df['Celula_ID']==cellNameFile) & (df['target']==target)]

    # Assumes 256x256 images and that (row label % 65536) is the flat pixel
    # offset inside the image -- TODO confirm against how the CSVs were built.
    mask = np.full(img_mean.shape, False).flatten()
    mask[df_nucleo.index%65536] = True
    mask = np.reshape(mask, (256, 256))

    # Paint the selected pixels at full intensity
    img_mean[mask] = 255

    fig = plt.figure()
    plt.imshow(img_mean, cmap='gray')
    plt.title('ROI')

In [None]:
# X_test was never defined in the notebook. pred_y_test at this point comes from the
# "all" run, so pair it with that run's test split and with the table it was drawn from.
# NOTE(review): confirm that the "all" run is the one meant to be inspected here.
X_test = X_test_all
df = df_total_all.iloc[X_test.index]
df=df.drop('target', axis=1)
# Replace the true label with the predicted one
df['target']=pred_y_test

In [None]:
# Show the test rows that belong to one cell
df.loc[df['Celula_ID'].eq('Y551A 06.oif.files/')]

In [None]:
path = '/home/bruno/Documents/UBA/Reconocimiento de patrones/data TP-final/GPFGRY551A/'
trat = 'GR_'
# One figure per predicted class for the same cell
for clase in ('BACKGROUND', 'CITOPLASMA', 'ARRAY', 'NUCLEO'):
    visualizePrediction(df_total_all, 'Y551A 06.oif.files/', path, trat, clase, pred_y_test)

In [None]:
# NOTE(review): img_meanS, img_BS, min_mean, max_mean, min_B, max_B and cellcode are
# not defined in any earlier cell visible here -- confirm where they come from.

# Keep the pixels whose mean and B values lie strictly inside the given bounds
mask_min_int = img_meanS > min_mean
mask_max_int = img_meanS < max_mean

mask_min_B = img_BS > min_B
mask_max_B = img_BS < max_B

roi = mask_min_int & mask_max_int & mask_min_B & mask_max_B
roi = roi.astype('uint8')
fig = plt.figure()
plt.imshow(roi)
plt.axis('off')
fig.savefig('Celulas/trainannot/'+cellcode+'.png', dpi=300, bbox_inches='tight', pad_inches=0)

# Show the ROI on top of the mean image

img_meanS_reshape = np.reshape(img_meanS, (256, 256, 1)).astype('uint8')
# Replicate to three channels so the ROI can be drawn in colour
img_meanS_reshape = np.repeat(img_meanS_reshape, 3, axis=2)
# Brighten x10 for visibility. Widen to uint16 first and saturate at 255:
# multiplying the uint8 array directly wraps around for any value above 25.
img_meanS_reshape = np.clip(img_meanS_reshape.astype('uint16')*10, 0, 255).astype('uint8')

roi_reshape = np.reshape(roi, (256, 256, 1)).astype('uint8')
roi_reshape = np.repeat(roi_reshape, 3, axis=2)
mask_roi = roi_reshape > 0
# Colour used for the ROI pixels, one value per channel
roi_rojo = roi_reshape*[255,0,150]

img_meanS_reshape[mask_roi] = roi_rojo[mask_roi]

fig = plt.figure()
plt.imshow(img_meanS_reshape)
plt.title('ROI')