## Classificador ternário para características morfométricas de núcleo/citoplasma de células cervicais 

Classificador (3 classes): Normal vs. (ASC-US, LSIL) vs. (ASC-H, HSIL, Carcinoma invasivo)

In [1]:
import numpy as np
import pandas as pd 
from math import sqrt
import os
import sys
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow 
from skimage import morphology, measure
from skimage.draw import polygon, polygon_perimeter
from scipy.spatial.distance import cdist
from scipy.stats import kurtosis

import pyefd
from pyefd import elliptic_fourier_descriptors, normalize_efd

from sklearn.metrics import accuracy_score, balanced_accuracy_score
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold

from sklearn import feature_selection as fs

from itertools import cycle

from random import randint
from random import sample

import xgboost as xgb
import utils.shapeFeatures, utils.functions
import csv

In [2]:
# Notebook-wide constants.
# IMG_W / IMG_H: presumably the width/height of the source images in pixels — TODO confirm.
IMG_W, IMG_H = 1376, 1020

# Bethesda label -> integer code, plus the reverse lookup derived from it so
# the two tables cannot drift apart.
Bethesda_classes = dict(zip(
    ('Normal', 'ASC-US', 'LSIL', 'ASC-H', 'HSIL', 'Invasive Carcinoma'),
    range(6),
))
Bethesda_idx_classes = {code: label for label, code in Bethesda_classes.items()}

# Input locations, all resolved against the current working directory.
PATH_masks = os.path.join(os.getcwd(), 'mascaras')
dataset_file_nuclei, dataset_file_cytoplasm = (
    os.path.join(os.getcwd(), 'base', fname)
    for fname in ('nucleus-segmentations.csv', 'cytoplasm-segmentations.csv')
)

In [3]:
#print(dataset_file_nuclei, dataset_file_cytoplasm)

### Seleção de features - shape e contorno:

In [4]:
# Load the previously saved cell table from disk.
# NOTE(review): the file name ends in .csv but it is read with pd.read_pickle,
# so the file must actually be a pickle. Unpickling can execute arbitrary
# code — only load this file if it comes from a trusted source.
df_mask_cells = pd.read_pickle('dataCRIC2.csv')


In [5]:
#aux = df_mask_cells[['image_id', 'cell_id']]
#num_min_curvs = np.inf
#for image_id, cell_id in aux.values:
#    (xN, yN, xC, yC) = utils.shapeFeatures.get_contour_cell_points(df_mask_cells, image_id, cell_id)
#    m_curvs, zero_cross, max_points_mcurv, max = utils.shapeFeatures.calc_curvatures(xN, yN)
#    if len(max_points_mcurv) < num_min_curvs:
#        num_min_curvs = len(max_points_mcurv)
#    m_curvs, zero_cross, max_points_mcurv, max = utils.shapeFeatures.calc_curvatures(xC, yC)
#    if len(max_points_mcurv) < num_min_curvs:
#        num_min_curvs = len(max_points_mcurv)


In [6]:
# Run make_stats on the loaded table. The value printed further down shows the
# result is a tuple (a NumPy array followed by a DataFrame).
# NOTE(review): the plotting cell later in this notebook reads `x` as if it
# were an array with a `.shape`; binding the tuple to `x` here is what makes
# that cell fail with "'tuple' object has no attribute 'shape'".
x = utils.shapeFeatures.make_stats(df_mask_cells)

603.0 [0.03519862 0.03180915 0.01585278 0.014173   0.01045363]
603.0 [4.205147979095228] [5.149066525374572] [20.731059131134664]


In [13]:
# Manual check of the numbers printed by the make_stats cell: taking the first
# printed value (0.03519862) and the printed 603.0, this expression reproduces
# the first bracketed value of the second printed line (~4.2051).
# NOTE(review): this cell carries an out-of-order execution count and uses
# hard-coded literals copied from that output.
((1/0.03519862 )**2 * np.pi)/603.0 

4.205148014742348

In [7]:
# Display the loaded table (one row per image_id/cell_id, with the bethesda
# label, image file name and the nucleus/cytoplasm segmentation point columns).
df_mask_cells

Unnamed: 0,image_id,cell_id,bethesda,image_filename,nparray_points_segm_Nucleus,nparray_points_segm_Cyto
0,1,14796,0,be340ee72689dfe3f8dc9c24de6127f4.png,"[[398, 157], [398, 156], [398, 155], [398, 155...","[[467, 232], [468, 232], [469, 232], [470, 232..."
1,1,14797,0,be340ee72689dfe3f8dc9c24de6127f4.png,"[[474, 379], [474, 378], [474, 378], [473, 378...","[[526, 418], [526, 417], [526, 416], [526, 415..."
2,1,14798,0,be340ee72689dfe3f8dc9c24de6127f4.png,"[[545, 983], [544, 983], [544, 983], [544, 982...","[[442, 870], [441, 870], [440, 870], [439, 870..."
3,1,14799,0,be340ee72689dfe3f8dc9c24de6127f4.png,"[[278, 124], [278, 124], [279, 124], [279, 124...","[[242, 44], [241, 44], [240, 44], [240, 44], [..."
4,1,14801,0,be340ee72689dfe3f8dc9c24de6127f4.png,"[[140, 217], [139, 217], [139, 217], [138, 217...","[[238, 197], [238, 196], [238, 195], [238, 194..."
...,...,...,...,...,...,...
3228,399,11539,5,dc2df7c3f88649ded343b13b9486cddf.png,"[[405, 562], [405, 563], [405, 563], [404, 563...","[[360, 542], [360, 543], [360, 543], [361, 543..."
3229,399,11540,5,dc2df7c3f88649ded343b13b9486cddf.png,"[[510, 498], [509, 498], [509, 498], [509, 499...","[[496, 439], [496, 440], [496, 440], [495, 441..."
3230,400,11535,5,9ae8a4edde40219bad6303cebc672ee4.png,"[[454, 782], [454, 783], [454, 783], [453, 783...","[[429, 737], [428, 737], [428, 737], [428, 738..."
3231,400,11536,5,9ae8a4edde40219bad6303cebc672ee4.png,"[[672, 596], [672, 597], [672, 597], [672, 597...","[[688, 492], [689, 492], [689, 492], [689, 493..."


In [8]:
# Print everything make_stats returned (`x[:]` slices the whole of `x`).
print(x[:])

(array([1, 0, 0, 0, 0, 0]),    bethesda  image_id  cell_id  areaN  perimeterN  major_axisN  minor_axisN  \
0         0         1    14796   72.0    77.79899    34.923416    28.487003   

   equivalent_diameterN  eccentricityN  circularityN  ...  Use_curv2C  \
0              9.574615       0.578476      0.149484  ...    5.149067   

   Use_curv3C  major_axis_angleC   area_NC  perimetro_NC  major_axis_NC  \
0   20.731059           0.827775  0.119403      0.114612       0.116392   

   minor_axis_NC  nucleus_position  sub_major_axis_angle_NC convexity_NC  
0       0.135984          0.039793                -1.376747     1.058727  

[1 rows x 54 columns])


In [9]:
def shift_points(x, y, margin=10):
    """Translate coordinates so the smallest x and smallest y land on `margin`.

    The inputs are not modified; the subtraction builds new objects.

    Args:
        x: x-coordinates. Anything `np.min` accepts and that supports
            element-wise subtraction of a NumPy scalar.
        y: y-coordinates, same requirements as `x`.
        margin: value the minimum of each axis is moved to. Defaults to 10,
            the offset that used to be hard-coded.

    Returns:
        tuple: `(x_shift, y_shift)`, i.e. `(x - min(x)) + margin` and
        `(y - min(y)) + margin`.

    Raises:
        ValueError: propagated from `np.min` when `x` or `y` is empty.
    """
    # The former `x.copy()` / `y.copy()` results were overwritten on the very
    # next line, so they only added a needless `.copy()` requirement on inputs.
    x_shift = (x - np.min(x)) + margin
    y_shift = (y - np.min(y)) + margin
    return x_shift, y_shift


In [10]:
# Diagnostic figure (3x2 grid) for the curvature analysis of one contour.
#
# NOTE(review): this cell relies on names that no visible cell defines
# (y, x_smooth, y_smooth, gaus_f, sigma, curvs, m_curvs, zero_cross,
# max_points_mcurv). It also expects `x` to be an array of contour
# x-coordinates, but an earlier cell binds `x` to the tuple returned by
# utils.shapeFeatures.make_stats, which is what raises
# "'tuple' object has no attribute 'shape'" on the next line. Rebind `x`/`y`
# (and the other inputs) to the contour being inspected before running.
n = x.shape[-1]
t = np.arange(n)

# Peek at both ends of max_points_mcurv, putting each element straight back so
# the container is left unchanged.
# NOTE: despite the names, `first` receives the element at the RIGHT end
# (pop) and `last` the element at the LEFT end (popleft); the names are kept
# as they were in case later cells read them.
first = max_points_mcurv.pop()        # take from the right end ...
max_points_mcurv.append(first)        # ... and restore it
last = max_points_mcurv.popleft()     # take from the left end ...
max_points_mcurv.appendleft(last)     # ... and restore it

fig,axis = plt.subplots(3, 2, figsize=(26,12))

# [0,0] gaus_f plotted against the sample index.
axis[0,0].plot(t, gaus_f)
axis[0,0].set( title = 'Gaus sigma({:.2f}-{:.1%})'.format(sigma, sigma/n))

# [0,1] curvs against the sample index; '+' marks max_points_mcurv entries,
# 'x' marks zero_cross entries.
axis[0,1].scatter(t, curvs,  s=0.8, linewidth=1.0)
axis[0,1].set( title = 'K(s)')
axis[0,1].axhline(0,0,1, linewidth = 0.5, color='g')
for t2 in max_points_mcurv:
    axis[0,1].text(t2, curvs[t2], '+', color='g')
for t3 in zero_cross:
    # Fixed: this loop used to draw at the stale `t2` left over from the loop
    # above, stacking every 'x' on the last maximum instead of on each
    # zero crossing.
    axis[0,1].text(t3, curvs[t3], 'x', color='r')

# [1,0] m_curvs against the sample index, annotating each maximum with its
# own value and the values of its two neighbours.
axis[1,0].scatter(t, m_curvs, s=0.8, linewidth=1.0)
axis[1,0].set( title = '|K(s)|')
axis[1,0].axhline(0,0,1, linewidth = 1, color='r')
for t3 in max_points_mcurv:
    axis[1,0].text(t3, m_curvs[t3], '+', color='g')
    axis[1,0].text((t3), m_curvs[t3]-0.03, ('[.]:{:.3f}').format(m_curvs[t3]), color='g')
    axis[1,0].text((t3), m_curvs[t3-1]+0.06, ('[-1]:{:.3f}').format(m_curvs[t3-1]), color='r')
    # `t3-1` already wraps to the end when t3 == 0 (negative index); wrap the
    # right-hand neighbour the same way so a maximum at the last sample does
    # not raise IndexError.
    axis[1,0].text((t3), m_curvs[(t3+1) % len(m_curvs)]+0.05,
                   ('[+1]:{:.3f}').format(m_curvs[(t3+1) % len(m_curvs)]), color='r')

# [1,1] original and smoothed outlines, both shifted by shift_points so they
# overlay near the origin; maxima are marked on the smoothed outline.
xf, yf = shift_points(x, y)
xsf, ysf = shift_points(x_smooth, y_smooth)

axis[1,1].plot(xf, yf, xsf, ysf, linewidth=1.0)
axis[1,1].set(xlabel= 'x(t)', ylabel = 'y(t)', title = 'Shape original')
for t2 in max_points_mcurv:
    axis[1,1].text(xsf[t2], ysf[t2], '+', color='m')

# [2,0] smoothed outline with the maxima marked, plus index labels at samples
# 0 and 4 and at the two end elements of max_points_mcurv.
axis[2,0].scatter(x_smooth, y_smooth, s=0.8, linewidth=1.0 )
axis[2,0].set(xlabel= 'x_smooth(t)', ylabel = 'y_smooth(t)', title = 'Shape suavizado e maximos |K(t)|')
for t2 in max_points_mcurv:
    axis[2,0].text(x_smooth[t2], y_smooth[t2], 'x', color='r')
axis[2,0].text(x_smooth[0], y_smooth[0]-5, '0', color='r')
axis[2,0].text(x_smooth[4], y_smooth[4]+7, '4', color='r')
axis[2,0].text(x_smooth[first], y_smooth[first]-10, first, color='r')
axis[2,0].text(x_smooth[last], y_smooth[last]+10, last, color='r')

# [2,1] original outline with the maxima marked.
axis[2,1].plot(x, y, linewidth=1.0)
axis[2,1].set(xlabel= 't', ylabel = 'y(t)', title = 'Shape original e maximos |K(t)|')
for t2 in max_points_mcurv:
    axis[2,1].text(x[t2], y[t2], 'x', color='r')
plt.show()

AttributeError: 'tuple' object has no attribute 'shape'

In [None]:
# Disabled code kept as a bare string literal (it is never executed): extra
# scatter panels for dK and d2K with the max_points_mcurv positions marked.
''' 
axis[1,1].scatter(t, dK,linewidth=1.0)
axis[1,1].set( title = 'Kd') 
for t2 in max_points_mcurv:
    axis[1,1].text(t2, dK[t2], 'x', color='r') 
axis[1,1].axhline(0,0,1, linewidth = 0.5, color='r') 

axis[1,2].scatter(t, d2K, linewidth=1.0)
axis[1,2].set( title = 'K2d') 
for t2 in max_points_mcurv:
    axis[1,2].text(t2, d2K[t2], 'x', color='r') 
axis[1,2].axhline(0,0,1, linewidth = 0.5, color='r') 
'''



# Disabled code kept as a bare string literal (it is never executed): a
# two-row figure of dK and d2K with zero_cross and max_points_mcurv marked.
# As the last expression of the cell, this string is what the cell echoes.
'''
n = dK.shape[-1]
t = np.arange(n)

fig, axis = plt.subplots(2, 1, figsize=(22,12), sharex=True)

axis[0].scatter(t, dK)
axis[0].set( title = 'Kd', xlim=(n-3, n+3), xticks=(np.arange(0, 128, 5))) 
for t1 in zero_cross:
    axis[0].text(t1, dK[t1], 'x', color='m') 
    axis[0].text(t1, np.real(dK[t1]-0.01), "s:{:.1f}:{:.1f}".format(np.sign(np.real(dK[t1])), \
                                     np.sign(np.real(dK[t1+1]))), color='m') 
    axis[0].axvline(t1,0,1, linewidth = 0.2, color='r') 
    axis[0].axvline(t1+1,0,1, linewidth = 0.2, color='r') 
for t2 in max_points_mcurv:
    axis[0].text(t2, dK[t2], '#', color='r') 
    axis[0].text(t2+1, np.real(dK[t2+1]+0.01), "t+1:{:.1f}".format(np.real(dK[t2+1])), color='r') 
    axis[0].axvline(t2,0,1, linewidth = 0.2, color='r') 
    axis[0].axvline(t2+1,0,1, linewidth = 0.2, color='r') 
 
axis[0].axhline(0,0,1, linewidth = 0.2, color='r') 

axis[1].scatter(t, d2K)
axis[1].set( title = 'K2d') 
for t2 in max_points_mcurv:
    axis[1].text(t2, d2K[t2], 'x', color='r') 
    axis[1].axvline(t2,0,1, linewidth = 0.2, color='r') 
axis[1].axhline(0,0,1, linewidth = 0.2, color='r') 
'''

'\nn = dK.shape[-1]\nt = np.arange(n)\n\nfig, axis = plt.subplots(2, 1, figsize=(22,12), sharex=True)\n\naxis[0].scatter(t, dK)\naxis[0].set( title = \'Kd\', xlim=(n-3, n+3), xticks=(np.arange(0, 128, 5))) \nfor t1 in zero_cross:\n    axis[0].text(t1, dK[t1], \'x\', color=\'m\') \n    axis[0].text(t1, np.real(dK[t1]-0.01), "s:{:.1f}:{:.1f}".format(np.sign(np.real(dK[t1])),                                      np.sign(np.real(dK[t1+1]))), color=\'m\') \n    axis[0].axvline(t1,0,1, linewidth = 0.2, color=\'r\') \n    axis[0].axvline(t1+1,0,1, linewidth = 0.2, color=\'r\') \nfor t2 in max_points_mcurv:\n    axis[0].text(t2, dK[t2], \'#\', color=\'r\') \n    axis[0].text(t2+1, np.real(dK[t2+1]+0.01), "t+1:{:.1f}".format(np.real(dK[t2+1])), color=\'r\') \n    axis[0].axvline(t2,0,1, linewidth = 0.2, color=\'r\') \n    axis[0].axvline(t2+1,0,1, linewidth = 0.2, color=\'r\') \n \naxis[0].axhline(0,0,1, linewidth = 0.2, color=\'r\') \n\naxis[1].scatter(t, d2K)\naxis[1].set( title = \'K2d\') \n

In [None]:
def formatting(string_numpy):
    """Split the text form of a nested list into its ", "-separated pieces.

    The argument is converted with `str()`, split on ", ", and then the first
    two characters of the first piece and the last two characters of the last
    piece are dropped (for input like "[[1, 2, 3]]" these are the enclosing
    "[[" and "]]").

    Args:
        string_numpy (str): value whose string form is to be split.

    Returns:
        list: the pieces, as strings.
    """
    pieces = str(string_numpy).split(", ")
    final = len(pieces) - 1
    # Trim the head first, then the tail; for a single piece both trims are
    # applied to that same element, in this order.
    for position, keep in ((0, slice(2, None)), (final, slice(None, -2))):
        pieces[position] = pieces[position][keep]
    return pieces
 