In [1]:
import os
import sys; sys.path.insert(0, os.path.abspath("../"))
import cv2
import numpy as np 
import matplotlib.pyplot as plt
# from utils.plots import get_image, get_image_and_plot
from tqdm import tqdm
import pandas as pd
from scipy.stats import skew, kurtosis
from skimage.measure import shannon_entropy
from pipeline.preprocessing import SkinLesionPreprocessing
from dataset.dataset import SkinLesion_Dataset
from pipeline.feature_extraction import FeaturesExtraction
from tqdm import tqdm
import pyarrow

In [2]:
# Shared pipeline objects reused by the cells below.
data = SkinLesion_Dataset()          # indexed later as data[i]['img'] / data[i]['label']
preproc = SkinLesionPreprocessing()  # supplies preprocess(img)
cfe = FeaturesExtraction(
    levels=['global'],
    color_params={},
)

# Peek at the first 10 rows of data.md_df.
# NOTE(review): this does not appear to be the final expression of its cell,
# so the notebook likely never renders it -- confirm and move it to its own cell if needed.
data.md_df.head(10)

def segment(img):
    """Segment an image by Otsu-thresholding its smoothed, normalised channel 2.

    Each pixel's channel-2 value is divided by the Euclidean norm of the pixel's
    channel vector, the result is Gaussian-smoothed, rescaled to 8 bits and
    binarised with Otsu's threshold. Holes in the resulting mask are then filled.

    Parameters
    ----------
    img : numpy.ndarray
        Image with the channels on the last axis and at least three channels.
        Channel index 2 is used (presumably red in OpenCV's BGR order -- TODO confirm
        against the loader).

    Returns
    -------
    numpy.ndarray
        2-D ``uint8`` mask holding only the values 0 and 255.
    """
    # Per-pixel Euclidean norm over the channel axis.
    norm = np.sqrt(np.sum(img.astype(np.float32) ** 2, axis=-1))

    # Pure-black pixels have zero norm. A plain 1/norm yields inf there, and
    # 0 * inf is NaN, which the blur then spreads to the neighbouring pixels.
    # Use 0 as the reciprocal for those pixels so they stay at 0 instead.
    inv_norm = np.divide(1.0, norm, out=np.zeros_like(norm), where=norm > 0)
    r_norm = img[:, :, 2] * inv_norm

    # ksize=(0, 0) lets OpenCV derive the kernel size from sigmaX.
    smoothed = cv2.GaussianBlur(r_norm, ksize=(0, 0), sigmaX=3, borderType=cv2.BORDER_DEFAULT)
    rnormg = (smoothed * 255).astype(np.uint8)

    # With THRESH_OTSU the threshold argument (0) is ignored and computed from the histogram.
    _, mask_r = cv2.threshold(rnormg, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Filling holes: each contour is drawn filled on its own. Drawing them all in a
    # single call would treat nested contours as holes and leave them unfilled.
    contour_r, _ = cv2.findContours(mask_r, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
    for cnt in contour_r:
        cv2.drawContours(mask_r, [cnt], 0, 255, -1)

    return mask_r



In [4]:
# Load the sample listing; the first CSV column becomes the index, whose values
# are later used as keys into the dataset.
data_sample = pd.read_csv(
    '../metadata/data_1000_sample.csv',
    index_col=[0],
)
data_sample

Unnamed: 0,img_id,label,sublabel,size,height,width,path,split
8211,2005,nevus,nev,"(450, 600, 3)",450,600,../data/binary/train/nevus/nev02005.jpg,train
10349,3045,nevus,nev,"(680, 1024, 3)",680,1024,../data/binary/train/nevus/nev03045.jpg,train
7087,1714,nevus,nev,"(768, 1024, 3)",768,1024,../data/binary/train/nevus/nev01714.jpg,train
6408,6957,nevus,nev,"(1024, 1024, 3)",1024,1024,../data/binary/train/nevus/nev06957.jpg,train
4482,479,nevus,nev,"(450, 600, 3)",450,600,../data/binary/train/nevus/nev00479.jpg,train
...,...,...,...,...,...,...,...,...
3092,2860,others,mel,"(1024, 1024, 3)",1024,1024,../data/binary/val/others/mel02860.jpg,val
2225,544,others,ack,"(450, 600, 3)",450,600,../data/binary/val/others/ack00544.jpg,val
2238,3364,others,mel,"(680, 1024, 3)",680,1024,../data/binary/val/others/mel03364.jpg,val
3083,455,others,scc,"(1024, 1024, 3)",1024,1024,../data/binary/val/others/scc00455.jpg,val


## Extract texture features

In [5]:
# Display the feature names exposed by the extractor; the extraction loop below
# reuses this list as the DataFrame column names.
cfe.features_names

['global_rad_1_lbp0',
 'global_rad_1_lbp1',
 'global_rad_1_lbp2',
 'global_rad_1_lbp3',
 'global_rad_1_lbp4',
 'global_rad_1_lbp5',
 'global_rad_1_lbp6',
 'global_rad_1_lbp7',
 'global_rad_1_lbp8',
 'global_rad_1_lbp9',
 'global_rad_3_lbp0',
 'global_rad_3_lbp1',
 'global_rad_3_lbp2',
 'global_rad_3_lbp3',
 'global_rad_3_lbp4',
 'global_rad_3_lbp5',
 'global_rad_3_lbp6',
 'global_rad_3_lbp7',
 'global_rad_3_lbp8',
 'global_rad_3_lbp9',
 'global_dist1_ang0_contrast',
 'global_dist1_ang1_contrast',
 'global_dist1_ang2_contrast',
 'global_dist1_ang3_contrast',
 'global_dist2_ang0_contrast',
 'global_dist2_ang1_contrast',
 'global_dist2_ang2_contrast',
 'global_dist2_ang3_contrast',
 'global_dist1_ang0_dissimilarity',
 'global_dist1_ang1_dissimilarity',
 'global_dist1_ang2_dissimilarity',
 'global_dist1_ang3_dissimilarity',
 'global_dist2_ang0_dissimilarity',
 'global_dist2_ang1_dissimilarity',
 'global_dist2_ang2_dissimilarity',
 'global_dist2_ang3_dissimilarity',
 'global_dist1_ang0_homo

In [6]:
# Extract one feature vector and one label per sampled image, then persist the
# resulting table in Feather format.
all_feat = []
labels = []

# No hard-coded total: tqdm infers it from the length of the index array, so the
# progress bar stays correct if the sample size changes.
for i in tqdm(data_sample.index.values):
    # Index the dataset once per image; the original looked the item up twice
    # (presumably loading the image from disk each time).
    sample = data[i]
    img = preproc.preprocess(sample['img'])

    labels.append(sample['label'])
    all_feat.append(cfe.extract_features(img))

# Column order follows the extractor's own feature-name list.
df = pd.DataFrame(all_feat, columns=cfe.features_names)
df['label'] = labels
df.to_feather('../data/binary/ds1000_texture_features.f')

100%|██████████| 1000/1000 [37:25<00:00,  2.25s/it] 
