In [1]:
from skimage.io import imread
import skimage.feature 
from sklearn import preprocessing
import numpy as np
import pandas as pd
import cv2 as cv
import os
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Root folder of the dataset; the cells below list one sub-folder per class label inside it.
input_data_dir = '4.input'

In [3]:
# Every entry directly under the dataset root is treated as a class folder;
# the sample count is the total number of entries across those folders.
labels = os.listdir(input_data_dir)
no_samples = sum(
    len(os.listdir(os.path.join(input_data_dir, label)))
    for label in labels
)
print('Number of samples : ', no_samples)

Number of samples :  1185


In [4]:
# Column order of the feature table: six texture statistics, four contour
# descriptors, and finally the integer class label.
cols = np.asarray([
    'Contrast', 'Energy', 'Homogeneity', 'Correlation', 'Dissimilarity', 'ASM',
    'Area', 'Perimeter', 'Epsilon', 'IsConvex',
    'Class',
])
no_cols = len(cols)

In [5]:
# Integer label assigned to each class-folder name (position in the tuple).
labeling_class = {
    name: index
    for index, name in enumerate(("benign", "malignant", "no_tumor"))
}

In [6]:
def _contour_shape_features(gray):
    """Return ``(area, perimeter, epsilon, is_convex)`` for the first contour of ``gray``.

    The image is binarised at 127 and its contours are listed; the first
    contour returned by OpenCV is measured. When the binarised image has no
    contour at all, zeros / ``False`` are returned instead of raising.
    """
    _, thresh = cv.threshold(gray, 127, 255, cv.THRESH_BINARY)
    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x and
    # (image, contours, hierarchy) on 3.x; the contour list is always at [-2].
    contours = cv.findContours(thresh, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return 0.0, 0.0, 0.0, False

    cnt = contours[0]
    area = cv.contourArea(cnt)
    perimeter = cv.arcLength(cnt, True)
    epsilon = 0.1 * perimeter
    return area, perimeter, epsilon, cv.isContourConvex(cnt)


def _glcm_texture_features(gray):
    """Return the six co-occurrence statistics of ``gray`` in feature-table order.

    Order: contrast, energy, homogeneity, correlation, dissimilarity, ASM.
    """
    # Quantise the intensities to the integer range 0..11 before building the
    # co-occurrence matrix.
    # NOTE(review): MinMaxScaler rescales each column of its input on its own,
    # so every image column is stretched independently - confirm that this
    # per-column quantisation is intended.
    quantised = preprocessing.MinMaxScaler((0, 11)).fit_transform(gray).astype(int)
    glcm = skimage.feature.graycomatrix(
        quantised, distances=[1], angles=[0], levels=256, symmetric=False, normed=False
    )
    props = ('contrast', 'energy', 'homogeneity', 'correlation', 'dissimilarity', 'ASM')
    return [skimage.feature.graycoprops(glcm, prop)[0][0] for prop in props]


def extract_feature():
    """Extract texture and shape features for every image and write them to ``data.csv``.

    Each sub-folder of ``input_data_dir`` is treated as one class; its name is
    mapped to an integer through ``labeling_class``. For every image one row is
    produced in the column order given by ``cols``: six co-occurrence
    statistics, four contour descriptors and the class label.

    Folders and files are processed in sorted order so that the row order of
    the CSV is reproducible across runs and platforms.

    Returns:
        None. The feature table is written to ``data.csv`` without an index.

    Raises:
        ValueError: if a file cannot be decoded as an image by OpenCV.
        KeyError: if a sub-folder name is not a key of ``labeling_class``.
    """
    labels = sorted(os.listdir(input_data_dir))
    print(labels)

    # Rows are collected in a list so the table always has exactly one row per
    # processed image, independent of any sample count computed elsewhere.
    rows = []
    for label in labels:
        label_dir = os.path.join(input_data_dir, label)
        image_names = sorted(os.listdir(label_dir))
        total = len(image_names)
        print('Total images for ',label,' : ',total)
        print(total)
        for image_name in image_names:
            image_path = os.path.join(label_dir, image_name)
            img = cv.imread(image_path)
            if img is None:
                # cv.imread signals failure by returning None instead of raising.
                raise ValueError('Could not read image: ' + image_path)
            # cv.imread yields channels in BGR order, so convert with BGR2GRAY.
            img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

            area, perimeter, epsilon, k = _contour_shape_features(img)
            texture = _glcm_texture_features(img)

            rows.append(texture + [area, perimeter, epsilon, k, labeling_class[label]])

    # reshape keeps a well-formed (0, n_columns) table when no image was found.
    features = np.asarray(rows, dtype=np.float64).reshape(-1, len(cols))
    df = pd.DataFrame(features, columns=cols)
    df['Class'] = df['Class'].astype(np.int64)
    df.to_csv("data.csv",index= False)

In [7]:
# Run the extraction over the whole dataset; the resulting table is written to data.csv.
extract_feature()

['benign', 'malignant', 'no_tumor']
Total images for  benign  :  395
395
Total images for  malignant  :  395
395
Total images for  no_tumor  :  395
395


In [8]:
# Reload the feature table that extract_feature() wrote to disk.
data_as_frame = pd.read_csv('data.csv')

In [9]:
# Preview the first rows to sanity-check the extracted features.
data_as_frame.head()

Unnamed: 0,Contrast,Energy,Homogeneity,Correlation,Dissimilarity,ASM,Area,Perimeter,Epsilon,IsConvex,Class
0,0.523178,0.5344,0.879987,0.966634,0.274531,0.285584,0.5,3.414214,0.341421,1.0,0
1,0.52317,0.419387,0.867636,0.966631,0.303373,0.175885,0.0,0.0,0.0,0.0,0
2,0.374648,0.511641,0.88567,0.966433,0.250558,0.261776,11.5,15.071068,1.507107,0.0,0
3,0.324349,0.546864,0.899062,0.974246,0.219881,0.29906,4.0,7.656854,0.765685,1.0,0
4,0.332299,0.549499,0.902133,0.972292,0.215157,0.30195,10.0,12.485281,1.248528,1.0,0


All features have been extracted, saved to `data.csv`, and loaded back into a DataFrame.