In [1]:
import os
os.environ["OMP_NUM_THREADS"] = '12'

import tensorflow as tf
from skimage.color import rgb2lab, lab2rgb
from sklearn.cluster import KMeans
import numpy as np
from skimage.color import rgb2lab, lab2rgb
from skimage.segmentation import slic
from skimage.segmentation import mark_boundaries
import cv2
import numpy as np
import pandas as pd

import time
import math

In [2]:
# Root folder of the source images. cnn_feature_extractor lists this folder and treats
# every entry as one class split; the entry named "all" is labelled 1, anything else 0.
data_src_path = r'C:\Users\gufra\Desktop\Work\Projects\AI\ALL_Classification\Datasets\Original'
# Folder that receives the exported feature CSV files.
# NOTE(review): both paths are absolute and machine-specific — adjust before running elsewhere.
data_dst_path = r'C:\Users\gufra\Desktop\Work\Projects\AI\ALL_Classification\Datasets\CSV'

In [3]:
def preprocess(model_name, img):
    """Apply the Keras preprocessing that matches ``model_name`` and add a batch axis.

    Parameters
    ----------
    model_name : str
        One of 'mobilenetv2', 'resnet101', 'vgg16', 'vgg19',
        'inceptionresnetv2' or 'densenet'.
    img : array-like
        A single image; it is handed unchanged to the model family's
        ``preprocess_input``.

    Returns
    -------
    numpy.ndarray
        The preprocessed image with a new leading axis
        (``np.expand_dims(..., axis=0)``).

    Raises
    ------
    ValueError
        If ``model_name`` is not a supported name. Previously the function
        fell through and silently returned ``None``.
    """
    # model name -> submodule of tf.keras.applications owning its preprocess_input
    preprocess_modules = {
        'mobilenetv2': 'mobilenet_v2',
        'resnet101': 'resnet',
        'vgg16': 'vgg16',
        'vgg19': 'vgg19',
        'inceptionresnetv2': 'inception_resnet_v2',
        'densenet': 'densenet',
    }
    try:
        module_name = preprocess_modules[model_name]
    except KeyError:
        supported = ", ".join(preprocess_modules)
        raise ValueError(
            f"Unknown model name {model_name!r}; expected one of: {supported}"
        ) from None

    module = getattr(tf.keras.applications, module_name)
    return np.expand_dims(module.preprocess_input(img), axis=0)

In [4]:
# MobileNetV2 with ImageNet weights and no classification head, for 450x450x3 inputs.
mbase = tf.keras.applications.MobileNetV2(input_shape=(450, 450, 3),include_top=False, weights='imagenet')
# Mark the backbone as non-trainable.
mbase.trainable = False



In [5]:
# ResNet101 with ImageNet weights and no classification head, for 450x450x3 inputs.
rbase = tf.keras.applications.ResNet101(input_shape=(450, 450, 3), include_top=False, weights="imagenet")
# Mark the backbone as non-trainable.
rbase.trainable = False

In [6]:
# VGG16 with ImageNet weights and no classification head, for 450x450x3 inputs.
v16base = tf.keras.applications.VGG16(input_shape=(450, 450, 3),include_top=False, weights='imagenet')
# Mark the backbone as non-trainable.
v16base.trainable = False

In [7]:
# VGG19 with ImageNet weights and no classification head, for 450x450x3 inputs.
v19base = tf.keras.applications.VGG19(input_shape=(450, 450, 3),include_top=False, weights='imagenet')
# Mark the backbone as non-trainable.
v19base.trainable = False

In [8]:
# InceptionResNetV2 with ImageNet weights and no classification head, for 450x450x3 inputs.
ibase = tf.keras.applications.InceptionResNetV2(input_shape=(450, 450, 3),include_top=False, weights='imagenet')
# Mark the backbone as non-trainable.
ibase.trainable = False

In [9]:
# DenseNet121 with ImageNet weights and no classification head, for 450x450x3 inputs.
dbase = tf.keras.applications.DenseNet121(input_shape=(450, 450, 3),include_top=False, weights='imagenet')
# Mark the backbone as non-trainable.
dbase.trainable = False

In [10]:
def _with_global_average_pooling(base):
    """Return a model mapping ``base``'s input to its globally average-pooled output."""
    pooled = tf.keras.layers.GlobalAveragePooling2D()(base.output)
    return tf.keras.models.Model(inputs=base.input, outputs=pooled)


# One pooled feature extractor per pretrained backbone.
mobilenetv2 = _with_global_average_pooling(mbase)
resnet101 = _with_global_average_pooling(rbase)
vgg16 = _with_global_average_pooling(v16base)
vgg19 = _with_global_average_pooling(v19base)
inceptionresnetv2 = _with_global_average_pooling(ibase)
densenet = _with_global_average_pooling(dbase)

# Parallel lists: models[i] is the extractor whose name is model_names[i].
models = [mobilenetv2, resnet101, vgg16, vgg19, inceptionresnetv2, densenet]
model_names = ["mobilenetv2", "resnet101", "vgg16", "vgg19", "inceptionresnetv2", "densenet"]

In [11]:
# Push one sample image through every extractor to measure the length of the
# feature vector each of them produces.
sample_path = os.path.join(data_src_path, "all", "UID_1_1_1_all.bmp")
img = cv2.imread(sample_path)
if img is None:
    # cv2.imread reports failure by returning None rather than raising.
    raise FileNotFoundError(f"Could not read sample image: {sample_path}")

# num_features[i] is the feature-vector length of models[i].
num_features = [
    len(model.predict(preprocess(name, img)).flatten())
    for name, model in zip(model_names, models)
]

# Keep the individual per-model names available as well.
(mobilenetv2_num_ftrs, resnet101_num_ftrs, vgg16_num_ftrs, vgg19_num_ftrs,
 inceptionresnetv2_num_ftrs, densenet_num_ftrs) = num_features

num_features



[1280, 2048, 512, 512, 1536, 1024]

In [12]:
def get_points_straight(img, line_coord, limit, dir):
    """Find the first and last zero-valued pixels along one row or one column.

    With ``dir == 1`` the row ``line_coord`` is scanned over columns
    ``0 .. limit-1``; with any other ``dir`` the column ``line_coord`` is
    scanned over rows ``0 .. limit-1``.

    Returns ``[first, last]`` where each entry is a ``[row, column]`` list.
    ``first`` is None when no zero pixel was met, and ``last`` is None when
    fewer than two were met.
    """
    scan_row = dir == 1

    hits = []
    for pos in range(limit):
        point = [line_coord, pos] if scan_row else [pos, line_coord]
        if img[point[0]][point[1]] == 0:
            hits.append(point)

    start = hits[0] if hits else None
    # A single hit only fills "first"; "last" needs a second zero pixel.
    end = hits[-1] if len(hits) > 1 else None
    return [start, end]

def diag_traverser(img, x, y, get_points, dir):
    """Walk one diagonal of a 2-D image and report the zero-valued pixels on it.

    The walk starts at row ``x``, column ``y`` and moves one row down per
    step. The column increases by one when ``dir == 1`` and decreases by one
    otherwise. It ends as soon as the position leaves the image.

    Parameters
    ----------
    img : array with a ``shape`` attribute, indexable as ``img[row, col]``
    x, y : int
        Start row and start column.
    get_points : bool
        Selects the return value (see below).
    dir : int
        ``1`` for the down-right diagonal, anything else for down-left.

    Returns
    -------
    list or int
        If ``get_points`` is truthy: ``[first, last]``, each a
        ``[column, row]`` list. ``first`` is None when no zero pixel was met,
        ``last`` is None when fewer than two were met.
        Otherwise: the number of zero-valued pixels met.
    """
    rows, cols = img.shape[:2]
    col_step = 1 if dir == 1 else -1

    first, last = None, None
    count = 0

    i, j = x, y
    # Explicit bounds test instead of a bare ``except`` as loop terminator, so
    # genuine errors (wrong input type, interrupts, ...) are no longer swallowed.
    while 0 <= i < rows and 0 <= j < cols:
        if img[i, j] == 0:
            if first is None:
                first = [j, i]
            else:
                last = [j, i]
            count += 1

        i += 1
        j += col_step

    if get_points:
        return [first, last]
    return count

def get_distance(point1, point2):
    """Return the Euclidean distance between two points, or -1 if either is None.

    ``is None`` is used instead of ``== None`` so that array-like points
    (where ``==`` compares element-wise) are handled as well.
    """
    if point1 is None or point2 is None:
        return -1
    return math.dist(point1, point2)

def get_distance_between_centers(line1_points, line2_points):
    """Distance between the midpoints of two segments given as ``[first, last]`` pairs.

    Returns -1 when any of the four end points is None; otherwise the result
    of ``get_distance`` applied to the two midpoints.
    """
    endpoints = [line1_points[0], line1_points[1], line2_points[0], line2_points[1]]
    if None in endpoints:
        return -1

    def midpoint(a, b):
        # component-wise mean of two 2-D points
        return [(a[0] + b[0]) / 2, (a[1] + b[1]) / 2]

    center_a = midpoint(endpoints[0], endpoints[1])
    center_b = midpoint(endpoints[2], endpoints[3])
    return get_distance(center_a, center_b)

def get_structural_features(img):
    """Compute 65 structural features from the Canny edge map of ``img``.

    The edge map is inverted (so edge pixels are 0) and 40 columns are cut
    from the left and right. Features are then measured along three
    horizontal and three vertical lines (at 1/4, 2/4, 3/4 of the cropped
    size), along six diagonals, and on a 4x4 grid of blocks.

    Layout of the returned list (1-based positions):
        1-12   column of first/last edge pixel on the horizontal lines and
               row of first/last edge pixel on the vertical lines
        13-24  one coordinate of the first/last edge pixel on each diagonal
        25-30  first-to-last distance on the horizontal and vertical lines
        31-36  first-to-last distance on the diagonals
        37-42  number of edge pixels on each diagonal
        43-58  number of edge pixels in each block of the 4x4 grid
               (row-major: block row is the outer index)
        59-64  distances between segment midpoints of selected line pairs
        65     number of zero entries in the *input* image

    Missing points are encoded as -1 (also for the derived distances).

    Parameters
    ----------
    img : image accepted by ``cv2.Canny``

    Returns
    -------
    list
        The 65 feature values described above.
    """
    edges = cv2.bitwise_not(cv2.Canny(img, 100, 200))
    cropped = edges[:, 40:edges.shape[1] - 40]
    h, w = cropped.shape

    step_x = w // 4  # column spacing of the vertical lines / block width
    step_y = h // 4  # row spacing of the horizontal lines / block height

    hor = [get_points_straight(cropped, step_y * k, w, 1) for k in (1, 2, 3)]
    ver = [get_points_straight(cropped, step_x * k, h, 2) for k in (1, 2, 3)]

    # (start row, start column) of each diagonal walk
    starts_dir1 = [(0, 0), (0, w // 2), (h // 2, 0)]
    starts_dir2 = [(0, w - 1), (0, w // 2), (h // 2, w - 1)]
    diag1 = [diag_traverser(cropped, r, c, True, 1) for r, c in starts_dir1]
    diag2 = [diag_traverser(cropped, r, c, True, 2) for r, c in starts_dir2]
    cbp1 = [diag_traverser(cropped, r, c, False, 1) for r, c in starts_dir1]
    cbp2 = [diag_traverser(cropped, r, c, False, 2) for r, c in starts_dir2]

    # Bug fix: rows must be sliced with the row step and columns with the
    # column step. The previous code had the two steps swapped, so on a
    # non-square crop the blocks did not tile the image.
    block_counts = []
    for i in range(4):
        for j in range(4):
            block = cropped[step_y * i: step_y * (i + 1), step_x * j: step_x * (j + 1)]
            block_counts.append(np.count_nonzero(block == 0))

    center_pairs = [
        (hor[0], ver[2]),
        (hor[1], ver[1]),
        (hor[2], ver[0]),
        (diag1[0], diag2[0]),
        (diag1[1], diag2[2]),
        (diag1[2], diag2[1]),
    ]

    def coord(point, axis):
        # coordinate of a point, or -1 when the point was not found
        return point[axis] if point is not None else -1

    data = []
    data += [coord(p[0], 1) for p in hor]
    data += [coord(p[0], 0) for p in ver]
    data += [coord(p[1], 1) for p in hor]
    data += [coord(p[1], 0) for p in ver]  # 12

    data += [coord(p[0], 0) for p in diag1]
    data += [coord(p[0], 1) for p in diag2]
    data += [coord(p[1], 0) for p in diag1]
    data += [coord(p[1], 1) for p in diag2]  # 24

    data += [get_distance(p[0], p[1]) for p in hor + ver]  # 30
    data += [get_distance(p[0], p[1]) for p in diag1 + diag2]  # 36

    data += cbp1 + cbp2  # 42

    data += block_counts  # 58

    data += [get_distance_between_centers(a, b) for a, b in center_pairs]  # 64

    # NOTE(review): this counts zeros in the input image, not in the edge map
    # used by every other feature — confirm that this is intended.
    data.append(np.count_nonzero(img == 0))  # 65

    return data

In [12]:
def lab_features(img, random_state=0):
    """Summarise an image by 25 representative CIELAB colours (75 values).

    The image is split into SLIC superpixels, the mean Lab colour of every
    superpixel is computed, and these means are clustered with k-means
    (k=25). The flattened cluster centres are returned.

    Parameters
    ----------
    img : array
        Image expected to be RGB (it is passed to ``rgb2lab``).
    random_state : int or None, optional
        Seed for k-means so repeated runs give the same centres. Pass None
        for the previous unseeded behaviour.

    Returns
    -------
    numpy.ndarray
        1-D array of length 75 (25 centres x 3 Lab channels).
        NOTE(review): the order of the centres is whatever k-means returns,
        so a given position is not guaranteed to describe a comparable
        colour across images.
    """
    lab_img = rgb2lab(img)

    # slic converts 3-channel input to Lab itself by default, so it receives
    # the RGB image. Feeding it ``lab_img`` converted the colours twice.
    segments = slic(img, n_segments=3000, compactness=10, sigma=1, start_label=1)

    # Per-superpixel mean colour, vectorised with bincount instead of a
    # Python loop over every pixel.
    labels = segments.ravel()
    pixel_counts = np.bincount(labels)
    lab_pixels = lab_img.reshape(-1, 3)
    channel_sums = np.stack(
        [np.bincount(labels, weights=lab_pixels[:, ch]) for ch in range(3)],
        axis=1,
    )

    # Labels start at 1, so label 0 (and any label without pixels) has no
    # colour. Dropping those rows keeps spurious all-zero points out of the
    # clustering.
    occupied = pixel_counts > 0
    mean_colors = channel_sums[occupied] / pixel_counts[occupied][:, None]

    kmeans = KMeans(n_clusters=25, n_init=2, random_state=random_state).fit(mean_colors)
    return kmeans.cluster_centers_.flatten()

In [12]:
def cnn_feature_extractor(model_indices, include_lab):
    """Extract features for every image under ``data_src_path`` and save them as CSV.

    Every entry of ``data_src_path`` is treated as one class folder. Each
    image is run through the selected extractors (``models[mi]`` after
    ``preprocess(model_names[mi], ...)``) and, optionally, ``lab_features``.
    The concatenated vector plus a label column is written to
    ``<data_dst_path>/<model names joined by '_'>.csv``.

    Parameters
    ----------
    model_indices : sequence of int
        Indices into ``models`` / ``model_names`` / ``num_features``.
    include_lab : bool
        Whether to append the 75 Lab colour features.

    Notes
    -----
    * Images are converted from OpenCV's BGR order to RGB before feature
      extraction, because ``preprocess_input`` and ``rgb2lab`` expect RGB.
      CSVs written before this fix were computed on BGR data.
    * Files that OpenCV cannot decode are reported and skipped.
    * The label is 1 for the folder named "all" and 0 for any other folder.
    * NOTE(review): the file name does not encode ``include_lab``, so a run
      with Lab features overwrites the CSV of the same models without them.

    Raises
    ------
    ValueError
        If a feature vector does not have the expected number of columns.
    """
    # Create the destination up front so a missing folder cannot discard a
    # long extraction run at the very end.
    os.makedirs(data_dst_path, exist_ok=True)

    started = time.time()

    n_feature_cols = sum(num_features[mi] for mi in model_indices)
    if include_lab:
        n_feature_cols += 75
    cols = ["f" + str(n) for n in range(n_feature_cols)]
    cols.append("class")

    # Rows are collected and turned into a DataFrame once; concatenating a
    # one-row frame per image copies the whole table every time.
    rows = []
    for s in sorted(os.listdir(data_src_path)):
        print(f"Processing {s}")

        curr_path = os.path.join(data_src_path, s)
        label = 1.0 if s == "all" else 0.0

        c = 0
        for file_name in os.listdir(curr_path):
            bgr = cv2.imread(os.path.join(curr_path, file_name))
            if bgr is None:
                # cv2.imread returns None for files it cannot decode
                print(f"\nSkipping unreadable file: {file_name}")
                continue
            img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

            # Each consumer gets its own copy of the image.
            features = [
                models[mi].predict(preprocess(model_names[mi], img.copy()), verbose=0).flatten()
                for mi in model_indices
            ]
            if include_lab:
                features.append(lab_features(img.copy()))

            # Plain numpy concatenation; everything is brought to float32 so
            # CNN and Lab features share one precision.
            vector = np.concatenate([np.asarray(f, dtype=np.float32) for f in features])
            if vector.size != n_feature_cols:
                raise ValueError(
                    f"{file_name}: expected {n_feature_cols} features, got {vector.size}"
                )
            rows.append(np.append(vector.astype(np.float64), label))

            c += 1
            if c % 100 == 0:
                print(c, end=' ')

        print("\n")

    table = np.vstack(rows) if rows else np.empty((0, len(cols)))
    df = pd.DataFrame(table, columns=cols)

    # Same file name as before: selected model names in model_names order.
    mns = [name for idx, name in enumerate(model_names) if idx in model_indices]
    df.to_csv(os.path.join(data_dst_path, f"{'_'.join(mns)}.csv"), index=False)
    print((time.time() - started))

In [13]:
# Export features of model index 0 ("mobilenetv2" in model_names), without Lab colour features.
cnn_feature_extractor([0], False)

Processing all
100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 3400 3500 3600 3700 3800 3900 4000 4100 4200 4300 4400 4500 4600 4700 4800 4900 5000 5100 5200 5300 5400 5500 5600 5700 5800 5900 6000 6100 6200 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 

Processing hem
100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 

4033.155663728714


In [14]:
# Export features of model index 1 ("resnet101" in model_names), without Lab colour features.
cnn_feature_extractor([1], False)

Processing all
100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 3400 3500 3600 3700 3800 3900 4000 4100 4200 4300 4400 4500 4600 4700 4800 4900 5000 5100 5200 5300 5400 5500 5600 5700 5800 5900 6000 6100 6200 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 

Processing hem
100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 

5547.4913737773895


In [15]:
# Export features of model index 2 ("vgg16" in model_names), without Lab colour features.
cnn_feature_extractor([2], False)

Processing all
100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 3400 3500 3600 3700 3800 3900 4000 4100 4200 4300 4400 4500 4600 4700 4800 4900 5000 5100 5200 5300 5400 5500 5600 5700 5800 5900 6000 6100 6200 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 

Processing hem
100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 

2588.513947248459


In [13]:
# Export features of model index 3 ("vgg19" in model_names), without Lab colour features.
cnn_feature_extractor([3], False)

Processing all
100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 3400 3500 3600 3700 3800 3900 4000 4100 4200 4300 4400 4500 4600 4700 4800 4900 5000 5100 5200 5300 5400 5500 5600 5700 5800 5900 6000 6100 6200 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 

Processing hem
100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 

2259.0196425914764


In [14]:
# Export features of model index 4 ("inceptionresnetv2" in model_names), without Lab colour features.
cnn_feature_extractor([4], False)

Processing all
100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 3400 3500 3600 3700 3800 3900 4000 4100 4200 4300 4400 4500 4600 4700 4800 4900 5000 5100 5200 5300 5400 5500 5600 5700 5800 5900 6000 6100 6200 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 

Processing hem
100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 

4518.131747245789


In [15]:
# Export features of model index 5 ("densenet" in model_names), without Lab colour features.
cnn_feature_extractor([5], False)

Processing all
100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 3400 3500 3600 3700 3800 3900 4000 4100 4200 4300 4400 4500 4600 4700 4800 4900 5000 5100 5200 5300 5400 5500 5600 5700 5800 5900 6000 6100 6200 6300 6400 6500 6600 6700 6800 6900 7000 7100 7200 

Processing hem
100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 2200 2300 2400 2500 2600 2700 2800 2900 3000 3100 3200 3300 

3555.6803567409515
