In [1]:
import sklearn

In [2]:
from tqdm import tqdm_notebook as tqdm

In [3]:
import pandas as pd
import os
import glob

In [4]:
import features

In [5]:
# Dataset root directory: the train/ and test/ folders and the train.csv
# output path are all built by joining onto this value in later cells.
DATA_DIR = "../data/gan_augmented"

In [6]:
# Resolve both dataset splits under DATA_DIR in one pass; the trailing
# expression echoes the two paths as the cell output.
train, test = (os.path.join(DATA_DIR, split) for split in ("train", "test"))
train, test

('../data/gan_augmented/train', '../data/gan_augmented/test')

# Make feature names

In [7]:
import itertools
# Building blocks of the feature names: one name is produced for every
# (channel, prop, distance, degree) combination, joined with underscores.
channels = ("gray", "alpha", "beta")
props = ("contrast", "dissimilarity", "homogeneity", "energy")
distances = ("1", "5")
degrees = ("0", "45", "90", "135")
feat_tuples = list(itertools.product(channels, props, distances, degrees))

# Three extra per-channel names that follow each channel's combination names.
morph_names = ("num_elements", "average_compactness", "total_compactness")

# Assemble the final list channel by channel: all combination names of the
# channel first, then its three extra names.
# NOTE(review): the order presumably has to mirror the feature vector returned
# by features.extract_delgado_features -- confirm against that module.
feat_names = []
for channel in channels:
    feat_names += ["_".join(t) for t in feat_tuples if t[0] == channel]
    feat_names += [channel + "_" + morph_name for morph_name in morph_names]

# Define function for iterating over patches

In [8]:
def patch_gen(image, size=400, step=1):
    """Yield square ``size`` x ``size`` patches cut from ``image``.

    Patches are produced in row-major order: the vertical offset advances in
    the outer loop and the horizontal offset in the inner loop. Only patches
    that fit entirely inside the image are yielded, so any remainder at the
    right and bottom edges is dropped.

    Args:
        image: Array exposing ``.shape`` and 2-D slicing (e.g. a NumPy array).
            The first two axes are treated as rows and columns; any further
            axes (such as colour channels) are passed through untouched, so
            both 2-D and 3-D images are accepted.
        size: Side length of each patch, in pixels.
        step: Stride between consecutive patches as a fraction of ``size``.
            ``1`` gives non-overlapping patches, ``0.5`` gives 50% overlap.

    Yields:
        Slices of ``image`` of shape ``(size, size, ...)``.

    Raises:
        ValueError: If ``int(size * step)`` is smaller than 1, since the
            window could not advance.
    """
    # Computed once instead of on every outer iteration.
    stride = int(size * step)
    if stride < 1:
        raise ValueError(
            "patch stride int(size * step) must be >= 1, got {}".format(stride)
        )

    height, width = image.shape[:2]
    for y in range(0, height - size + 1, stride):
        for x in range(0, width - size + 1, stride):
            # No trailing ``:`` so that images without a channel axis work too.
            yield image[y:y + size, x:x + size]

# Get files

In [None]:
from skimage import io
from typing import List
    
print("Running", train)

# Collect every .jpg and .png anywhere below the training directory
# (``**`` with recursive=True descends into sub-folders), in sorted order so
# the processing sequence is deterministic.
patterns = (train + "/**/*.jpg", train + "/**/*.png")
files = sorted(
    match
    for pattern in patterns
    for match in glob.glob(pattern, recursive=True)
)
len(files)

Running ../data/gan_augmented/train


2720

# Set up the output file

In [None]:
# CSV header line: three identifying columns followed by one column per
# feature name, terminated by a newline.
id_columns = ["file", "label", "patch_num"]
header = ",".join(id_columns + feat_names) + "\n"

In [None]:
# Destination CSV for the extracted training features; it is placed directly
# inside DATA_DIR, next to the train/ and test/ folders.
out_file = os.path.join(DATA_DIR, "train.csv")

In [None]:
# Start a fresh output file: mode "w" truncates anything already stored at
# out_file, leaving only the header line. The extraction loop below re-opens
# the file in append mode to add the data rows.
with open(out_file, "w") as f:
    f.write(header)

# Perform feature extraction

In [None]:
# For every image: cut it into patches, extract one feature vector per patch,
# and append the resulting CSV rows to out_file.
for path in tqdm(files):
    # The class label is the name of the directory that directly contains the
    # image. os.path is used instead of splitting on "/" so the label is also
    # correct on platforms where glob returns backslash-separated paths.
    label = os.path.basename(os.path.dirname(path))
    file_name = os.path.basename(path)

    img = io.imread(path)

    rows: List[str] = []

    # The patches are materialised into a list so the nested progress bar
    # knows its total up front.
    for i, patch in enumerate(tqdm(list(patch_gen(img)), leave=False)):
        # Only the first element of the returned pair is written out; the
        # second one is discarded.
        feat, _ = features.extract_delgado_features(patch)
        words = [file_name, label, str(i)] + [str(value) for value in feat]
        rows.append(",".join(words))

    # Rows are buffered and appended once per image, after all of its patches
    # have been processed, so an image never ends up partially written.
    with open(out_file, "a") as out:
        out.writelines(row + "\n" for row in rows)

HBox(children=(IntProgress(value=0, max=2720), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

  .format(dtypeobj_in, dtypeobj_out))


HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))

HBox(children=(IntProgress(value=0, max=12), HTML(value='')))