In [316]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
from glob import glob
from tqdm import tqdm

In [310]:
def get_1_binary_plane(im):
    return np.bitwise_and(im, 1)


def insert_noise(im, idx, noise):
    im_flat = im.flatten().copy()
    chosen_pix = im_flat[idx]
    np.bitwise_and(chosen_pix, 254, out=chosen_pix)
    np.bitwise_or(chosen_pix, noise, out=chosen_pix)
    im_flat[idx] = chosen_pix
    return im_flat.reshape(im.shape)


def load(im_path):
    im = cv2.imread(im_path)[..., 0]  # Remove redundant channels
    return cv2.resize(im, (257, 257))  # Normalize size


def extract_features(im):
    assert im.shape == (257, 257), f'Image has wrong size. Expected (257, 257), but received {im.shape}'
    im_plane = get_1_binary_plane(im)
    fft = np.fft.fft2(im_plane / np.sum(im_plane))  # Normalize total energy
    log_magnitude = np.log(np.abs(fft) + 1e-8)  # Adding small constant to avoid Nan in log
    features = (log_magnitude.mean(axis=0) + log_magnitude.mean(axis=1)) / 2
    return features[:257 // 2]  # Return half due to symmetry


def extract_features_tqdm(imgs):
    features = []
    for im in tqdm(imgs):
        features.append(extract_features(im))
    return features

In [311]:
def corrupt_images(imgs, q=0.2):
    corr_imgs = []
    for im in tqdm(imgs):
        n = int(im.size * q)
        noise = np.random.randint(0, 2, size=n, dtype='uint8')
        idx = np.random.permutation(im.size)[:n]
        corrupted_image = insert_noise(im, idx, noise)
        corr_imgs.append(corrupted_image)
    return corr_imgs


def corrupt_and_extractf(im_files, qs: list):
    imgs = list(map(load, im_files))
    data = {}
    for q in qs:
        corr_imgs = corrupt_images(imgs, q)
        features = extract_features_tqdm(corr_imgs)
        print(f'Finished for q={q}')
        data[q] = features
    return data

In [312]:
im_files = glob('im_data/*.tif')

In [315]:
data = corrupt_and_extractf(im_files[:len(im_files) // 2], qs=[0.2, 0.4, 0.6, 0.8, 1.0])

100%|██████████| 500/500 [00:00<00:00, 611.21it/s]
100%|██████████| 500/500 [00:03<00:00, 165.13it/s]


Finished for q=0.2


100%|██████████| 500/500 [00:00<00:00, 681.20it/s]
100%|██████████| 500/500 [00:03<00:00, 165.34it/s]


Finished for q=0.4


100%|██████████| 500/500 [00:00<00:00, 639.39it/s]
100%|██████████| 500/500 [00:02<00:00, 167.22it/s]


Finished for q=0.6


100%|██████████| 500/500 [00:00<00:00, 611.25it/s]
100%|██████████| 500/500 [00:02<00:00, 167.22it/s]


Finished for q=0.8


100%|██████████| 500/500 [00:00<00:00, 570.76it/s]
100%|██████████| 500/500 [00:02<00:00, 166.78it/s]

Finished for q=1.0





In [317]:
noncorr_imgs = list(map(load, im_files[len(im_files) // 2:]))

In [318]:
noncorr_imgs_features = extract_features_tqdm(noncorr_imgs)

100%|██████████| 500/500 [00:03<00:00, 157.03it/s]


In [322]:
# Add non corrupt features and labels
upd_data = {}
Y = np.array([0] * (len(im_files) // 2) + [1] * (len(im_files) - len(im_files) // 2))
for q, corr_features in data.items():
    upd_data[q] = (
        np.stack(corr_features + noncorr_imgs_features, axis=0),
        Y
    )

In [323]:
# Save data
for q, (features, y) in upd_data.items():
    np.save(f'features_data/features_{q}.npy', features.astype('float32'))
    np.save(f'features_data/labels_{q}.npy', y.astype('int32'))

## Do premature test

In [324]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [343]:
X = np.load('features_data/features_0.2.npy')
Y = np.load('features_data/labels_0.2.npy')

In [344]:
X.shape

(1000, 128)

In [349]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)

In [350]:
model = LogisticRegression(max_iter=1000, solver='lbfgs').fit(X_train, y_train)

In [351]:
preds = model.predict(X_test)

In [352]:
# Seems to be working
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           0       0.61      0.81      0.70       140
           1       0.77      0.56      0.64       160

    accuracy                           0.67       300
   macro avg       0.69      0.68      0.67       300
weighted avg       0.70      0.67      0.67       300

