In [1]:
from pickle import load
import numpy as np
from matplotlib import pyplot as plt
import random

from sklearn.model_selection import train_test_split


%load_ext autoreload
%autoreload 2

In [2]:
# Fix one seed for both RNGs this notebook draws from: the stdlib `random`
# module and NumPy's legacy global generator.
seed = 42
for seed_rng in (random.seed, np.random.seed):
    seed_rng(seed)

In [3]:
# Load the pickled glyph collection. Unpickling can execute arbitrary code,
# so this must only ever be pointed at a trusted file.
with open('./data/data_64.pkl', 'rb') as pickle_file:
    data = load(pickle_file)

# Number of top-level entries in the collection (each is indexed by class below).
len(data)

964

In [4]:
# Draw the signature classes: each row of `classes` holds 4 indices into
# `data`, one per glyph that makes up a signature of that class.
#
# Two rows that are identical (same indices in the same order) would define
# the same class under two labels, so the draw is repeated until every row is
# distinct. The result is then always written to disk, so the cell that
# reloads "data/classes.npy" can never pick up a stale or missing file.
num_classes, glyphs_per_class = 200, 4
max_attempts = 100  # guards against looping forever when `data` is too small

for attempt in range(max_attempts):
    classes = np.random.randint(0, len(data), (num_classes, glyphs_per_class))
    # np.unique(axis=0) collapses identical rows; fewer rows => duplicates.
    same_classes = len(np.unique(classes, axis=0)) < num_classes
    if not same_classes:
        break
    print("same class")
else:
    raise RuntimeError(
        f"could not draw {num_classes} distinct classes in {max_attempts} attempts"
    )

np.save("data/classes", classes)


In [5]:
# Read the class table back from disk so the rest of the notebook works from
# the persisted copy, then display its shape.
classes = np.load("data/classes.npy")
np.shape(classes)


(200, 4)

In [6]:
def generate_signature(data, inx_1, inx_2, inx_3, inx_4):
    """Compose one 128x128 boolean "signature" image from four glyph classes.

    For each of the four indices, one image is picked at random from
    ``data[inx]``, logically inverted, and pasted into a 64x64 window of the
    canvas at a randomly jittered position. The four windows are laid out left
    to right and may overlap; overlapping pixels are OR-ed together.

    Parameters
    ----------
    data : indexable collection
        ``data[inx]`` must be a non-empty sequence of images.
        NOTE(review): assumes each image is a 64x64 boolean array in which
        ``False`` marks ink -- confirm against the pickle's producer.
    inx_1, inx_2, inx_3, inx_4 : int
        Indices into ``data`` for the first through fourth glyph.

    Returns
    -------
    numpy.ndarray
        Boolean array of shape (128, 128); ``True`` wherever any pasted
        (inverted) glyph is ``True``.

    Notes
    -----
    Uses both ``random`` (image choice) and ``np.random`` (placement), so both
    generators must be seeded for reproducible output.
    """
    start_range = 10

    # Every glyph's top edge is drawn from the same half-open row interval.
    row_low, row_high = 32 - start_range, 32 + start_range

    # Half-open [low, high) interval for each glyph's left edge. The first and
    # last intervals are one-sided so that the 64-wide window always stays
    # inside the 128-wide canvas.
    col_bounds = (
        (0, 0 + start_range),
        (21 - start_range, 21 + start_range),
        (42 - start_range, 42 + start_range),
        (64 - start_range, 64),
    )

    # Pick all four glyph images first, then place them: this keeps the order
    # of draws on each random stream stable.
    glyphs = [
        np.logical_not(random.choice(data[inx]))
        for inx in (inx_1, inx_2, inx_3, inx_4)
    ]

    result_img = np.zeros((128, 128), dtype=bool)
    for glyph, (col_low, col_high) in zip(glyphs, col_bounds):
        row, col = np.random.randint([row_low, col_low], [row_high, col_high])
        result_img[row:row + 64, col:col + 64] |= glyph

    return result_img

In [7]:
num_samples = 100  # signatures generated per seen class

# Seen-class dataset: classes 0..99, `num_samples` signatures each.
# Labels are laid out class by class, and images are generated in that same
# order, so samples[i] belongs to labels[i].
labels = [label for label in range(100) for _ in range(num_samples)]

# One randomly composed signature image per label.
samples = [generate_signature(data, *classes[label]) for label in labels]

# Stack into arrays and insert a singleton axis after the sample axis:
# (N, 1, 128, 128). N is inferred with -1 so that changing `num_samples` or
# the label range does not break the reshape.
samples = np.array(samples).reshape(-1, 1, 128, 128)
labels = np.array(labels)

In [8]:
# Hold out 10% of the seen-class samples as a test set; the split is shuffled
# reproducibly via the notebook-wide seed.
X_train, X_test, y_train, y_test = train_test_split(
    samples,
    labels,
    test_size=0.1,
    random_state=seed,
)

In [9]:
# Shapes of the four split arrays, in the order train X, train y, test X, test y.
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

((9000, 1, 128, 128), (9000,), (1000, 1, 128, 128), (1000,))

In [10]:
# NOTE: this rebinds `num_samples` from the seen-class cell; re-running that
# cell afterwards without resetting the value would use 2 instead of 100.
num_samples = 2  # signatures generated per unseen class

# Unseen-class test set: classes 100..199, which never appear in the
# train/test split above, `num_samples` signatures each.
y_unseen_test = [label for label in range(100, 200) for _ in range(num_samples)]

# One randomly composed signature image per label, generated in label order.
X_unseen_test = [
    generate_signature(data, *classes[label]) for label in y_unseen_test
]

# Stack into arrays and insert a singleton axis after the sample axis:
# (N, 1, 128, 128). N is inferred with -1 so that changing `num_samples` or
# the label range does not break the reshape.
X_unseen_test = np.array(X_unseen_test).reshape(-1, 1, 128, 128)
y_unseen_test = np.array(y_unseen_test)

In [12]:
# Persist each split as an .npz archive with two arrays: "data" (images) and
# "labels" (class ids).
for npz_path, split_images, split_labels in (
    ("data/test.npz", X_test, y_test),
    ("data/train.npz", X_train, y_train),
    ("data/test_unseen.npz", X_unseen_test, y_unseen_test),
):
    np.savez(npz_path, data=split_images, labels=split_labels)


In [None]:
# Sanity check: reload every saved archive and report the shape of its image
# array. np.load on an .npz returns an NpzFile that keeps the file open, so it
# is used as a context manager to close the handle as soon as the shape is read.
saved_data_shapes = []
for npz_path in ("data/train.npz", "data/test.npz", "data/test_unseen.npz"):
    with np.load(npz_path) as archive:
        saved_data_shapes.append(archive["data"].shape)

tuple(saved_data_shapes)

((9000, 1, 128, 128), (1000, 1, 128, 128), (200, 1, 128, 128))

In [15]:
# Sanity check: reload every saved archive and report the shape of its label
# array. np.load on an .npz returns an NpzFile that keeps the file open, so it
# is used as a context manager to close the handle as soon as the shape is read.
saved_label_shapes = []
for npz_path in ("data/train.npz", "data/test.npz", "data/test_unseen.npz"):
    with np.load(npz_path) as archive:
        saved_label_shapes.append(archive["labels"].shape)

tuple(saved_label_shapes)

((9000,), (1000,), (200,))