In [18]:
from pathlib import Path

def renumber_jpgs(directory):
    """Rename every ``*.jpg`` in ``directory`` to ``0.jpg``, ``1.jpg``, ...

    The rename is done in two phases (originals -> unique temporary names ->
    final names) so that a target such as ``0.jpg`` can never collide with a
    file that still has to be renamed. A direct rename would overwrite that
    file on POSIX and raise on Windows.

    Files are processed in a deterministic order: purely numeric stems first,
    by numeric value, then all other names alphabetically. Re-running on an
    already renumbered directory therefore leaves every file where it is.

    Returns the list of original paths, in the order they were numbered.
    """
    import uuid

    def order_key(p):
        # Numeric stems sort by value so '2.jpg' comes before '10.jpg'.
        if p.stem.isdigit():
            return (0, int(p.stem), p.name)
        return (1, 0, p.name)

    originals = sorted(directory.glob('*.jpg'), key=order_key)

    # Phase 1: move everything out of the way. The '.tmp' suffix keeps the
    # staged files out of any '*.jpg' match.
    token = uuid.uuid4().hex
    staged = []
    for i, src in enumerate(originals):
        tmp = directory / f'.renumber-{token}-{i}.tmp'
        src.rename(tmp)
        staged.append(tmp)

    # Phase 2: no '*.jpg' is left in the directory, so the targets are free.
    for i, tmp in enumerate(staged):
        tmp.rename(directory / f'{i}.jpg')

    return originals


path = Path().cwd().parent / 'data' / 'raw' / 'array_dataset' / 'test' / 'probed'
files = renumber_jpgs(path)

print('Done')

Done


In [12]:
import cv2
import numpy as np
import torch

def _list_jpgs(folder):
    """Return the ``*.jpg`` files in ``folder`` as a sorted list of path strings."""
    # Sorting makes the seeded sampling below independent of filesystem order.
    return sorted(str(f) for f in folder.glob('*.jpg'))


def _read_gray_resized(paths, resize):
    """Read each path as a grayscale image and resize it with ``cv2.resize``."""
    images = []
    for p in paths:
        img = cv2.imread(str(p), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise ValueError(f'Could not read image: {p}')
        images.append(cv2.resize(img, resize))
    return images


def make_train_test(src, dest, resize=(70, 395)):
    """Build class-balanced train data and test data from JPG folders.

    Expected layout under ``src``: ``train/probed``, ``train/fresh``,
    ``test/probed`` and ``test/fresh``. Images from ``probed`` get label 1,
    images from ``fresh`` get label 0.

    The larger training class is randomly subsampled (without replacement) to
    the size of the smaller one; the test set is used as-is. Both sets are
    shuffled. The global NumPy RNG is seeded with 42 for this.

    Args:
        src: ``pathlib.Path`` of the raw dataset root.
        dest: ``pathlib.Path`` of the output directory; created if missing.
        resize: size passed to ``cv2.resize`` as ``dsize``.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as NumPy arrays. The same four
        arrays are written to ``dest`` as ``train_data.pt``, ``test_data.pt``,
        ``train_labels.pt`` and ``test_labels.pt`` via ``torch.save``.

    Raises:
        ValueError: if an image file cannot be read.
    """
    defects = _list_jpgs(src / 'train' / 'probed')
    non_defects = _list_jpgs(src / 'train' / 'fresh')
    test_defects = _list_jpgs(src / 'test' / 'probed')
    test_non_defects = _list_jpgs(src / 'test' / 'fresh')

    np.random.seed(42)
    # Balance the training classes by subsampling the larger one.
    if len(defects) < len(non_defects):
        non_defects = np.random.choice(non_defects, len(defects), replace=False)
    else:
        defects = np.random.choice(defects, len(non_defects), replace=False)

    # Defects first, then non-defects; the labels below follow the same order.
    images = _read_gray_resized(defects, resize) + _read_gray_resized(non_defects, resize)
    test_images = (_read_gray_resized(test_defects, resize)
                   + _read_gray_resized(test_non_defects, resize))

    labels = np.concatenate((np.ones(len(defects)), np.zeros(len(non_defects))))
    test_labels = np.concatenate((np.ones(len(test_defects)),
                                  np.zeros(len(test_non_defects))))

    # Shuffle images and labels with the same permutation.
    idx = np.random.permutation(len(images))
    X_train, y_train = np.array(images)[idx], labels[idx]

    idx = np.random.permutation(len(test_images))
    X_test, y_test = np.array(test_images)[idx], test_labels[idx]

    print('Total training images:', len(X_train))
    print('Total training labels:', len(y_train))

    print('Total test images:',len(X_test))
    print('Total test labels:',len(y_test))

    dest.mkdir(parents=True, exist_ok=True)
    torch.save(X_train, dest / 'train_data.pt')
    torch.save(X_test, dest / 'test_data.pt')
    torch.save(y_train, dest / 'train_labels.pt')
    torch.save(y_test, dest / 'test_labels.pt')

    return X_train, X_test, y_train, y_test

# Raw array dataset in, processed tensors out; both are siblings of the
# notebook's working directory.
path = Path.cwd().parent.joinpath('data', 'raw', 'array_dataset')
dest = Path.cwd().parent.joinpath('data', 'processed')

X_train, X_test, y_train, y_test = make_train_test(src=path, dest=dest)

Total training images: 108
Total training labels: 108
Total test images: 92
Total test labels: 92


In [21]:
def make_card_data(src, dest):
    """Stack all ``*.jpg`` images in ``src`` and save them as ``card_data.pt``.

    Images are read as grayscale, in sorted filename order so the row order of
    the saved array is reproducible. They are not resized here.

    Args:
        src: ``pathlib.Path`` of the folder holding the JPG files.
        dest: ``pathlib.Path`` of the output directory; created if missing.

    Raises:
        ValueError: if an image file cannot be read.
    """
    images = []
    for p in sorted(str(f) for f in src.glob('*.jpg')):
        img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise ValueError(f'Could not read image: {p}')
        images.append(img)
    images = np.array(images)

    dest.mkdir(parents=True, exist_ok=True)
    torch.save(images, dest / 'card_data.pt')

# Card images in, a single stacked array out.
path = Path.cwd().parent.joinpath('data', 'raw', 'card_dataset')
dest = Path.cwd().parent.joinpath('data', 'processed')

make_card_data(src=path, dest=dest)

In [25]:
def make_user_data(src, dest):
    """Stack all ``*.jpg`` images in ``src`` and save them as ``user_data.pt``.

    Images are read as grayscale, in sorted filename order so the row order of
    the saved array is reproducible. They are not resized here.

    Args:
        src: ``pathlib.Path`` of the folder holding the JPG files.
        dest: ``pathlib.Path`` of the output directory; created if missing.

    Raises:
        ValueError: if an image file cannot be read.
    """
    images = []
    for p in sorted(str(f) for f in src.glob('*.jpg')):
        img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise ValueError(f'Could not read image: {p}')
        images.append(img)
    images = np.array(images)

    dest.mkdir(parents=True, exist_ok=True)
    torch.save(images, dest / 'user_data.pt')

# User-supplied test images in, a single stacked array out.
path = Path.cwd().parent.joinpath('data', 'raw', 'test_dataset')
dest = Path.cwd().parent.joinpath('data', 'processed')

make_user_data(src=path, dest=dest)

In [26]:
# Read the saved user images back; the bare expression is shown as the cell output.
torch.load(dest.joinpath('user_data.pt'))

array([[[77, 77, 77, ..., 24, 23, 22],
        [76, 75, 75, ..., 22, 21, 20],
        [71, 70, 69, ..., 21, 19, 18],
        ...,
        [43, 37, 32, ..., 44, 44, 45],
        [45, 40, 34, ..., 52, 51, 51],
        [45, 40, 35, ..., 56, 55, 55]],

       [[78, 76, 75, ..., 24, 23, 23],
        [74, 73, 73, ..., 22, 21, 20],
        [67, 68, 69, ..., 20, 19, 18],
        ...,
        [45, 39, 33, ..., 47, 47, 46],
        [46, 41, 35, ..., 54, 53, 53],
        [45, 41, 35, ..., 56, 56, 55]],

       [[77, 78, 78, ..., 26, 25, 24],
        [77, 77, 77, ..., 24, 22, 21],
        [72, 72, 72, ..., 21, 20, 19],
        ...,
        [38, 34, 31, ..., 44, 44, 45],
        [40, 36, 32, ..., 52, 52, 53],
        [41, 35, 32, ..., 55, 55, 56]],

       ...,

       [[81, 80, 79, ..., 29, 28, 26],
        [79, 78, 78, ..., 24, 23, 22],
        [74, 74, 74, ..., 19, 18, 18],
        ...,
        [41, 36, 32, ..., 43, 43, 43],
        [45, 38, 33, ..., 51, 51, 51],
        [44, 37, 32, ..., 55, 55

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from PIL import Image
import cv2
from torchvision import transforms
import seaborn as sns
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split


class MyDataset(Dataset):
    """Dataset over an indexable collection of images and their labels.

    Args:
        data: indexable collection of images (``data[i]`` is one sample).
        targets: labels, one per sample. Any numeric sequence, NumPy array or
            tensor is accepted and stored as a ``torch.long`` tensor, so float
            label arrays (e.g. built with ``np.ones`` / ``np.zeros``) work too.
        transform: optional callable applied to each resized image.
        resize: size passed to ``cv2.resize`` as ``dsize`` before ``transform``
            is applied. Defaults to the previously hard-coded ``(70, 395)``.
    """

    def __init__(self, data, targets, transform=None, resize=(70, 395)):
        self.data = data
        # Explicit cast + copy: independent of the input dtype and never
        # aliases the caller's label array.
        self.targets = torch.as_tensor(targets).to(dtype=torch.long, copy=True)
        self.transform = transform
        self.resize = resize

    def __getitem__(self, index):
        """Return ``(image, label)``; the image is resized and transformed
        only when a transform was given, otherwise it is returned unchanged."""
        x = self.data[index]
        y = self.targets[index]

        if self.transform:
            x = cv2.resize(x, self.resize, interpolation=cv2.INTER_AREA)
            x = self.transform(x)

        return x, y

    def __len__(self):
        """Number of samples, taken from ``data``."""
        return len(self.data)


def get_loaders(X_train, X_test, y_train, y_test, batch_size=12):
    """Wrap train and test arrays in ``DataLoader`` objects.

    Every image is smoothed with a 5x5 Gaussian blur up front; the datasets
    then apply ``ToTensor`` followed by ``Normalize`` per sample.

    Args:
        X_train, X_test: iterables of images.
        y_train, y_test: labels matching the images.
        batch_size: batch size for both loaders (default 12, the previously
            hard-coded value).

    Returns:
        ``(trainloader, testloader)``. Neither loader shuffles, so samples are
        served in the order they were passed in.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        # mean 0 / std 1: the normalisation leaves the values unchanged.
        transforms.Normalize((0.0), (1.0)),
    ])

    def blur(images):
        # Same kernel for train and test so both sets are preprocessed alike.
        return np.array([cv2.GaussianBlur(img, (5, 5), 0) for img in images])

    train_dataset = MyDataset(blur(X_train), y_train, transform=transform)
    test_dataset = MyDataset(blur(X_test), y_test, transform=transform)

    trainloader = DataLoader(train_dataset, batch_size=batch_size)
    testloader = DataLoader(test_dataset, batch_size=batch_size)

    return trainloader, testloader




def _plot_label_hist(ax, labels, title, color, dx, y_frac):
    """Draw a 10-bin label histogram on ``ax`` and print the bar height inside
    the first and the last bar."""
    sns.histplot(labels, ax=ax, bins=10, color=color)
    ax.set_title(title)
    bars = list(ax.patches)
    for bar in bars:
        # Only the two outer bars are annotated.
        if bar == bars[0] or bar == bars[-1]:
            ax.annotate(str(bar.get_height()),
                        (bar.get_x() + dx, bar.get_height() * y_frac),
                        fontsize=4, color='white')
    ax.grid(alpha=0.2)
    ax.set_xticks([0.05, 0.95], [0, 1])


def dataset_loader(stem, root=None):
    """Load one of the named datasets, print its sizes and plot its label split.

    Args:
        stem: ``'reference'``, ``'v21_training'`` or ``'training_wlr'``.
        root: optional directory containing one sub-folder per dataset name.
            When omitted, the original hard-coded location is used.

    Returns:
        ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: if ``stem`` is not a known dataset name.

    For ``'reference'`` and ``'v21_training'`` only the loaded training part is
    kept and re-split 60/40 with ``random_state=42``. For ``'training_wlr'``
    a random 20% of the test samples is moved into the training set and a
    random 20% of the original training samples is removed.
    """
    # name -> (default path, plot colour, re-split the training part?)
    datasets = {
        'reference': (r'C:\Users\fodor52\Desktop\probeAI\reference', 'C0', True),
        'v21_training': (r'C:\Users\fodor52\Desktop\probeAI\v21_training', 'C1', True),
        'training_wlr': (r'C:\Users\fodor52\Desktop\probeAI\training_wlr', 'C2', False),
    }
    if stem not in datasets:
        raise ValueError('Invalid dataset name')

    default_path, color, resplit = datasets[stem]
    path = Path(default_path) if root is None else Path(root) / stem

    X_train, X_test, y_train, y_test = load_train_test(path, plot=False)

    if resplit:
        # The loaded test part is discarded; the split comes from the train part.
        X_train, X_test, y_train, y_test = train_test_split(
            X_train, y_train, test_size=0.4, random_state=42)
    else:
        # Both index sets are drawn before any array is modified.
        # NOTE(review): these draws are not seeded here, unlike the
        # random_state=42 split above - confirm whether that is intended.
        indices = np.random.choice(len(X_test), int(len(X_test)*0.2), replace=False)
        indices_train = np.random.choice(len(X_train), int(len(X_train)*0.2), replace=False)

        # Move the chosen test samples to the end of the training set ...
        X_train = np.concatenate((X_train, X_test[indices]))
        y_train = np.concatenate((y_train, y_test[indices]))

        X_test = np.delete(X_test, indices, axis=0)
        y_test = np.delete(y_test, indices, axis=0)

        # ... then drop the chosen original training samples. Their positions
        # are unchanged because the moved samples were appended after them.
        # NOTE(review): the dropped samples are discarded, not moved to the
        # test set - confirm this is intended.
        X_train = np.delete(X_train, indices_train, axis=0)
        y_train = np.delete(y_train, indices_train, axis=0)

    print('Total training images:', len(X_train))
    print('Total training labels:', len(y_train))

    print('Total test images:',len(X_test))
    print('Total test labels:',len(y_test))

    fig, (ax1, ax2) = plt.subplots(1, 2, dpi=300, figsize=(6, 2), sharey=True)
    _plot_label_hist(ax1, y_train, 'Train data distribution', color, 0.01, 0.02)
    _plot_label_hist(ax2, y_test, 'Test data distribution', color, 0.02, 0.06)

    plt.show()

    return X_train, X_test, y_train, y_test



In [9]:
import torch 

def mnist(src=None):
    """Return train and test dataloaders for MNIST."""
    if src is None:
        src = Path.cwd() / "data" / "raw" / "mnist"
    else:
        src = Path(src)

    train_data, train_labels = [], []
    for i in range(5):
        train_data.append(torch.load(src / f"train_images_{i}.pt"))
        train_labels.append(torch.load(src / f"train_target_{i}.pt"))

    train_data = torch.cat(train_data, dim=0)
    train_labels = torch.cat(train_labels, dim=0)

    test_data = torch.load(src / "test_images.pt")
    test_labels = torch.load(src / "test_target.pt")

    train_data = train_data.unsqueeze(1)
    test_data = test_data.unsqueeze(1)