# Generate 1000 Augmented Test Images
Select 1,000 augmented images and save them, together with their labels, as Torch tensors. The saved file serves as a test set for evaluating test accuracy later.

**Load packages/modules**

In [13]:
import os
import time
import torch
import torchvision

from torchvision import datasets, transforms

# Record the installed PyTorch version alongside the results.
print(f'Torch version: {torch.__version__}')

Torch version: 1.9.1


In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.utils import make_grid

# Install torchsummary so the `from torchsummary import summary` import below resolves.
# NOTE(review): the install is unpinned and sits in the middle of the imports --
# consider pinning a version and moving it to its own cell at the top.
%pip install torchsummary

from torchvision import models
from torchsummary import summary

Note: you may need to restart the kernel to use updated packages.


**Load data**

In [8]:
# Report how many files each class folder holds.
# Note: os.listdir counts every directory entry, so any non-image files
# in a folder are included in the totals.
neg_data_dir = 'data/archive/original/0'  # negative = no surface crack
pos_data_dir = 'data/archive/original/1'  # positive = has surface crack

neg_aug_data_dir = 'data/archive/augmented/0'
pos_aug_data_dir = 'data/archive/augmented/1'

for description, folder in [
    ('negative original', neg_data_dir),
    ('negative augmented', neg_aug_data_dir),
    ('positive original', pos_data_dir),
    ('positive augmented', pos_aug_data_dir),
]:
    print(f'Total {description} images: {len(os.listdir(folder))}')

Total negative original images: 40000
Total negative augmented images: 180000
Total positive original images: 40000
Total positive augmented images: 180002


In [15]:
# Build a shuffled DataLoader over the augmented images.
batch_size = 100

# Only the augmented set is loaded in this cell; `data_dir` (original images)
# is defined but not used here.
data_dir = 'data/archive/original'
aug_data_dir = 'data/archive/augmented'

# ToTensor is the only preprocessing step applied to each image.
transform = transforms.Compose([transforms.ToTensor()])

aug_data = datasets.ImageFolder(root=aug_data_dir, transform=transform)

loader_options = dict(batch_size=batch_size, shuffle=True, pin_memory=True)
aug_dataloader = torch.utils.data.DataLoader(aug_data, **loader_options)

In [17]:
# Create a small test set from the augmented data: draw shuffled batches from
# `aug_dataloader` until N_TEST_IMAGES images are collected, then save them to
# disk as the list [inputs, labels].
N_TEST_IMAGES = 1000
SEED = 0

# Seed the global RNG so the shuffled loader yields the same images on every
# run; without it the saved test set changes each time this cell executes.
torch.manual_seed(SEED)

# Number of batches needed to reach N_TEST_IMAGES (ceiling division), derived
# from the loader's own batch size instead of a hard-coded batch count.
n_batches = -(-N_TEST_IMAGES // aug_dataloader.batch_size)

input_batches = []
label_batches = []

# zip() stops as soon as range() is exhausted, so no extra batch is read from
# disk only to be thrown away.
for i, (batch_inputs, batch_labels) in zip(range(n_batches), aug_dataloader):
    # Trim the final batch if the batch size does not divide N_TEST_IMAGES;
    # for every earlier batch this slice keeps the whole batch.
    n_missing = N_TEST_IMAGES - i * aug_dataloader.batch_size
    input_batches.append(batch_inputs[:n_missing])
    label_batches.append(batch_labels[:n_missing])

    print(f'Batch {i+1} added.')

# Concatenate once at the end rather than re-copying the growing tensor on
# every iteration.
inputs = torch.cat(input_batches, 0)
labels = torch.cat(label_batches, 0)

# Fail loudly rather than silently saving a test set of the wrong size.
assert inputs.shape[0] == N_TEST_IMAGES and labels.shape[0] == N_TEST_IMAGES, (
    f'Expected {N_TEST_IMAGES} images, collected {inputs.shape[0]}'
)

print(f'Saving inputs {inputs.shape}, labels {labels.shape}')

# save test set
testset = [inputs, labels]

path = "data/test_sets/small_1000_set.pth"
# torch.save does not create missing parent directories.
os.makedirs(os.path.dirname(path), exist_ok=True)
torch.save(testset, path)

Batch 1 added.
Batch 2 added.
Batch 3 added.
Batch 4 added.
Batch 5 added.
Batch 6 added.
Batch 7 added.
Batch 8 added.
Batch 9 added.
Batch 10 added.
Saving inputs torch.Size([1000, 3, 227, 227]), labels torch.Size([1000])
