In [2]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision.transforms as transforms
import os
import random
import shutil
import torchvision.datasets as datasets
import torchvision
import tqdm
import sklearn
import sklearn.datasets
from tensorboardX import SummaryWriter
from lib.dataset import random_split

In [3]:
GPU_MODE = torch.cuda.is_available()
data_dir = 'data/'

In [4]:
classes = ['healthy', 'Bacterial_spot', 'Early_blight', 'Late_blight', 'Leaf_Mold', 'Septoria_leaf_spot', 
           'Spider_mites Two-spotted_spider_mite', 'Target_Spot', 'Tomato_mosaic_virus', 
          'Tomato_Yellow_Leaf_Curl_Virus']
binary_classes = ['healthy', 'non_healthy']

if os.path.exists(data_dir):
    shutil.rmtree(data_dir)
os.mkdir(data_dir)
os.mkdir(data_dir + 'test')
os.mkdir(data_dir + 'test/healthy')

shutil.copytree("original_dataset/healthy", data_dir + "train/healthy")
for i in range(int(len(os.listdir(data_dir + 'train/healthy')) * 0.2)): #testing parameter
    src = random.choice(os.listdir(data_dir + "train/healthy"))
    shutil.move(data_dir + "train/healthy/" + src, data_dir + "test/healthy/" + src)
    
os.mkdir(data_dir + 'train/non_healthy')
os.mkdir(data_dir + 'test/non_healthy')
for cls in tqdm.tqdm(list(classes[1:])):
    for i in range(133):
        src = random.choice(os.listdir("original_dataset/" + cls))
        shutil.copyfile("original_dataset/" + cls + '/' + src, data_dir + "train/non_healthy/" + src) # will become copytree
    for i in range(33):
        src = random.choice(os.listdir(data_dir + "train/non_healthy"))
        shutil.move(data_dir + "train/non_healthy/" + src, data_dir + "test/non_healthy/" + src)

100%|██████████| 9/9 [00:04<00:00,  1.84it/s]


In [5]:
batch_size = 16
data_transforms = {
        'train': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                    data_transforms[x]) for x in ['train', 'test']}

k = 10
splits = []
for i in range(k):
    splits.append('split' + str(i))
splits.append('test')

train_dataset_len = len(image_datasets['train'])

splits_size = []
overall_splits_size = 0
for i in range(k - 1):
    overall_splits_size += train_dataset_len // k
    splits_size.append(train_dataset_len // k)
splits_size.append(train_dataset_len - overall_splits_size)

random_splits = random_split(image_datasets['train'], splits_size)
del image_datasets['train']
for i in range(k):
    image_datasets[splits[i]] = random_splits[i]

print(image_datasets)
training_dataset = {}
training_dataset['train'] = {}

validationSet = 5

for i in range(k):
    if i != validationSet:
        key = 'split' + str(i)
        training_dataset['train'] = torch.utils.data.ConcatDataset([training_dataset['train'], image_datasets[key]])
    else:
        #validation set
        print("Validation set...")
    print(len(training_dataset['train']))
print(len(training_dataset['train']))

dataset_sizes = {x: len(image_datasets[x]) for x in splits}

"""
for i in range(k):
    if i==0:
        print()
    else:
        if i==1:
            training_dataset['train'] = torch.utils.data.ConcatDataset([image_datasets['split0'], image_datasets['split1']])
        else:
            key = 'split' + str(i)
            training_dataset['train'] = torch.utils.data.ConcatDataset([training_dataset['train'], image_datasets[key]])
        print(len(training_dataset['train']))
print(len(training_dataset['train']))"""

#training_dataset['train'] = torch.utils.data.ConcatDataset([image_datasets['split0'], image_datasets['split1']])



print("========")

"""

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, 
                                              shuffle=True, num_workers=4) for x in splits}

print(len(image_datasets['split0']))

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, 
                                              shuffle=True, num_workers=4) for x in splits}

print(dataset_sizes)
print(dataloaders)



{'train': [image_datasets['split0'], image_datasets['split1']],
                                                  'test': [image_datasets['test']]}

"""

{'test': Dataset ImageFolder
    Number of datapoints: 615
    Root Location: data/test
    Transforms (if any): Compose(
                             ToTensor()
                             Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
                         )
    Target Transforms (if any): None, 'split0': <lib.dataset.Subset object at 0x7f824db2da20>, 'split1': <lib.dataset.Subset object at 0x7f824db2da58>, 'split2': <lib.dataset.Subset object at 0x7f824db2da90>, 'split3': <lib.dataset.Subset object at 0x7f824db2dac8>, 'split4': <lib.dataset.Subset object at 0x7f824db2db00>, 'split5': <lib.dataset.Subset object at 0x7f824db2db38>, 'split6': <lib.dataset.Subset object at 0x7f824db2db70>, 'split7': <lib.dataset.Subset object at 0x7f824db2dba8>, 'split8': <lib.dataset.Subset object at 0x7f824db2dbe0>, 'split9': <lib.dataset.Subset object at 0x7f824db2dc18>}
211
422
633
844
1055
Validation set...
1055
1266
1477
1688
1899
1899


"\n\ndataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, \n                                              shuffle=True, num_workers=4) for x in splits}\n\nprint(len(image_datasets['split0']))\n\ndataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, \n                                              shuffle=True, num_workers=4) for x in splits}\n\nprint(dataset_sizes)\nprint(dataloaders)\n\n\n\n{'train': [image_datasets['split0'], image_datasets['split1']],\n                                                  'test': [image_datasets['test']]}\n\n"

In [22]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

# get some random training images
for i in ['test']:
    dataiter = iter(dataloaders[i])
    images, labels = dataiter.next()

    # show images
    imshow(torchvision.utils.make_grid(images))
    # print labels
    print(' '.join('%5s' % binary_classes[labels[j]] for j in range(batch_size)))

NameError: name 'dataloaders' is not defined

In [6]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        # 3 input image channel,
        # 6 output channel,
        # 5x5 square convolution kernel
        self.conv1 = nn.Conv2d(3, 4, 5)
        self.conv2 = nn.Conv2d(4, 6, 5)
        self.conv3 = nn.Conv2d(6, 16, 5)
        # an affine operation
        self.fc1 = nn.Linear(16 * 28 * 28, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)
        
    def forward(self, x):
        # max pooling over a (2, 2) windows
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = F.max_pool2d(F.relu(self.conv3(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
    
model = Model()
if GPU_MODE:
    model = model.cuda()
    
print(model)

Model(
  (conv1): Conv2d(3, 4, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(4, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=12544, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=2, bias=True)
)


In [14]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

num_epochs = 2
num_batches = 8
num_iterations = 0

log_dir = 'log/'
if os.path.exists(log_dir):
    shutil.rmtree(log_dir)
writer = SummaryWriter(log_dir)

for validation_set in range(k):
    dataloaders = {}
    print("new validation set: " + str(validation_set))
    datasets = {}
    datasets['train'] = {}
    
    for split in range(k):
        key = 'split' + str(split)
        if split != validation_set:
            datasets['train'] = torch.utils.data.ConcatDataset([datasets['train'], image_datasets[key]])
        else:
            #validation set
            datasets['val'] = image_datasets[key]  
        
    #creating dataloaders
    dataloaders = {x: torch.utils.data.DataLoader(datasets[x], batch_size=batch_size,
                        shuffle=True, num_workers=4) for x in ['train', 'val'] }


    for epoch in tqdm.tqdm(list(range(num_epochs))):  # loop over the dataset multiple epochs
    
        #training and validation part
        for phase in ['train', 'val']:
            print(phase)

            if phase == 'train':
                exp_lr_scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # iterate over the data
            for i, data in enumerate(dataloaders[phase], 0):

                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                if GPU_MODE:
                    inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                optimizer.zero_grad() # zero the gradient buffers

                # forward + loss
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)
                
                if phase == 'train':
                    # backward + optimize
                    loss.backward()
                    optimizer.step() # does the update

                 # statistics
                running_loss += loss.data.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

                if i % num_batches == num_batches - 1:
                    print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / num_batches))
                    num_iterations += i
                    writer.add_scalar(phase + '/loss', running_loss, num_iterations)
                    running_loss = 0.0  
            

                #epoch_loss = running_loss / dataset_sizes[phase]
                #epoch_acc = running_corrects / dataset_sizes[phase]

                #print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
        
print('Finished Training.')

0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]

new validation set: 0
new validation set: 1
new validation set: 2
new validation set: 3
new validation set: 4
new validation set: 5
new validation set: 6
new validation set: 7
new validation set: 8
new validation set: 9
Finished Training.





In [None]:
testloader = dataloaders['test']
dataiter = iter(testloader)
images, labels = dataiter.next()

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % binary_classes[labels[j]] for j in range(16)))

In [None]:
if GPU_MODE:
    outputs = model(Variable(images).cuda())
else:
    outputs = model(Variable(images))

_, predicted = torch.max(outputs.data, 1)

print('Predicted: ', ' '.join('%5s' % binary_classes[predicted.cpu()[j]]
                              for j in range(16)))

In [None]:
correct = 0
total = 0
for data in testloader:
    images, labels = data
    if GPU_MODE:
        outputs = model(Variable(images).cuda())
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted.cpu() == labels).sum()
    else:
        outputs = model(Variable(images))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()

print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))

In [None]:
class_correct = list(0. for i in range(2))
class_total = list(0. for i in range(2))
for data in testloader:
    images, labels = data
    if GPU_MODE:
        outputs = model(Variable(images).cuda())
        _, predicted = torch.max(outputs.data, 1)
        c = (predicted.cpu() == labels).squeeze()
    else:
        outputs = model(Variable(images))
        _, predicted = torch.max(outputs.data, 1)
        c = (predicted == labels).squeeze() 
    for i in range(2):
        label = labels[i]
        class_correct[label] += c[i]
        class_total[label] += 1

for i in range(2):
    print('Accuracy of %5s : %2d %%' % (binary_classes[i], 100 * class_correct[i] / class_total[i]))