Import libraries and data

In [16]:
from os import getenv
from dotenv import load_dotenv

load_dotenv()

# Paths are read from environment variables (after load_dotenv() above has had
# a chance to populate them); a value is None when its variable is not set.
config = {
    key: getenv(key)
    for key in ("DATA_DIR_PATH", "SAVE_TORCH_DIR_PATH")
}


In [17]:
import torch
# torch.utils.data is a PyTorch method for importing data
from torch.utils.data import Dataset, random_split
import matplotlib.pyplot as plt
import aggregate_xml_data as ml_helpers
from PIL import Image, ImageMode

In [18]:
import os
from pandas import read_xml


class CustomImageDataset(Dataset):
    """Image dataset whose file names and labels come from an XML label file.

    The label file is parsed with ``pandas.read_xml`` (one row per element
    matched by ``//Data//Image``) and the resulting frame is addressed by
    column *position*, so the layout of the XML must stay stable.

    Args:
        data_dir: Root directory that the per-image sub-directories live in.
        data_label_filepath: Path of the XML label file.
        transform: Optional callable applied to the loaded PIL image.
        target_transform: Optional callable applied to the raw label value.
    """

    # Positional columns of the parsed label frame.
    FILENAME_COLUMN = 0
    IMAGE_DIR_COLUMN = 11
    # Change this index to train on a different label field.
    # NOTE(review): the original notes mention a <content-mapping> tag at
    # index 12 -- confirm that this is the column being read here.
    LABEL_COLUMN = 12

    def __init__(self, data_dir, data_label_filepath, transform=None, target_transform=None):
        self.img_labels = read_xml(data_label_filepath, xpath="//Data//Image")
        self.data_dir = data_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows (images) in the label frame."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load the image at row ``idx`` and return ``(image, label)``.

        Both values are passed through ``transform`` / ``target_transform``
        when those were supplied.
        """
        img_filename = self.img_labels.iloc[idx, self.FILENAME_COLUMN]
        img_dir = self.img_labels.iloc[idx, self.IMAGE_DIR_COLUMN]
        absolute_img_path = os.path.join(self.data_dir, str(img_dir), str(img_filename))

        image = Image.open(absolute_img_path)
        # Image.open is lazy: decode now so Pillow can release the underlying
        # file handle even when no transform touches the pixel data.
        image.load()

        label = self.img_labels.iloc[idx, self.LABEL_COLUMN]

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)

        return image, label

Transform all images, define the full dataset, split it into train, test and validation sets, and load the train/test sets

In [19]:
from torchvision.transforms import Compose, Resize, CenterCrop, Grayscale, ToTensor
from torch.utils.data import DataLoader
import aggregate_xml_data as ml_helpers


# Image pipeline applied to every sample, in order:
# grayscale (1 channel) -> Resize(224) -> CenterCrop(224) -> tensor.
train_transform = Compose(
    [
        Grayscale(num_output_channels=1),
        Resize(224),
        CenterCrop(224),
        ToTensor(),
    ]
)
## Size was changed from 28 (the size the model was trained on) to 224 to
## check converting a tensor back into a picture.


# Labels are converted with the built-in int().
target_transform = int
# target_transform = Compose([Resize(255), ToTensor()])


In [20]:
# BUILD label file: the data root comes from the environment-backed config and
# the label file name is fixed.
# training_data = ImageFolder(root="plant_data", transform=transform)
# target_data = ImageFolder(root="plant_data", target_transform=target_transform)
data_label_file = "plant_labels.xml"
data_dir = config["DATA_DIR_PATH"]
# image_labels_filepath = ml_helpers.aggregate_xml_data(data_dir, data_label_file)
    

In [None]:
# Illustrative literals only: nothing below is assigned to a name. They sketch
# how encoded image data and label records are meant to be combined.

# Example: encoded image data, keyed by the plant id written as a string
{
    "1289495": 'i-am-encoded-image-data',
    "8954845": 'i-am-encoded-image-data'
}

# Example: image label data, one record per image
[
    {
        "PlantId": 1289495,
        # other label fields
    },
    {
        "PlantId": 8954845,
        # other label fields
    }
]

# Final shape: each label record with its encoded image attached

[
    {
        "PlantId": 1289495,
        "ImageData": 'i-am-encoded-image-data'
        # other label fields
    },
    {
        "PlantId": 8954845,
        "ImageData": 'i-am-encoded-image-data'
        # other label fields
    }
]

In [None]:
# print(image_labels_filepath)

In [None]:

# DEFINE dataset using custom class in order to try and incorporate label data.
# The dataset needs a real path to the label file; previously the string
# literal "image_labels_filepath" was passed, which is not a path.
# NOTE(review): assumes ml_helpers.aggregate_xml_data returns the path of the
# aggregated XML label file, as the commented-out call in the earlier cell
# suggests -- confirm.
image_labels_filepath = ml_helpers.aggregate_xml_data(data_dir, data_label_file)
full_dataset = CustomImageDataset(data_dir, image_labels_filepath, train_transform, target_transform)

# SPLIT INTO TRAIN (80%), TEST (10%) AND VALIDATION (remainder) SUBSETS
dataset_size = len(full_dataset)
print('This is the length of the full data set:', dataset_size)

train_set_size = int(dataset_size * 0.8)
test_set_size = int(dataset_size * 0.1)
# The remainder goes to validation so the three sizes always sum to the total.
validation_set_size = dataset_size - train_set_size - test_set_size

# A fixed seed keeps the split identical between runs, so a model saved in one
# session is not later "validated" on images it was trained on.
split_generator = torch.Generator().manual_seed(42)
train_set, test_set, validation_set = random_split(
    full_dataset,
    [train_set_size, test_set_size, validation_set_size],
    generator=split_generator,
)

print('='*30)
print('Train data set:', len(train_set))
print('Test data set:', len(test_set))
print('Val data set:', len(validation_set))

# Wrap the subsets in data loaders. Evaluation does not depend on sample
# order, so only the training loader shuffles.
train_dataloader = DataLoader(dataset=train_set, batch_size=100, shuffle=True)
test_dataloader = DataLoader(dataset=test_set, batch_size=100, shuffle=False)


In [None]:
# Pull a single batch from the training loader to sanity-check tensor shapes.
features, label_names = next(iter(train_dataloader))
print(f"Feature batch shape: {features.shape}")
print(f"Labels batch shape: {label_names.shape}")

### Build the Neural Network

In [None]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Pick the device for training: CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")


Define NN class

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> two hidden ReLU layers -> logits.

    Args:
        input_size: Number of values per sample after flattening. Defaults to
            224*224, i.e. one 224x224 single-channel image.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output logits.

    The defaults reproduce the original fixed architecture, and the attribute
    names are unchanged so existing ``state_dict`` keys stay valid.
    """

    def __init__(self, input_size=224 * 224, hidden_size=512, num_classes=3):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes)
        )

    def forward(self, x):
        """Return unnormalised class scores (logits) for the batch ``x``."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

Make NN instance

In [None]:
# Build the network, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

Call the model

Inspect the parameters of each layer of the model

In [None]:
print(f"Model structure: {model}\n\n")

# List every named parameter tensor with its shape and its first two entries
# along the leading dimension.
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.shape} | Values : {param[:2]} \n")

Set hyperparameters

In [None]:
# Hyperparameters for the training run.
learning_rate = 0.001  # passed to the optimizer as its `lr`
# NOTE: the data loaders earlier in the notebook are built with a literal
# batch size of 100 rather than with this variable.
batch_size = 100
epochs = 5  # number of train/test passes

Define the optimization loops. (This cell only defines the `train_loop` and `test_loop` functions; nothing is trained until they are called.)

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training pass of ``model`` over every batch in ``dataloader``.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; must expose ``.dataset``.
        model: The ``nn.Module`` being trained.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimizer that holds ``model``'s parameters.

    Prints the loss on every 100th batch. Returns ``None``.
    """
    size = len(dataloader.dataset)

    # Batches must live on the same device as the model; otherwise the forward
    # pass fails when the model was moved to the GPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    # Make sure train-only layers (dropout, batch norm, ...) are in train mode.
    model.train()

    for batch, (X, y) in enumerate(dataloader):
        if model_device is not None:
            X, y = X.to(model_device), y.to(model_device)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

Initialize the loss function and optimizer, and pass it to train_loop and test_loop

In [None]:
# Cross-entropy loss over the class logits, optimised with plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# `epochs` is taken from the hyperparameter cell. It used to be re-assigned
# here, which silently overrode any value set there.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

In [None]:
# Persist the whole model object (not just a state_dict) to the configured path.
# NOTE(review): the config key is named *_DIR_PATH but it is used as the save
# target here and as the load source further down -- confirm it is a file path.
torch.save(obj=model, f=config["SAVE_TORCH_DIR_PATH"])


In [None]:
# from torchvision.transforms import ToPILImage

# val_image, val_label = next(iter(validation_set))
# post_transform = transforms.Compose([Resize(255), ToPILImage(mode="L")])

# final_pic = post_transform(val_image)
# final_pic.show()

# with torch.no_grad():
#     # print({model(val_image)})
#     final_pic = post_transform(model(val_image))
#     final_pic.show()

In [None]:
# import numpy as np

# lmodel = torch.load('<insert-file-path-here>')
# val_dataloader = DataLoader(dataset=validation_set, batch_size=3, shuffle=True)
# val_img, val_label = next(iter(val_dataloader))

# with torch.no_grad():
#     prediction = lmodel(val_img)
#     pred_label = np.argmax(prediction)

# print(pred_label)



In [None]:
# import matplotlib.pyplot as plt

# val_dataloader = DataLoader(dataset=validation_set, batch_size=3, shuffle=True)
# val_img, val_label = next(iter(val_dataloader))

# # CONTENT_LABEL_MAPPING = {
# #     "0":"Leaf",
# #     "1":"Flower",
# #     "2":"Entire",
# # }

# print(f"Feature batch shape: {val_img.size()}")
# print(f"Labels batch shape: {val_label.size()}")
# look_here = val_img.squeeze()
# validation_label = val_label[0]


# for i in range(3):
#     plt.imshow(val_img[i].squeeze())
#     print(f"Label: {val_label[i]}")
#     plt.show(i+1)
#     # plt.imshow(np.transpose(val_image.numpy(), (1, 2, 0)))
#     # plt.figure(i+1)
# plt.show

### Run trained model 

In [None]:
#running the imported model

import matplotlib.pyplot as plt
import numpy as np

batch_size = 3
# NOTE(review): PyTorch 2.6 changed torch.load's default to weights_only=True,
# which refuses to unpickle a whole model object like the one saved above. On
# such versions this call needs weights_only=False (only for files you trust)
# -- confirm against the installed version.
trained_model = torch.load(config["SAVE_TORCH_DIR_PATH"])
trained_model.eval()  # inference mode for dropout / batch-norm style layers
model_device = next(trained_model.parameters()).device
val_dataloader = DataLoader(dataset=validation_set, batch_size=batch_size, shuffle=True)

val_img, val_label = next(iter(val_dataloader))

with torch.no_grad():
    # Predict the whole batch (previously only val_img[0] was scored, although
    # the result is compared with every label in the batch below). The input
    # is moved to the model's device and the logits are brought back to the
    # CPU for printing and comparison.
    prediction = trained_model(val_img.to(model_device)).cpu()
    print('my preds: ', prediction)
    # One predicted class index per sample, as a torch tensor.
    pred_label = prediction.argmax(dim=1)

print(f"Feature batch shape: {val_img.size()}")
print(f"Labels batch shape: {val_label.size()}")
print(f"Predicted Labels batch shape: {pred_label.size()}")


# Show the first image of the batch with its true and predicted label.
for i in range(1):
    # The training transform produces single-channel images, so draw them with
    # a gray colormap.
    plt.imshow(val_img[i].squeeze(), cmap="gray")
    print(f"Label: {val_label[i]}")
    print(val_label.dtype)
    print(f"Predicted Label: {pred_label[i]}")
    print(pred_label.dtype)
    plt.show()

# Element-wise comparison of true and predicted labels for the batch.
val_label == pred_label