In [None]:
#Alexnet


In [7]:
#import packages
import os
import torch.nn as nn
import torch.optim as optim
import numpy as np
from PIL import Image
import torchvision.transforms.functional as F
from torchvision.transforms import v2
import matplotlib.pyplot as plt
import pathlib
from torch.utils.data import Dataset

import numpy as np
#from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import precision_score, recall_score, f1_score, roc_curve, auc, roc_auc_score

import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import torch
import torchvision
from torchvision.models import alexnet, AlexNet_Weights

In [8]:
#good practice to provide class labels as integer arrays

CLASSES = ["BLD", "No BLD"]
class_mapping = {label: idx for idx, label in
                 enumerate(np.unique([CLASSES]))}


In [9]:
#dictonary to modify transformations
config = dict(
    rot=90,
    noise=0.05
)

In [10]:
#make sure image input size matches expected size for the spcific models

train_transform = transforms.Compose(
    #384x384 for efficientnet
    #224x224 for ResNEt and MobileNEt, Alexnet
    #299x299 for Inception V3 #https://pytorch.org/vision/main/models/generated/torchvision.models.inception_v3.html
    #256x256 for Swin
    [transforms.Resize([224,224]), #converts image to the pre-trained model dimension expectations
     #transforms.RandomRotation(degrees=config["rot"]),
     #transforms.RandomVerticalFlip(p=0.5),
     #transforms.RandomHorizontalFlip(p=0.5),
     transforms.Grayscale(3),
     transforms.ToTensor(), #Converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
     transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),#this is a standard RGB mean and std
    ])

#validation preprocessing
val_transform = transforms.Compose(
    [transforms.Resize([224,224]), #converts image to the pre-trained model dimension expectations
     transforms.Grayscale(3),
     transforms.ToTensor(),
     transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),#this is a standard RGB mean and std
     ])
     

In [11]:
#import data
#images in directories converted to tensor format and classes obtained from direcory names
#add test folder once get more data

train_dataset = torchvision.datasets.ImageFolder(root='/project/90daydata/nematode_ml/BLD/NematodeDataset/train', transform=train_transform)
#test_dataset = torchvision.datasets.ImageFolder(root='/project/90daydata/nematode_ml/BLD/NematodeDataset/test', transform=val_transform)
val_dataset = torchvision.datasets.ImageFolder(root='/project/90daydata/nematode_ml/BLD/NematodeDataset/val', transform=val_transform)

In [6]:
# Access the transformed image
image, label = train_dataset[0]
# Check the size of the transformed image
print(image.shape) #retuns [channel, height, width]. 3 indicates color. During training an additional element is added, batch_size


torch.Size([3, 224, 224])


In [None]:
#check class names
train_dataset.class_to_idx

In [None]:
#check number of images in each set
print(f'Number of images in training dataset: {len(train_dataset)}')
#print(f'Number of images in testing dataset: {len(test_dataset)}')
print(f'Number of images in validation dataset: {len(val_dataset)}')

In [None]:
#Bing
#how to print example of each image class from dataset pytorch
#loop through and get one image per class

# Create a dictionary to store one example per class
class_examples = {}

# Iterate through the dataset to find one example per class
#unpack img and label pair from train_dataset
for img, label in train_dataset:
    if label not in class_examples:
        #if the label is not in the class_example dict, then store the image
        class_examples[label] = img
        #if the number of classes in class_examples equals the number of classes in the train_dataset, stop
    if len(class_examples) == len(train_dataset.classes):
        break  

In [None]:
#plot one image from each class in dict 'class_example'
fig = plt.figure(figsize=(10,6))
for label, img in class_examples.items():
    #the dict structure is class label: image
    #print(img)
    #print(label)
    #print(f"Labels batch shape (number): {label.size()}")

    #create subplot shape
    ax = fig.add_subplot(1,2, label+1)
    img = img.numpy().transpose((1, 2, 0)) #changing the channel and dimension order for plotting
    #img = img.numpy().transpose((0, 1, 2))
    #print({img.size})
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    img = std * img + mean
    img = np.clip(img, 0, 1)
    label = label
    ax.imshow(img, cmap="gray")
    #print class title
    ax.set_title(f"Label: {CLASSES[label]}")
plt.show()
   

In [12]:
#a hyperparameter to try changing
#batch_size = 4
batch_size = 16
#batch_size = 32

In [13]:
#dataloader
#output a batch of images and labels, one sample at a time

trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                          shuffle=True)
#testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
#                                         shuffle=True)
valloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size,
                                         shuffle=True)



In [None]:
#print class for each sample in batch
train_features, train_labels = next(iter(trainloader))
print(train_labels)

In [None]:
# Display image and label.
#to pull out a single image and label, first put in individual variables
#https://stackoverflow.com/questions/61480762/python-matplotlib-invalid-shape-for-image-data

train_features, train_labels = next(iter(trainloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape (number): {train_labels.size()}")
img = train_features[0].numpy().transpose((1, 2, 0)) #changing the channel and dimension order for plotting
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
img = std * img + mean
img = np.clip(img, 0, 1)
label = train_labels[0]
plt.imshow(img, cmap="gray")
plt.show()
print(f"Label: {[CLASSES[label]]}")
print(f"Label: {train_labels[0]}")

In [None]:
#https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

def imshow(inp, title=None):
    """Display image for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


# Get a batch of training data
inputs, classes = next(iter(trainloader))

# Make a grid from batch
out = torchvision.utils.make_grid(inputs)

imshow(out, title=[CLASSES[x] for x in classes])
print(classes)

In [None]:
#del model
del optimizer
del criterion




In [14]:
#AlexNet
weights=AlexNet_Weights.DEFAULT
model_AN = torchvision.models.alexnet(weights=weights)

In [15]:
#AlexNet

#Intitalize model, not freezing layers
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_an = model_AN
num_ftrs = model_an.classifier[6].in_features #print this out to confirm a value

num_classes = 2  # Replace with the number of classes in your dataset
# Alternatively, it can be generalized to ``nn.Linear(num_ftrs, len(class_names))``.
model_an.classifier[6] = nn.Linear(num_ftrs, num_classes)

#move model to this device
#model_an = model_an.to(device)


#Loss function

#criterion = nn.CrossEntropyLoss()
#criterion = nn.BCELoss() 
criterion = nn.BCEWithLogitsLoss() #recommended for binary classificaion


# Observe that all parameters are being optimized
#optimizer = optim.SGD(model_ft.parameters(), lr=0.001)
optimizer = optim.Adam(model_an.parameters(), lr=0.001)

# Decay LR by a factor of 0.1 every 7 epochs
#exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)


In [16]:
#Pg. 479 pytorch book
#modified to accomidate BCELoss format expectations in the pred output
#Pg. 473
#If returning 1 probablity from model(x_batch) (using [:,0]), then use BCE function
#If retunring 2 proabability values, use Cross entropy loss function

def train_1(model, num_epochs, train_d1, valid_d1):
    loss_hist_train = [0] * num_epochs
    accuracy_hist_train = [0] * num_epochs
    loss_hist_valid = [0] * num_epochs
    accuracy_hist_valid = [0] * num_epochs
    
    for epoch in range(num_epochs):
        model.train() #set model to training mode
        for x_batch, y_batch, in train_d1:
            #output of forward pass a tensor with predictions for the batch
            pred = model(x_batch)[:,0] #slicing the first column to make shapes match for BCE loss
            #print(pred.size(), y_batch.size()) #use this to troubleshoot BCE error. Shapes should match
        
        #uncomment block
            loss = criterion(pred, y_batch.float()) 
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            loss_hist_train[epoch] += loss.item()*y_batch.size(0)
            is_correct = ((pred>=0.5).float() == y_batch).float() #calculating correct class from first column of pred tensor [:,0]
            accuracy_hist_train[epoch] += is_correct.sum()
        loss_hist_train[epoch] /= len(train_d1.dataset)
        accuracy_hist_train[epoch] /= len(train_d1.dataset)
        
        model.eval() 
        all_preds = []
        all_labels = []
        
        with torch.no_grad():
            for x_batch, y_batch in valid_d1:
                pred = model(x_batch)[:,0] #slicing the first column to make shapes match for BCE loss
                 #save probabilities from previously trained model
                #print(f'pred {pred}')
                outputs = model(x_batch)

                #saving largest probabilities in each batch
                probs, preds = torch.max(outputs,1) 
                #print(probs)
                print(f'preds {preds}')
                
                
               #uncomment block
                loss = criterion(pred, y_batch.float()) #BCE expects floar
                loss_hist_valid[epoch] += \
                    loss.item() * y_batch.size(0)
                is_correct = \
                    ((pred>=0.5).float() == y_batch).float()
                accuracy_hist_valid[epoch] += is_correct.sum()
            loss_hist_valid[epoch] /= len(valid_d1.dataset)
            accuracy_hist_valid[epoch] /= len(valid_d1.dataset)

            #Preparing data for evaluation
            #append values in list after loop   
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y_batch.cpu().numpy())
            #convert to np array  
            #all_preds = np.array(all_preds)
            #all_labels = np.array(all_labels)
            
            #all_preds = torch.tensor(all_preds)
            #all_labels = torch.tensor(all_labels)
            print(f'all preds {all_preds}')
            print(f'all labels {all_labels}')
        

            #uncomment block
            print(f'Epoch {epoch+1} ' 
                  f'accuracy: {accuracy_hist_train[epoch]:.4f}, '
                  f'val_accuracy: {accuracy_hist_valid[epoch]:.4f}, '
                  f'train_loss: {loss_hist_train[epoch]:.4f}, '
                  f'val_loss: {loss_hist_valid[epoch]:.4f} ')

            

    # Compute precision
    precision = precision_score(all_labels, all_preds, average='binary')  # Use 'micro', 'macro', or 'weighted' for multi-class
    #calculate recall score
    recall = recall_score(all_labels, all_preds, average='binary')
    #Calculate f1 score
    f1 = f1_score(all_labels, all_preds, average='binary')

    # Calculate ROC AUC
    #need to confirm if sigmoid function is needed (all_predictions vs all_preds
    #auc = roc_auc_score(all_labels, all_predictions)
    auc = roc_auc_score(all_labels, all_preds)
        
                    

    print(f'Precision Score: {precision:.4f}, '
          f'Recall Score: {recall:.4f}, '
          f'F1 Score: {f1:.4f}, '
          f'ROC AUC: {auc:.4f}'
            )    

    #for graph
    fpr, tpr, thresholds = roc_curve(all_labels, all_preds) 
    roc_auc = metrics.auc(fpr, tpr)
    
    # Plot the ROC curve
    #plt.figure()  
    #plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    #plt.plot([0, 1], [0, 1], 'k--', label='No Skill')
    #plt.xlim([0.0, 1.0])
    #plt.ylim([0.0, 1.05])
    #plt.xlabel('False Positive Rate')
    #plt.ylabel('True Positive Rate')
    #plt.title('ROC Curve for Beech Leaf Disease Classification')
    #plt.legend()
    #plt.show()

    #uncomment block
    return loss_hist_train, loss_hist_valid, \
        accuracy_hist_train, accuracy_hist_valid, \
        precision, recall, f1, fpr, tpr, auc

In [17]:
#Train model not frozen
torch.manual_seed(1)
num_epochs = 1
hist = train_1(model_an, num_epochs, trainloader, valloader)

#print(len(hist))
#print(hist[0:9])
#saved variable 'hist' contains the values from each epoch of loss_hist_train [0],
#loss_hist_valid [1], accuracy_hist_train [2], accuracy_hist_valid [3],
#contains single value of precision [4], recall, [5], f1 [6], fpr [7], tpr [8], auc [9]3

preds tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
preds tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
 all preds [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
 all labels [0, 1, 1, 1, 0, 0, 0, 0, 0, 0]
Epoch 1 accuracy: 0.6375, val_accuracy: 0.6154, train_loss: 1.3035, val_loss: 0.6126 
Precision Score: 0.3000, Recall Score: 1.0000, F1 Score: 0.4615, ROC AUC: 0.5000


In [None]:
print(f'Precision Score: {hist[4]}')
print(f'Recall Score: {hist[5]}')
print(f'F1 Score: {hist[6]}')
print(f'False Positive Rate: {hist[7]}')
print(f'True Positive Rate: {hist[8]}')
print(f'ROC AUC: {hist[9]}')

In [None]:
# Plot the ROC curve
#fpr is hist[7]
#tpr is hist[8]
#roc_auc is hist[9]

plt.figure()  
plt.plot(hist[7], hist[8], label='ROC curve (area = %0.2f)' % hist[9])
plt.plot([0, 1], [0, 1], 'k--', label='50-50 guess')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for Beech Leaf Disease Classification')
plt.legend()
plt.show()

In [None]:
#Plot learning curve
x_arr = np.arange(len(hist[0])) + 1
fig = plt.figure(figsize=(12,4))
ax = fig.add_subplot(1, 2, 1)
ax.plot(x_arr, hist[0], '-o', label='Train loss')
ax.plot(x_arr, hist[1], '--<', label='Validation loss')
ax.legend(fontsize = 15)
ax = fig.add_subplot(1, 2, 2)
ax.plot(x_arr, hist[2], '-o', label='Train accuracy')
ax.plot(x_arr, hist[3], '--<',
        label='Validation accuracy')
ax.legend(fontsize=15)
ax.set_xlabel('Epoch', size = 15)
ax.set_ylabel('Accuracy', size=15)
plt.show()


