This project builds a model that recognizes 3D-printing mishaps and good prints.

The code in this notebook uses the following setup:
A training, validation, and test set built from "images", which comes from the "printing_errors_original" folder. This new folder is organized by class.
A second test set from the folder "print_errors_2_modified_for_test", based on our real-world set, which has a large variation of photos such as those a customer might provide.

This Jupyter notebook keeps the two datasets separate and builds a model SOLELY on the images from the "images" folder.

The other Jupyter notebook in this repo is trained on a mix of the two image datasets, to see whether that improves performance.

In [None]:
#Import libraries that will be needed
import os
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
from torchvision import datasets
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torchvision.transforms import ToTensor
import random 
# Seed every RNG up front so repeated runs start from the same state.
manualSeed = 854
# manualSeed = random.randint(1, 10000)  # swap in to get new results on each run
print("Random Seed: ", manualSeed)
torch.manual_seed(manualSeed)
random.seed(manualSeed)
# Restrict PyTorch to deterministic algorithms (needed for reproducible results).
torch.use_deterministic_algorithms(mode=True)


In [None]:
# Root folder of the class-organised images used for the train/validation/test split.
# NOTE: the same path is assigned again in the dataset-creation cell further down.
image_directory = "images"

In [None]:
# Training hyperparameters shared by the cells below.
epochs = 50       # number of passes over the training split
lr = 1e-6         # learning rate handed to the optimizer
batch = 50        # batch size used by every DataLoader
image_size = 227  # target size (pixels) for the resize and centre crop

In [None]:
# Folder that holds the images, organised into one sub-folder per class.
image_directory = "images"

# Load the folder as a labelled dataset. Every image is resized, centre-cropped
# to image_size x image_size, converted to a tensor and normalised with
# mean 0.5 / std 0.5 on each of the three channels.
custom_dataset = datasets.ImageFolder(
    root=image_directory,
    transform=transforms.Compose(
        [
            transforms.Resize(image_size),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        ]
    ),
)

In [None]:
# Split sizes: 70% training, 10% validation, and the remainder (about 20%) for testing.
total_images = len(custom_dataset)
train_size = int(0.7 * total_images)
val_size = int(0.1 * total_images)
test_size = total_images - train_size - val_size

# Draw the split from a dedicated generator seeded with manualSeed instead of
# the global RNG. The global RNG advances whenever other cells run, so
# re-executing this cell (or running cells out of order) would otherwise yield
# a different split and could move images the model has already trained on
# into the validation/test sets.
split_generator = torch.Generator().manual_seed(manualSeed)
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    custom_dataset, [train_size, val_size, test_size], generator=split_generator)

# PyTorch Data Initialization: only the training loader shuffles; validation and
# test keep a fixed order.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch, shuffle=True, num_workers=2)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch, shuffle=False, num_workers=2)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch, shuffle=False, num_workers=2)

# Using ngpu: prefer the first CUDA device when one is available and ngpu > 0.
ngpu = 1
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")




In [None]:
#Model Building

#PyTorch
#Second PyTorch Model
#AlexNet Architecture

class pyTorchModel_2(nn.Module):
    """AlexNet-style CNN classifier with batch normalisation after every convolution.

    Five convolutional stages (``layer1`` .. ``layer5``) are followed by three
    fully connected stages (``fc``, ``fc1``, ``fc2``). The first linear layer
    expects 9216 flattened features, which is what a 3 x 227 x 227 input
    produces (256 channels x 6 x 6 after the last pooling step).

    Args:
        ngpu: Number of GPUs; stored on the instance and not otherwise used here.
        num_classes: Width of the output layer, i.e. number of logits per sample.
    """

    def __init__(self, ngpu, num_classes=4):
        super().__init__()
        self.ngpu = ngpu

        # Convolutional feature extractor; stages 1, 2 and 5 end with max pooling.
        self.layer1 = self._conv_stage(3, 96, kernel_size=11, stride=4, padding=0, pool=True)
        self.layer2 = self._conv_stage(96, 256, kernel_size=5, stride=1, padding=2, pool=True)
        self.layer3 = self._conv_stage(256, 384, kernel_size=3, stride=1, padding=1, pool=False)
        self.layer4 = self._conv_stage(384, 384, kernel_size=3, stride=1, padding=1, pool=False)
        self.layer5 = self._conv_stage(384, 256, kernel_size=3, stride=1, padding=1, pool=True)

        # Classifier head: two dropout-regularised hidden layers, then the logits.
        self.fc = nn.Sequential(nn.Dropout(0.5), nn.Linear(9216, 4096), nn.ReLU())
        self.fc1 = nn.Sequential(nn.Dropout(0.5), nn.Linear(4096, 4096), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(4096, num_classes))

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, stride, padding, pool):
        """Build Conv2d -> BatchNorm2d -> ReLU, optionally followed by 3x3 / stride-2 max pooling."""
        modules = [
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        if pool:
            modules.append(nn.MaxPool2d(kernel_size=3, stride=2))
        return nn.Sequential(*modules)

    def forward(self, x):
        """Return the raw class scores (logits) for the image batch ``x``."""
        features = x
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            features = stage(features)
        # Collapse channels and spatial dims into one feature vector per sample.
        flat = features.reshape(features.size(0), -1)
        return self.fc2(self.fc1(self.fc(flat)))

In [None]:
# Pick the compute backend: CUDA first, then MPS, otherwise the CPU.
# Note that this rebinds `device` (set earlier as a torch.device) to a plain string.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# Build the network with its default number of classes and move it to that backend.
pymodel1 = pyTorchModel_2(ngpu).to(device)



In [None]:
# Multi-class classification loss. nn.CrossEntropyLoss takes raw, unnormalised
# scores, which is what the model's final nn.Linear layer outputs.
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Single-model setup: RMSprop over all parameters of pymodel1.
optimizer1 = optim.RMSprop(params=pymodel1.parameters(), lr=lr)

# Switch the deterministic-algorithms mode back off before training.
# NOTE(review): this reverses the setting made next to the random seed —
# presumably because an operation used in training lacks a deterministic
# implementation; confirm, since it weakens run-to-run reproducibility.
torch.use_deterministic_algorithms(mode=False)


In [None]:
def train_loop(dataloader, model, loss_fn, optimizer, trainLoss):
    """Train ``model`` for one pass over ``dataloader`` and record the mean batch loss.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches that also provides
            ``len()`` and a ``dataset`` attribute (e.g. a ``DataLoader``).
        model: Network to optimise. Batches are moved to the device that holds
            its parameters.
        loss_fn: Callable mapping ``(predictions, targets)`` to a scalar loss tensor.
        optimizer: Optimizer that updates the parameters of ``model``.
        trainLoss: List that receives this pass's mean batch loss; ``nan`` is
            appended when the dataloader yields no batches.
    """
    # Training mode matters here: it enables dropout and makes batch
    # normalisation layers use (and update) batch statistics.
    model.train()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)

    # Follow the model's own device instead of a notebook-level global, so the
    # inputs always end up where the weights are.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    running_loss = 0.0
    for batch_idx, (X, y) in enumerate(dataloader):
        X, y = X.to(model_device), y.to(model_device)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)
        batch_loss = loss.item()
        running_loss += batch_loss

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Progress report on the first batch and every 100th batch after it.
        if batch_idx % 100 == 0:
            current = (batch_idx + 1) * len(X)
            print(f"loss: {batch_loss:>7f}  [{current:>5d}/{size:>5d}]")

    # An empty dataloader has no defined mean loss; record nan instead of
    # failing with ZeroDivisionError.
    trainLoss.append(running_loss / num_batches if num_batches else float("nan"))
    


def test_loop(dataloader, model, loss_fn, testLoss, acc):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            #testLoss.append(test_loss.item())
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    testLoss.append(test_loss)
    acc.append((100*correct))
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

def validate(model, dataloader, loss_fn, valLoss, acc):
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    val_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            val_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    val_loss /= num_batches
    correct /= size
    valLoss.append(val_loss)
    acc.append((100 * correct))
    print(f"Validation Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {val_loss:>8f} \n")

In [None]:

# trainLoss = []
# testLoss = []
# acc = []

# for t in range(epochs):
#     print(f"Epoch {t+1}\n-------------------------------")
#     train_loop(train_dataloader, pymodel1, loss_fn, optimizer1, trainLoss)
#     test_loop(test_dataloader, pymodel1, loss_fn, testLoss, acc)

# print("Done!")

In [None]:
# Per-epoch histories filled in by train_loop / validate.
trainLoss = []
valLoss = []
acc = []  # validation accuracy in percent, one entry per epoch

# Results of the single evaluation on the held-out test split.
testLoss = []
testAcc = []

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, pymodel1, loss_fn, optimizer1, trainLoss)
    validate(pymodel1, val_dataloader, loss_fn, valLoss, acc)

print("Training and Validation Done!")

# After training, evaluate once on the separate test set. Its accuracy goes
# into its own list so that `acc` stays a pure per-epoch validation history
# (appending it to `acc` would add a stray final point to the accuracy plot).
test_loop(test_dataloader, pymodel1, loss_fn, testLoss, testAcc)

In [None]:
def evaluate_predictions(model, dataloader):
    """Collect predicted and true class indices for every sample in ``dataloader``.

    Args:
        model: Network to evaluate; it is switched to (and left in) evaluation
            mode. Inputs are moved to the device that holds its parameters.
        dataloader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        ``(all_predictions, all_labels)``: two equally long lists of ints, in
        the order the dataloader yielded the samples.
    """
    model.eval()

    # Follow the model's own device instead of a notebook-level global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(model_device))
            # The predicted class is the index of the largest score per sample.
            all_predictions.extend(pred.argmax(1).tolist())
            # Labels are only recorded, so they never need to leave the host.
            all_labels.extend(y.tolist())

    return all_predictions, all_labels

In [None]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

# Number of classes in this problem (4 in this case).
num_classes = 4


def calculate_multiclass_metrics(predictions, labels, num_classes):
    """Compute the confusion matrix plus per-class precision, recall and F1.

    Args:
        predictions: Predicted class indices.
        labels: True class indices, aligned with ``predictions``.
        num_classes: Classes ``0 .. num_classes - 1`` are reported, in that order.

    Returns:
        ``(conf_matrix, precision, recall, f1)`` as returned by the
        scikit-learn functions of the same names (``average=None``, i.e. one
        score per class).
    """
    class_ids = range(num_classes)
    per_class = {"average": None, "labels": class_ids}

    conf_matrix = confusion_matrix(labels, predictions, labels=class_ids)
    precision, recall, f1 = (
        score(labels, predictions, **per_class)
        for score in (precision_score, recall_score, f1_score)
    )
    return conf_matrix, precision, recall, f1


### Evaluation on the held-out test split, i.e. data SIMILAR to the training data.

# Gather predicted and true labels for the test split with evaluate_predictions.
test_predictions, test_labels = evaluate_predictions(pymodel1, test_dataloader)

# Per-class metrics over all num_classes classes.
conf_matrix, precision, recall, f1 = calculate_multiclass_metrics(
    test_predictions, test_labels, num_classes
)

# Print every result under its heading.
for _heading, _values in (
    ("Confusion Matrix:", conf_matrix),
    ("\nPrecision:", precision),
    ("\nRecall:", recall),
    ("\nF1 Score:", f1),
):
    print(_heading)
    print(_values)

In [None]:
#### Real-world test set: photos that are NOT presented like the images used for
#### training/testing above, i.e. closer to what a customer would send in.
# Folder with the real-world images, organised into one sub-folder per class.
real_image_directory = "print_errors_2_modified_for_test"

# Same preprocessing as for the training data, so the model sees inputs of the
# same size and value range.
# NOTE(review): ImageFolder derives label indices from the sub-folder names of
# each root separately; the metrics are only meaningful if these indices refer
# to the same classes as in the training dataset — confirm the folder names line up.
real_world_dataset = datasets.ImageFolder(root=real_image_directory,
                                      transform=transforms.Compose([
                                          transforms.Resize(image_size),
                                          transforms.CenterCrop(image_size),
                                          transforms.ToTensor(),
                                          transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
                                      ]))

# PyTorch Data Initialization. Evaluation only, so keep a fixed order like the
# validation and test loaders do; shuffling would add nothing and would consume
# random numbers from the global RNG.
real_world_dataloader = torch.utils.data.DataLoader(real_world_dataset, batch_size=batch, shuffle=False, num_workers=2)



In [None]:
#### Testing Real World
# Number of classes present in the real-world folder.
real_world_num_classes = 3

# Gather predicted and true labels for the real-world set with evaluate_predictions.
real_world_test_predictions, real_world_test_labels = evaluate_predictions(pymodel1, real_world_dataloader)

# Score over every class the model can output, not only the classes present in
# the real-world set. Restricting the label set to real_world_num_classes makes
# the confusion matrix silently leave out every image the model assigns to a
# higher class index, hiding exactly those mistakes.
# Classes without any true samples get a recall of 0 from scikit-learn (with a warning).
real_world_metric_classes = max(real_world_num_classes, pymodel1.fc2[0].out_features)

# Calculate multiclass metrics
real_world_conf_matrix, real_world_precision, real_world_recall, real_world_f1 = calculate_multiclass_metrics(
    real_world_test_predictions, real_world_test_labels, real_world_metric_classes)

# Print the results
print("Confusion Matrix:")
print(real_world_conf_matrix)
print("\nPrecision:")
print(real_world_precision)
print("\nRecall:")
print(real_world_recall)
print("\nF1 Score:")
print(real_world_f1)

In [None]:
import matplotlib.pyplot as plt

# Loss curves: per-epoch training and validation loss.
plt.title("Loss throughout Epochs")
plt.plot(valLoss, label="val")
plt.plot(trainLoss, label="train")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

# Validation accuracy per epoch. validate() appends one entry to both valLoss
# and acc each epoch; if the final test_loop() call appended to the same `acc`
# list, that trailing entry is not an epoch, so only the first len(valLoss)
# entries are plotted.
plt.title("Validation Accuracy throughout Epochs")
plt.plot(acc[:len(valLoss)], label="val")
plt.xlabel("Epochs")
plt.ylabel("Accuracy (%)")
plt.legend()
plt.show()