# Imports and Mounting

In [None]:
# Colab-only setup: mount Google Drive at /content/drive so the project
# folder referenced by the path constants below can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pickle
import os
import random
import shutil
import pandas as pd
import numpy as np
import csv

import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Subset
import torchvision.models as models
from torchvision.models import ResNet18_Weights
import torch.optim as optim

from PIL import Image
from torch.utils.data import Dataset

!pip install tqdm -q
from tqdm import tqdm

!pip install rdkit -q
from rdkit import Chem
from rdkit.Chem.rdMolDescriptors import CalcMolFormula

from collections import defaultdict
import re

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.9/34.9 MB[0m [31m43.4 MB/s[0m eta [36m0:00:00[0m
[?25h

# Sampling the data

In [None]:
# Root of the shared-drive project folder; label files, image splits and
# saved models are all addressed relative to this location.
project_root = "/content/drive/Shareddrives/CIS5190FinalProj"

# Source image folders: handdrawn images and computer generated images.
handdrawn_root = "/content/drive/Shareddrives/CIS5190FinalProj/DECIMER_HDM_Dataset_Images"
computer_root = "/content/drive/Shareddrives/CIS5190FinalProj/Img2Mol"

In [None]:
# Report how many directory entries each source folder holds.
# os.listdir counts every entry, so any non-image file would be included too.
print(f"There are {len(os.listdir(handdrawn_root))} handdrawn images")
print(f"There are {len(os.listdir(computer_root))} computer generated images")

There are 5088 handdrawn images
There are 10880 computer generated images


In [None]:
## Training : Validation : Testing = 7 : 2 : 1
# NOTE: split_data() reads NUMBER_OF_TRAINING_IMGS, NUMBER_OF_VAL_IMGS and
# NUMBER_OF_TESTING_IMGS as globals. The three assignments below are
# commented out, so they must be uncommented (and this cell run) before
# split_data() is called, otherwise it raises NameError.
# NUMBER_OF_TRAINING_IMGS = 7000
# NUMBER_OF_VAL_IMGS = NUMBER_OF_TRAINING_IMGS // 7 * 2
# NUMBER_OF_TESTING_IMGS = NUMBER_OF_TRAINING_IMGS // 7

## Creating image sets

In [None]:
def create_new_directory(directory):
    """Make `directory` exist and be empty.

    Anything already located at `directory` is deleted recursively before
    the directory (including missing parents) is created again.
    """
    stale = os.path.exists(directory)
    if stale:
        # Start from a clean slate: drop the previous tree entirely.
        shutil.rmtree(directory)
    os.makedirs(directory)

In [None]:
## Randomly sample the images
def split_data(comp_gen_percentage=0.5):
    """Randomly copy source images into the train/val/test directories.

    For each split, `comp_gen_percentage` of the configured split size is
    drawn from `computer_root` and the remainder from `handdrawn_root`.
    The three destination directories (`train_root`, `val_root`,
    `test_root`) are wiped and recreated before copying.

    Relies on these notebook-level globals being defined before the call:
    NUMBER_OF_TRAINING_IMGS, NUMBER_OF_VAL_IMGS, NUMBER_OF_TESTING_IMGS,
    computer_root, handdrawn_root, train_root, val_root, test_root.

    Args:
        comp_gen_percentage: fraction (0.5 to 1) of every split that comes
            from the computer-generated images.

    Returns:
        None. When `comp_gen_percentage` is below 0.5 a message is printed
        and nothing is copied.
    """
    if comp_gen_percentage < 0.5:
        print("Computer generated percentage cannot be smaller than 0.5")
        return

    split_sizes = (NUMBER_OF_TRAINING_IMGS, NUMBER_OF_VAL_IMGS, NUMBER_OF_TESTING_IMGS)
    destinations = (train_root, val_root, test_root)

    # (source directory, fraction of each split taken from it)
    sources = (
        (computer_root, comp_gen_percentage),
        (handdrawn_root, 1 - comp_gen_percentage),
    )

    # List and shuffle both sources before touching the destinations, so a
    # missing source directory fails before any existing split is deleted.
    shuffled_sources = []
    for source_root, fraction in sources:
        files = os.listdir(source_root)
        random.shuffle(files)
        shuffled_sources.append((source_root, fraction, files))

    # Create the directories
    for destination in destinations:
        create_new_directory(destination)

    # Copy the images into the directories
    for source_root, fraction, files in shuffled_sources:
        start = 0
        for destination, split_size in zip(destinations, split_sizes):
            count = int(fraction * split_size)
            # Every split, including the test split, is bounded by its own
            # count. Previously the test slice took *all* remaining files,
            # ignoring the computed test size (e.g. a fraction of 0 still
            # copied every file of that source into the test directory).
            for filename in files[start:start + count]:
                shutil.copy(os.path.join(source_root, filename), destination)
            start += count

In [None]:
## Only need to run this once
#split_data(1)

## Loading the data into a dataloader

### Get the labels

In [None]:
# Element symbols listed by increasing atomic number (118 entries).
# The position of a symbol in this list is the index of that element in the
# 118-long label vectors produced by atoms_to_array / atoms_to_binary_array.
periodic_table = ["H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne",
                  "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca",
                  "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
                  "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr",
                  "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn",
                  "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd",
                  "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb",
                  "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg",
                  "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th",
                  "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm",
                  "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds",
                  "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og"]

In [None]:
def smiles_to_formula(smiles):
  """Return the molecular formula string RDKit computes for a SMILES string.

  Args:
    smiles: SMILES representation of a molecule.

  Returns:
    The formula produced by `CalcMolFormula` for the parsed molecule.

  Raises:
    ValueError: if RDKit cannot parse `smiles` into a molecule.
  """
  mol = Chem.MolFromSmiles(smiles)
  if mol is None:
    # MolFromSmiles reports a parse failure by returning None; passing that
    # on to CalcMolFormula fails with an error that hides the bad input.
    raise ValueError(f"Could not parse SMILES string: {smiles!r}")
  return CalcMolFormula(mol)

def formula_to_atoms(formula):
  """Count the atoms of each element named in a molecular formula string.

  An element token is an uppercase letter followed by optional lowercase
  letters; the digits directly after it give its count (1 when absent).
  Repeated symbols are summed. Returns a plain {symbol: count} dict.
  """
  tally = {}
  for token in re.finditer(r'([A-Z][a-z]*)(\d*)', formula):
    symbol, digits = token.groups()
    amount = int(digits) if digits else 1
    tally[symbol] = tally.get(symbol, 0) + amount
  return tally


def smiles_to_atoms(smiles):
  """Convert a SMILES string into its {element symbol: atom count} dict.

  The SMILES is first turned into a molecular formula, which is then
  tokenised into per-element counts.
  """
  return formula_to_atoms(smiles_to_formula(smiles))

def atoms_to_array(atoms):
  """Encode {symbol: count} as a 118-long list of per-element counts.

  Slot i holds the count for `periodic_table[i]`; elements absent from
  `atoms` stay 0, and symbols not found in `periodic_table` are ignored.
  """
  counts = [0 for _ in range(118)]
  for symbol, quantity in atoms.items():
    if symbol not in periodic_table:
      continue  # unknown symbol: nothing to record
    counts[periodic_table.index(symbol)] = quantity
  return counts

def atoms_to_binary_array(atoms):
    """Encode the elements present in `atoms` as a 118-long 0/1 list.

    Slot i is 1 when `periodic_table[i]` is a key of `atoms`, else 0.
    Keys that are not in `periodic_table` are ignored.
    """
    flags = [0] * 118
    recognised = (symbol for symbol in atoms if symbol in periodic_table)
    for symbol in recognised:
        flags[periodic_table.index(symbol)] = 1
    return flags

In [None]:
# Load the ID -> SMILES table for the handdrawn images.
handdrawn_df = pd.read_csv(project_root + "/DECIMER_HDM_Dataset_SMILES.tsv", sep='\t')
handdrawn_smiles_dict = handdrawn_df.set_index('IDs')['SMILES'].to_dict()

# Process each SMILES to convert into atom arrays.
# Each SMILES is parsed once and both encodings (counts and 0/1 presence)
# are derived from the same atom-count dict, instead of parsing it twice.
handdrawn_atom_counts = {key: smiles_to_atoms(value) for key, value in handdrawn_smiles_dict.items()}
handdrawn_atom_arrays = {key: atoms_to_array(atoms) for key, atoms in handdrawn_atom_counts.items()}
handdrawn_atom_binary_arrays = {key: atoms_to_binary_array(atoms) for key, atoms in handdrawn_atom_counts.items()}

# To verify the transformation, print the first 5 elements of the transformed dictionary
for key in list(handdrawn_atom_arrays.keys())[:5]:
    print(key, ":", handdrawn_atom_arrays[key])
for key in list(handdrawn_atom_binary_arrays.keys())[:5]:
    print(key, ":", handdrawn_atom_binary_arrays[key])

CDK_Depict_1_2 : [3, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
CDK_Depict_1_4 : [6, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
CDK_Depict_1_5 : [45, 0, 0, 0, 0, 21, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [None]:
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load this mapping from a location you control and trust.
with open("/content/drive/Shareddrives/CIS5190FinalProj/Img2Mol_map.pkl", 'rb') as f:
    comp_gen_df = pickle.load(f)
comp_gen_smiles_dict = comp_gen_df.set_index('Image')['SMILES'].to_dict()

# Parse each SMILES once and derive both encodings from the same atom counts
# (previously every SMILES was parsed twice, once per encoding).
comp_gen_atom_counts = {key: smiles_to_atoms(value) for key, value in comp_gen_smiles_dict.items()}
comp_gen_atom_arrays = {key: atoms_to_array(atoms) for key, atoms in comp_gen_atom_counts.items()}
comp_gen_atom_binary_arrays = {key: atoms_to_binary_array(atoms) for key, atoms in comp_gen_atom_counts.items()}

In [None]:
# Verify transformation for both datasets by printing the first 5 elements
print("Hand-drawn dataset first 5 atom arrays:")
for key in list(handdrawn_atom_arrays.keys())[:5]:
    print(key, ":", handdrawn_atom_arrays[key])

print("\nComputer-generated dataset first 5 atom arrays:")
for key in list(comp_gen_atom_arrays.keys())[:5]:
    print(key, ":", comp_gen_atom_arrays[key])

# The two sections below show the 0/1 presence encoding; their headers used
# to repeat "atom arrays", which made them indistinguishable from the count
# arrays printed above.
print("Hand-drawn dataset first 5 binary atom arrays:")
for key in list(handdrawn_atom_binary_arrays.keys())[:5]:
    print(key, ":", handdrawn_atom_binary_arrays[key])

print("\nComputer-generated dataset first 5 binary atom arrays:")
for key in list(comp_gen_atom_binary_arrays.keys())[:5]:
    print(key, ":", comp_gen_atom_binary_arrays[key])

Hand-drawn dataset first 5 atom arrays:
CDK_Depict_1_2 : [3, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
CDK_Depict_1_4 : [6, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
CDK_Depict_1_5 : [45, 0, 0, 0, 0, 21, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [None]:
## Combine the two dictionaries together
# Hand-drawn entries are copied in first. Computer-generated entries are then
# added under their key with '.png' removed, overwriting any equal key.
combined_mapping = dict(handdrawn_atom_arrays)
combined_mapping.update(
    (image_name.replace('.png', ''), counts)
    for image_name, counts in comp_gen_atom_arrays.items()
)

combined_binary_mapping = dict(handdrawn_atom_binary_arrays)
combined_binary_mapping.update(
    (image_name.replace('.png', ''), flags)
    for image_name, flags in comp_gen_atom_binary_arrays.items()
)

In [None]:
## Returns the label based on the filename of the image

def get_binary_label(filename):
    """Look up the binary label stored under `filename`.

    `filename` must match a key of `combined_binary_mapping` exactly.
    Returns None when there is no such key.
    """
    return combined_binary_mapping.get(filename)

In [None]:
def validate_dataset(image_names, root_dir, label_dict):
    """Keep only the image names that can actually be used for training.

    A name is kept when `root_dir/<name>` is a regular file and the name with
    ".png" removed is a key of `label_dict`. Input order is preserved.
    """
    return [
        candidate
        for candidate in image_names
        if os.path.isfile(os.path.join(root_dir, candidate))
        and candidate.replace(".png", "") in label_dict
    ]

In [None]:
class CustomDataset(Dataset):
    """Dataset of (image, label) pairs read from a single directory.

    Only entries of `root_dir` that are regular files and whose name, with
    ".png" removed, is a key of `label_dict` are included.

    Args:
        root_dir: directory containing the image files.
        label_dict: mapping from image name (without ".png") to its label
            sequence.
        transform: optional callable applied to the RGB PIL image.
    """

    def __init__(self, root_dir, label_dict, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.label_dict = label_dict
        # Drop directory entries that are not files or that have no label.
        self.image_names = validate_dataset(os.listdir(root_dir), root_dir, label_dict)

    def __len__(self):
        """Number of usable images found in `root_dir`."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Return `(image, label)` for position `idx`.

        The image is loaded as RGB and passed through `transform` when one
        was given; the label is returned as a float32 tensor.
        """
        img_name = self.image_names[idx]
        img_path = os.path.join(self.root_dir, img_name)

        # Open inside a context manager so the underlying file handle is
        # released deterministically; convert() returns an independent image
        # that stays valid after the source file is closed.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        label = self.label_dict[img_name.replace(".png", "")]

        if self.transform:
            image = self.transform(image)

        label = torch.tensor(label, dtype=torch.float32)
        return image, label

In [None]:
def create_loaders(train_path, val_path, test_path, train_transform=None, eval_transform=None):
    """Build the train/validation/test DataLoaders for the binary labels.

    Args:
        train_path: directory with the training images.
        val_path: directory with the validation images.
        test_path: directory with the test images.
        train_transform: transform for the training images. Defaults to the
            notebook-level `transform` as bound when this function is called.
        eval_transform: transform for validation and test images. Defaults
            to a deterministic resize / to-tensor / normalize pipeline with
            no random augmentation.

    Returns:
        (train_loader, val_loader, test_loader), each with batch size 32.
        Only the training loader is shuffled.
    """
    if train_transform is None:
        train_transform = transform

    if eval_transform is None:
        # Validation and test images must not be randomly augmented:
        # previously the global `transform` (which may include random flips,
        # rotations and affine warps) was applied to all three datasets,
        # which makes evaluation metrics noisy and non-reproducible.
        eval_transform = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    train_dataset = CustomDataset(root_dir=train_path, label_dict=combined_binary_mapping, transform=train_transform)
    val_dataset = CustomDataset(root_dir=val_path, label_dict=combined_binary_mapping, transform=eval_transform)
    test_dataset = CustomDataset(root_dir=test_path, label_dict=combined_binary_mapping, transform=eval_transform)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)
    # Evaluation loaders keep a fixed order. The accuracy helpers score each
    # batch as a whole, so shuffling would change the batch composition and
    # therefore the reported numbers from run to run.
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)

    return train_loader, val_loader, test_loader

### Single point sanity check

In [None]:
## Transformation
# Deterministic preprocessing (no augmentation): resize to 256x256, convert
# to a tensor, then normalize each channel with the mean/std values below
# (the values commonly used with ImageNet-pretrained torchvision models).
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Directories holding the train / validation / test image splits.
# split_data() also reads these three names as its copy destinations.
train_root = project_root + "/Train5000_0.25"
val_root = project_root + "/Val5000_0.25"
test_root = project_root + "/Test5000_0.25"

In [None]:
# Datasets over the three split directories, labelled with the 0/1 element
# presence vectors. They capture whatever `transform` is bound to when this
# cell runs.
train_binary_dataset = CustomDataset(root_dir=train_root, label_dict=combined_binary_mapping, transform=transform)
val_binary_dataset = CustomDataset(root_dir=val_root, label_dict=combined_binary_mapping, transform=transform)
test_binary_dataset = CustomDataset(root_dir=test_root, label_dict=combined_binary_mapping, transform=transform)

# Batches of 32; only the training loader shuffles its samples.
train_binary_loader = DataLoader(train_binary_dataset, batch_size=32, shuffle=True, num_workers=0)
val_binary_loader = DataLoader(val_binary_dataset, batch_size=32, shuffle=False, num_workers=0)
test_binary_loader = DataLoader(test_binary_dataset, batch_size=32, shuffle=False, num_workers=0)

In [None]:
# Build a loader that yields exactly one training sample (index 0 of the
# training dataset) for the overfitting sanity check.
# Which image that is depends on the order os.listdir returned inside
# CustomDataset.
single_item_index = 0
single_item_binary_dataset = Subset(train_binary_dataset, [single_item_index])
single_item_binary_loader = DataLoader(single_item_binary_dataset, batch_size=1, shuffle=False)
# Show the batched image shape and the label vector of that single sample.
for single_image, single_label in single_item_binary_loader:
    print("Image shape:", single_image.shape)
    print("Label:", single_label)

Image shape: torch.Size([1, 3, 256, 256])
Label: tensor([[1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])


# Data Augmentation

In [None]:
# Random geometric augmentation. Note that RandomRotation and RandomAffine
# each rotate by up to 15 degrees, so the two rotations compound.
aug1 = transforms.Compose([
    transforms.RandomHorizontalFlip(),   # Random horizontal flip
    transforms.RandomRotation(degrees=15), # Random rotation by up to 15 degrees
    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10)  # Random affine transformations
])

# aug2 = transforms.Compose([
#     transforms.RandomResizedCrop(size=224),  # Random resized crop to 224x224
#     transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Color jitter
#     transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10)  # Random affine transformations
# ])

# aug3 = transforms.Compose([
#     transforms.RandomHorizontalFlip(),   # Random horizontal flip
#     transforms.RandomRotation(degrees=15), # Random rotation by up to 15 degrees
#     transforms.RandomResizedCrop(size=224),  # Random resized crop to 224x224
#     transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Color jitter
#     transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10)  # Random affine transformations
# ])

# NOTE: this rebinds the global `transform` defined earlier, now with `aug1`
# inserted after the resize. create_loaders() reads the global `transform`
# when it is called and passes it to the train, validation AND test
# datasets, so once this cell has run the random augmentation is applied to
# all three splits. Datasets constructed before this cell keep the transform
# object they were given.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    aug1,
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


# Model Pipeline


In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# The training and evaluation functions below read this global to place
# their tensors.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


In [None]:
def calculate_accuracy(preds, labels):
    """Score predictions by overlap of their positive entries with the labels.

    `preds` is rounded to the nearest integer first. The score is the number
    of positions where prediction and label are both 1, divided by the number
    of positions where either one is 1. Returns 0.0 when no position is
    positive in either tensor.
    """
    pred_bits = torch.round(preds).to(torch.int32)
    label_bits = labels.to(torch.int32)

    # Positions that are positive in both tensors.
    both_positive = torch.bitwise_and(pred_bits, label_bits).sum().item()

    # Positions that are positive in at least one tensor.
    any_positive = torch.logical_or(pred_bits == 1, label_bits == 1).sum().item()

    if any_positive > 0:
        return both_positive / any_positive
    return 0.0


In [None]:
def calculate_batch_accuracy(outputs, labels):
    """Score a batch of raw model outputs against 0/1 labels.

    Outputs are passed through a sigmoid and thresholded at 0.5. The score
    is the count of positions where prediction and label are both 1 divided
    by the count of positions where either is 1, taken over the whole batch.
    Returns 0.0 when neither tensor has a positive entry.
    """
    # Threshold the probabilities and move to integers for bitwise ops.
    predicted = (torch.sigmoid(outputs) >= 0.5).float().int()
    expected = labels.int()

    hits = torch.bitwise_and(predicted, expected).sum().item()
    relevant = torch.logical_or(predicted == 1, expected == 1).sum().item()

    if relevant > 0:
        return hits / relevant
    return 0.0


In [None]:
def train_single_point(model, data_loader, criterion, optimizer, num_epochs=20):
    """Train `model` on `data_loader` and print loss/accuracy after each epoch.

    The model is put into training mode once before the first epoch. The
    epoch loss is the mean of the per-batch losses; the epoch accuracy is the
    per-batch accuracy averaged with each batch weighted by its size.
    Tensors are moved to the global `device`. Returns nothing.
    """
    model.train()
    for epoch in range(num_epochs):
        loss_sum = 0.0
        weighted_accuracy_sum = 0.0
        samples_seen = 0

        for batch_inputs, batch_labels in data_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            # Standard optimisation step: reset grads, forward, backward, update.
            optimizer.zero_grad()
            logits = model(batch_inputs)
            batch_loss = criterion(logits, batch_labels)
            batch_loss.backward()
            optimizer.step()

            loss_sum += batch_loss.item()

            # Weight the batch score by the batch size so uneven batches
            # contribute proportionally to the epoch figure.
            batch_size = batch_labels.size(0)
            weighted_accuracy_sum += calculate_batch_accuracy(logits, batch_labels) * batch_size
            samples_seen += batch_size

        epoch_loss = loss_sum / len(data_loader)
        epoch_accuracy = weighted_accuracy_sum / samples_seen

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')


In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=5, log_interval=10):
    """Train `model` and evaluate it on the validation set after every epoch.

    Args:
        model: network to optimise; its outputs are passed to `criterion`
            and to `calculate_batch_accuracy`.
        train_loader: iterable of (inputs, labels) training batches.
        val_loader: iterable of (inputs, labels) validation batches.
        criterion: loss function called as `criterion(outputs, labels)`.
        optimizer: optimizer stepped once per training batch.
        num_epochs: number of passes over `train_loader`.
        log_interval: running statistics are printed every this many batches.

    Returns:
        Four lists with one entry per epoch:
        (train_losses, train_accuracies, val_losses, val_accuracies).
        Losses are means over batches; accuracies are batch scores averaged
        with each batch weighted by its size.
    """
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    for epoch in range(num_epochs):
        # Training phase (re-enabled every epoch because validation below
        # switches the model to eval mode)
        model.train()
        running_loss = 0.0
        correct_train = 0.0
        total_train = 0

        print(f"Epoch [{epoch+1}/{num_epochs}]")

        for batch_idx, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # calculate_batch_accuracy returns a ratio for the whole batch;
            # multiplying by the batch size turns the final division below
            # into a size-weighted mean of the batch ratios.
            batch_correct_train = calculate_batch_accuracy(outputs, labels)
            correct_train += batch_correct_train * labels.size(0)  # Accumulate correct predictions
            total_train += labels.size(0)  # Accumulate total predictions

            if batch_idx % log_interval == 0:
                # Running averages over the batches seen so far in this epoch
                current_train_loss = running_loss / (batch_idx + 1)
                current_train_accuracy = correct_train / total_train
                print(f"Train Batch [{batch_idx}/{len(train_loader)}], Loss: {current_train_loss:.4f}, Accuracy: {current_train_accuracy:.4f}")

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct_train / total_train

        # Validation phase
        model.eval()
        val_loss = 0.0
        correct_val = 0.0
        total_val = 0

        # No gradients are needed while evaluating
        with torch.no_grad():
            for batch_idx, (inputs, labels) in enumerate(val_loader):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                batch_correct_val = calculate_batch_accuracy(outputs, labels)
                correct_val += batch_correct_val * labels.size(0)  # Accumulate correct predictions
                total_val += labels.size(0)  # Accumulate total predictions

                if batch_idx % log_interval == 0:
                    current_val_loss = val_loss / (batch_idx + 1)
                    current_val_accuracy = correct_val / total_val
                    print(f"Val Batch [{batch_idx}/{len(val_loader)}], Loss: {current_val_loss:.4f}, Accuracy: {current_val_accuracy:.4f}")

        # Turn the accumulated sums into per-epoch averages
        val_loss = val_loss / len(val_loader)
        val_accuracy = correct_val / total_val

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        # Append values for this epoch to the lists
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

    return train_losses, train_accuracies, val_losses, val_accuracies


In [None]:
def test_model(model, test_loader, criterion):
    """Evaluate `model` on `test_loader` and print/return loss and accuracy.

    Args:
        model: network to evaluate (switched to eval mode here).
        test_loader: iterable of (inputs, labels) batches.
        criterion: loss function called as `criterion(outputs, labels)`.

    Returns:
        (test_loss, test_accuracy): the mean per-batch loss and the batch
        accuracy averaged with each batch weighted by its size, which is the
        same aggregation train_model uses for its accuracies.
    """
    model.eval()
    test_loss = 0.0
    weighted_accuracy = 0.0
    total_samples = 0
    total_batches = len(test_loader)

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(tqdm(test_loader, desc="Testing")):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            # Weight each batch score by its size so a smaller final batch
            # does not count as much as a full one (previously the batch
            # scores were averaged unweighted, unlike in train_model).
            batch_size = labels.size(0)
            weighted_accuracy += calculate_batch_accuracy(outputs, labels) * batch_size
            total_samples += batch_size

            # Periodic progress report with the running averages so far
            if i % 50 == 0:
                print(f'Batch {i}/{total_batches}, Current Test Loss: {test_loss/(i+1):.4f}, Current Test Accuracy: {weighted_accuracy/total_samples:.4f}')

    test_loss /= total_batches
    test_accuracy = weighted_accuracy / total_samples

    print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

    return test_loss, test_accuracy

In [None]:
## Save things to a csv
def save_to_csv(csv_path, name, train_loss, train_acc, val_loss, val_acc, test_loss, test_acc):
    """Append one row of results to the CSV file at `csv_path`.

    The row is: name, train loss, train accuracy, validation loss,
    validation accuracy, test loss, test accuracy. The file is opened in
    append mode, so existing rows are kept and no header is written.
    """
    row = (name, train_loss, train_acc, val_loss, val_acc, test_loss, test_acc)
    with open(csv_path, mode='a', newline='') as handle:
        csv.writer(handle).writerow(row)

In [None]:
# Number of epochs passed to train_model() for the full training runs.
NUM_EPOCHS = 10

## Single Point Testing

In [None]:
# NOTE: `weights` is assigned here but the model constructors below pass
# models.ResNet18_Weights.DEFAULT directly rather than this variable.
weights = ResNet18_Weights.DEFAULT
# Size of the output layer: one logit per entry of the 118-long label vector.
num_elements = 118

# ResNet-18 with pretrained weights, fully trainable.
resnet_binary = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
for param in resnet_binary.parameters():
    param.requires_grad = True  # Unfreeze all layers

# Swap the final fully connected layer for one with `num_elements` outputs.
num_binary_features = resnet_binary.fc.in_features
resnet_binary.fc = nn.Linear(num_binary_features, num_elements)
resnet_binary = resnet_binary.to(device)  # Move model to GPU

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 66.9MB/s]


In [None]:
# Load Single Point ResNet model
# A separate pretrained ResNet-18 used only for the single-sample
# overfitting check, so that check does not touch `resnet_binary`.
resnet_single_binary = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
for param in resnet_single_binary.parameters():
    param.requires_grad = True  # Unfreeze all layers

# Replace the classification head with a `num_elements`-output linear layer.
num_single_binary_features = resnet_single_binary.fc.in_features
resnet_single_binary.fc = nn.Linear(num_single_binary_features, num_elements)
resnet_single_binary = resnet_single_binary.to(device)  # Move model to GPU

# BCEWithLogitsLoss takes raw logits, so the model has no final sigmoid;
# each of the outputs is treated as an independent yes/no prediction.
criterion = nn.BCEWithLogitsLoss()
optimizer_single = torch.optim.SGD(resnet_single_binary.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Sanity check: train for 100 epochs on the single-sample loader, then
# evaluate on that same loader. The model should be able to fit the one
# sample it was trained on.
train_single_point(resnet_single_binary, single_item_binary_loader, criterion, optimizer_single, num_epochs=100)
test_model(resnet_single_binary, single_item_binary_loader, criterion)

Epoch [1/100], Loss: 0.7354, Accuracy: 0.0000
Epoch [2/100], Loss: 0.7342, Accuracy: 0.0000
Epoch [3/100], Loss: 0.7320, Accuracy: 0.0000
Epoch [4/100], Loss: 0.7288, Accuracy: 0.0000
Epoch [5/100], Loss: 0.7248, Accuracy: 0.0000
Epoch [6/100], Loss: 0.7201, Accuracy: 0.0000
Epoch [7/100], Loss: 0.7149, Accuracy: 0.0000
Epoch [8/100], Loss: 0.7091, Accuracy: 0.0000
Epoch [9/100], Loss: 0.7028, Accuracy: 0.0000
Epoch [10/100], Loss: 0.6961, Accuracy: 0.0000
Epoch [11/100], Loss: 0.6892, Accuracy: 0.0000
Epoch [12/100], Loss: 0.6820, Accuracy: 0.0189
Epoch [13/100], Loss: 0.6746, Accuracy: 0.0196
Epoch [14/100], Loss: 0.6670, Accuracy: 0.0200
Epoch [15/100], Loss: 0.6593, Accuracy: 0.0208
Epoch [16/100], Loss: 0.6515, Accuracy: 0.0222
Epoch [17/100], Loss: 0.6436, Accuracy: 0.0227
Epoch [18/100], Loss: 0.6357, Accuracy: 0.0238
Epoch [19/100], Loss: 0.6277, Accuracy: 0.0238
Epoch [20/100], Loss: 0.6198, Accuracy: 0.0238
Epoch [21/100], Loss: 0.6118, Accuracy: 0.0263
Epoch [22/100], Loss: 

Testing: 100%|██████████| 1/1 [00:00<00:00, 29.86it/s]

Batch 0/1, Current Test Loss: 0.2500, Current Test Accuracy: 1.0000
Predictions: [[1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
Targets: [[1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
Test Loss: 0.2500, Test Accuracy: 1.0000





(0.2500499188899994, 1.0)

We can see that the model does "memorize" the single data point: after training on it, the test accuracy on that same point reaches 1.0.

# VGG19

In [None]:
def init_vgg19_hyperparameters():
    """Build a frozen, ImageNet-pretrained VGG19 with a new 118-way output layer.

    Every pretrained parameter is frozen first; the last classifier layer is
    then replaced by a freshly initialised ``nn.Linear``, which is therefore
    the only layer left with ``requires_grad=True``.

    Returns:
        tuple: ``(vgg19, criterion, optimizer)`` -- the model moved to the
        notebook-level ``device``, a ``BCEWithLogitsLoss`` criterion, and an
        SGD optimizer (lr=1e-4, momentum=0.9) built over the model parameters.
    """
    # `pretrained=True` is deprecated in torchvision; the weights enum is its
    # replacement (IMAGENET1K_V1 is the checkpoint `pretrained=True` loaded)
    # and is the same API the ResNet initializer in this notebook uses.
    vgg19 = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)

    # Freeze the pretrained weights
    for param in vgg19.parameters():
        param.requires_grad = False

    # Swap the last classifier layer for a new, trainable output layer.
    num_features = vgg19.classifier[6].in_features
    # NOTE(review): 118 outputs, presumably one per chemical element -- confirm
    # against the label encoding used by the dataset.
    num_elements = 118
    vgg19.classifier[6] = nn.Linear(num_features, num_elements)

    vgg19 = vgg19.to(device)

    # Multi-label objective: an independent sigmoid/BCE term per output logit.
    criterion = nn.BCEWithLogitsLoss()
    # Frozen parameters never receive gradients, so SGD only ever updates the
    # new output layer even though all parameters are handed to it.
    optimizer = torch.optim.SGD(vgg19.parameters(), lr=1e-4, momentum=0.9)

    return vgg19, criterion, optimizer

## 5000, 0.25

In [None]:
## Data loaders for the Train5000_0.25 / Val5000_0.25 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train5000_0.25",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val5000_0.25",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New VGG19 model together with its loss and optimizer
model, criterion, optimizer = init_vgg19_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    vgg_train_losses_5000_25,
    vgg_train_accuracies_5000_25,
    vgg_val_losses_5000_25,
    vgg_val_accuracies_5000_25,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
vgg_test_loss_5000_25, vgg_test_accuracy_5000_25 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/vgg_5000_25.pth',
)

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:06<00:00, 82.5MB/s]


Epoch [1/10]
Train Batch [0/110], Loss: 0.7167, Accuracy: 0.0305
Train Batch [10/110], Loss: 0.7071, Accuracy: 0.0295
Train Batch [20/110], Loss: 0.7034, Accuracy: 0.0300
Train Batch [30/110], Loss: 0.6989, Accuracy: 0.0306
Train Batch [40/110], Loss: 0.6933, Accuracy: 0.0318
Train Batch [50/110], Loss: 0.6874, Accuracy: 0.0335
Train Batch [60/110], Loss: 0.6817, Accuracy: 0.0350
Train Batch [70/110], Loss: 0.6761, Accuracy: 0.0364
Train Batch [80/110], Loss: 0.6705, Accuracy: 0.0377
Train Batch [90/110], Loss: 0.6650, Accuracy: 0.0395
Train Batch [100/110], Loss: 0.6595, Accuracy: 0.0412
Val Batch [0/32], Loss: 0.5884, Accuracy: 0.0918
Val Batch [10/32], Loss: 0.5882, Accuracy: 0.0930
Val Batch [20/32], Loss: 0.5884, Accuracy: 0.0934
Val Batch [30/32], Loss: 0.5883, Accuracy: 0.0925
Epoch [1/10], Train Loss: 0.6548, Train Accuracy: 0.0429, Val Loss: 0.5883, Val Accuracy: 0.0923
Epoch [2/10]
Train Batch [0/110], Loss: 0.5957, Accuracy: 0.0572
Train Batch [10/110], Loss: 0.5897, Accurac

Testing:   8%|▊         | 1/12 [00:14<02:41, 14.67s/it]

Batch 0/12, Current Test Loss: 0.2101, Current Test Accuracy: 0.6024


Testing: 100%|██████████| 12/12 [06:10<00:00, 30.85s/it]

Test Loss: 0.2112, Test Accuracy: 0.5945





In [None]:
## Save the VGG 5000 / 0.25 results (train/val metrics plus test loss and accuracy) in the results CSV
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "VGG 5000 0.25",
    vgg_train_losses_5000_25, vgg_train_accuracies_5000_25,
    vgg_val_losses_5000_25, vgg_val_accuracies_5000_25,
    vgg_test_loss_5000_25, vgg_test_accuracy_5000_25,
)

## 5000, 0.5

In [None]:
## Data loaders for the Train5000_0.5 / Val5000_0.5 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train5000_0.5",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val5000_0.5",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New VGG19 model together with its loss and optimizer
model, criterion, optimizer = init_vgg19_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    vgg_train_losses_5000_50,
    vgg_train_accuracies_5000_50,
    vgg_val_losses_5000_50,
    vgg_val_accuracies_5000_50,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
vgg_test_loss_5000_50, vgg_test_accuracy_5000_50 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/vgg_5000_50.pth',
)

Epoch [1/10]
Train Batch [0/110], Loss: 0.7126, Accuracy: 0.0303
Train Batch [10/110], Loss: 0.7081, Accuracy: 0.0316
Train Batch [20/110], Loss: 0.7040, Accuracy: 0.0333
Train Batch [30/110], Loss: 0.6991, Accuracy: 0.0348
Train Batch [40/110], Loss: 0.6934, Accuracy: 0.0359
Train Batch [50/110], Loss: 0.6880, Accuracy: 0.0375
Train Batch [60/110], Loss: 0.6825, Accuracy: 0.0392
Train Batch [70/110], Loss: 0.6772, Accuracy: 0.0408
Train Batch [80/110], Loss: 0.6717, Accuracy: 0.0430
Train Batch [90/110], Loss: 0.6662, Accuracy: 0.0453
Train Batch [100/110], Loss: 0.6610, Accuracy: 0.0475
Val Batch [0/32], Loss: 0.5967, Accuracy: 0.1313
Val Batch [10/32], Loss: 0.5916, Accuracy: 0.1332
Val Batch [20/32], Loss: 0.5914, Accuracy: 0.1342
Val Batch [30/32], Loss: 0.5916, Accuracy: 0.1334
Epoch [1/10], Train Loss: 0.6562, Train Accuracy: 0.0496, Val Loss: 0.5917, Val Accuracy: 0.1336
Epoch [2/10]
Train Batch [0/110], Loss: 0.5936, Accuracy: 0.0774
Train Batch [10/110], Loss: 0.5933, Accurac

Testing:   6%|▋         | 1/16 [00:34<08:39, 34.61s/it]

Batch 0/16, Current Test Loss: 0.2295, Current Test Accuracy: 0.7365


Testing: 100%|██████████| 16/16 [07:36<00:00, 28.51s/it]

Test Loss: 0.2170, Test Accuracy: 0.7191





In [None]:
## Save the VGG 5000 / 0.50 results (train/val metrics plus test loss and accuracy) in the results CSV
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "VGG 5000 0.50",
    vgg_train_losses_5000_50, vgg_train_accuracies_5000_50,
    vgg_val_losses_5000_50, vgg_val_accuracies_5000_50,
    vgg_test_loss_5000_50, vgg_test_accuracy_5000_50,
)

## 5000, 0.75

In [None]:
## Data loaders for the Train5000_0.75 / Val5000_0.75 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train5000_0.75",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val5000_0.75",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New VGG19 model together with its loss and optimizer
model, criterion, optimizer = init_vgg19_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    vgg_train_losses_5000_75,
    vgg_train_accuracies_5000_75,
    vgg_val_losses_5000_75,
    vgg_val_accuracies_5000_75,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
vgg_test_loss_5000_75, vgg_test_accuracy_5000_75 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/vgg_5000_75.pth',
)

Epoch [1/10]
Train Batch [0/110], Loss: 0.7146, Accuracy: 0.0393
Train Batch [10/110], Loss: 0.7097, Accuracy: 0.0411
Train Batch [20/110], Loss: 0.7062, Accuracy: 0.0411
Train Batch [30/110], Loss: 0.7018, Accuracy: 0.0431
Train Batch [40/110], Loss: 0.6963, Accuracy: 0.0443
Train Batch [50/110], Loss: 0.6909, Accuracy: 0.0459
Train Batch [60/110], Loss: 0.6856, Accuracy: 0.0475
Train Batch [70/110], Loss: 0.6805, Accuracy: 0.0493
Train Batch [80/110], Loss: 0.6752, Accuracy: 0.0512
Train Batch [90/110], Loss: 0.6699, Accuracy: 0.0535
Train Batch [100/110], Loss: 0.6645, Accuracy: 0.0562
Val Batch [0/32], Loss: 0.5975, Accuracy: 0.1417
Val Batch [10/32], Loss: 0.5960, Accuracy: 0.1375
Val Batch [20/32], Loss: 0.5960, Accuracy: 0.1334
Val Batch [30/32], Loss: 0.5962, Accuracy: 0.1313
Epoch [1/10], Train Loss: 0.6599, Train Accuracy: 0.0583, Val Loss: 0.5960, Val Accuracy: 0.1313
Epoch [2/10]
Train Batch [0/110], Loss: 0.6018, Accuracy: 0.0979
Train Batch [10/110], Loss: 0.5964, Accurac

Testing:   6%|▋         | 1/16 [00:29<07:17, 29.16s/it]

Batch 0/16, Current Test Loss: 0.2201, Current Test Accuracy: 0.8103


Testing: 100%|██████████| 16/16 [07:32<00:00, 28.27s/it]

Test Loss: 0.2178, Test Accuracy: 0.7985





In [None]:
## Save the VGG 5000 / 0.75 results (train/val metrics plus test loss and accuracy) in the results CSV
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "VGG 5000 0.75",
    vgg_train_losses_5000_75, vgg_train_accuracies_5000_75,
    vgg_val_losses_5000_75, vgg_val_accuracies_5000_75,
    vgg_test_loss_5000_75, vgg_test_accuracy_5000_75,
)

## 5000, 1.0

In [None]:
## Data loaders for the Train5000_1.0 / Val5000_1.0 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train5000_1.0",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val5000_1.0",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New VGG19 model together with its loss and optimizer
model, criterion, optimizer = init_vgg19_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    vgg_train_losses_5000_100,
    vgg_train_accuracies_5000_100,
    vgg_val_losses_5000_100,
    vgg_val_accuracies_5000_100,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
vgg_test_loss_5000_100, vgg_test_accuracy_5000_100 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/vgg_5000_100.pth',
)



Epoch [1/10]
Train Batch [0/110], Loss: 0.6950, Accuracy: 0.0393


In [None]:
## Save the VGG 5000 / 1.00 results (train/val metrics plus test loss and accuracy) in the results CSV
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "VGG 5000 1.00",
    vgg_train_losses_5000_100, vgg_train_accuracies_5000_100,
    vgg_val_losses_5000_100, vgg_val_accuracies_5000_100,
    vgg_test_loss_5000_100, vgg_test_accuracy_5000_100,
)

## 10000, 0.5

In [None]:
## Data loaders for the Train10000_0.5 / Val10000_0.5 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train10000_0.5",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val10000_0.5",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New VGG19 model together with its loss and optimizer
model, criterion, optimizer = init_vgg19_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    vgg_train_losses_10000_50,
    vgg_train_accuracies_10000_50,
    vgg_val_losses_10000_50,
    vgg_val_accuracies_10000_50,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
vgg_test_loss_10000_50, vgg_test_accuracy_10000_50 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/vgg_10000_50.pth',
)

Epoch [1/10]
Train Batch [0/219], Loss: 0.7129, Accuracy: 0.0293
Train Batch [10/219], Loss: 0.7103, Accuracy: 0.0255
Train Batch [20/219], Loss: 0.7072, Accuracy: 0.0251
Train Batch [30/219], Loss: 0.7027, Accuracy: 0.0259
Train Batch [40/219], Loss: 0.6976, Accuracy: 0.0268
Train Batch [50/219], Loss: 0.6921, Accuracy: 0.0277
Train Batch [60/219], Loss: 0.6865, Accuracy: 0.0287
Train Batch [70/219], Loss: 0.6810, Accuracy: 0.0301
Train Batch [80/219], Loss: 0.6754, Accuracy: 0.0315
Train Batch [90/219], Loss: 0.6700, Accuracy: 0.0334
Train Batch [100/219], Loss: 0.6646, Accuracy: 0.0351
Train Batch [110/219], Loss: 0.6593, Accuracy: 0.0375
Train Batch [120/219], Loss: 0.6542, Accuracy: 0.0393
Train Batch [130/219], Loss: 0.6491, Accuracy: 0.0413
Train Batch [140/219], Loss: 0.6441, Accuracy: 0.0435
Train Batch [150/219], Loss: 0.6393, Accuracy: 0.0459
Train Batch [160/219], Loss: 0.6344, Accuracy: 0.0488
Train Batch [170/219], Loss: 0.6297, Accuracy: 0.0516
Train Batch [180/219], Los

Testing:   3%|▎         | 1/32 [00:28<14:56, 28.91s/it]

Batch 0/32, Current Test Loss: 0.1402, Current Test Accuracy: 0.7099


Testing: 100%|██████████| 32/32 [15:00<00:00, 28.15s/it]

Test Loss: 0.1347, Test Accuracy: 0.6952





In [None]:
## Save the VGG 10000 / 0.5 results (train/val metrics plus test loss and accuracy) in the results CSV
## NOTE(review): this label writes the ratio as "0.5", whereas the 5000-image VGG cells use
## two decimals (e.g. "VGG 5000 0.50") -- confirm nothing matches on the exact label before unifying.
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "VGG 10000 0.5",
    vgg_train_losses_10000_50, vgg_train_accuracies_10000_50,
    vgg_val_losses_10000_50, vgg_val_accuracies_10000_50,
    vgg_test_loss_10000_50, vgg_test_accuracy_10000_50,
)

## 10000, 0.75

In [None]:
## Data loaders for the Train10000_0.75 / Val10000_0.75 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train10000_0.75",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val10000_0.75",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New VGG19 model together with its loss and optimizer
model, criterion, optimizer = init_vgg19_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    vgg_train_losses_10000_75,
    vgg_train_accuracies_10000_75,
    vgg_val_losses_10000_75,
    vgg_val_accuracies_10000_75,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
vgg_test_loss_10000_75, vgg_test_accuracy_10000_75 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/vgg_10000_75.pth',
)

Epoch [1/10]
Train Batch [0/219], Loss: 0.6929, Accuracy: 0.0460
Train Batch [10/219], Loss: 0.6906, Accuracy: 0.0447
Train Batch [20/219], Loss: 0.6869, Accuracy: 0.0450
Train Batch [30/219], Loss: 0.6828, Accuracy: 0.0451
Train Batch [40/219], Loss: 0.6785, Accuracy: 0.0467
Train Batch [50/219], Loss: 0.6732, Accuracy: 0.0483
Train Batch [60/219], Loss: 0.6678, Accuracy: 0.0500
Train Batch [70/219], Loss: 0.6625, Accuracy: 0.0518
Train Batch [80/219], Loss: 0.6577, Accuracy: 0.0540
Train Batch [90/219], Loss: 0.6524, Accuracy: 0.0562
Train Batch [100/219], Loss: 0.6474, Accuracy: 0.0587
Train Batch [110/219], Loss: 0.6425, Accuracy: 0.0613
Train Batch [120/219], Loss: 0.6375, Accuracy: 0.0644
Train Batch [130/219], Loss: 0.6327, Accuracy: 0.0674
Train Batch [140/219], Loss: 0.6279, Accuracy: 0.0706
Train Batch [150/219], Loss: 0.6232, Accuracy: 0.0741
Train Batch [160/219], Loss: 0.6185, Accuracy: 0.0780
Train Batch [170/219], Loss: 0.6140, Accuracy: 0.0817
Train Batch [180/219], Los

Testing:   3%|▎         | 1/32 [00:28<14:56, 28.92s/it]

Batch 0/32, Current Test Loss: 0.1359, Current Test Accuracy: 0.7697


Testing: 100%|██████████| 32/32 [14:40<00:00, 27.53s/it]

Test Loss: 0.1295, Test Accuracy: 0.7918





In [None]:
## Save the VGG 10000 / 0.75 results (train/val metrics plus test loss and accuracy) in the results CSV
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "VGG 10000 0.75",
    vgg_train_losses_10000_75, vgg_train_accuracies_10000_75,
    vgg_val_losses_10000_75, vgg_val_accuracies_10000_75,
    vgg_test_loss_10000_75, vgg_test_accuracy_10000_75,
)

## 10000, 1.0

In [None]:
## Data loaders for the Train10000_1.0 / Val10000_1.0 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train10000_1.0",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val10000_1.0",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New VGG19 model together with its loss and optimizer
model, criterion, optimizer = init_vgg19_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    vgg_train_losses_10000_100,
    vgg_train_accuracies_10000_100,
    vgg_val_losses_10000_100,
    vgg_val_accuracies_10000_100,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
vgg_test_loss_10000_100, vgg_test_accuracy_10000_100 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/vgg_10000_100.pth',
)



Epoch [1/10]
Train Batch [0/219], Loss: 0.7072, Accuracy: 0.0383
Train Batch [10/219], Loss: 0.7023, Accuracy: 0.0383
Train Batch [20/219], Loss: 0.6984, Accuracy: 0.0392
Train Batch [30/219], Loss: 0.6940, Accuracy: 0.0410
Train Batch [40/219], Loss: 0.6892, Accuracy: 0.0432
Train Batch [50/219], Loss: 0.6844, Accuracy: 0.0449
Train Batch [60/219], Loss: 0.6790, Accuracy: 0.0472
Train Batch [70/219], Loss: 0.6739, Accuracy: 0.0495
Train Batch [80/219], Loss: 0.6687, Accuracy: 0.0519
Train Batch [90/219], Loss: 0.6637, Accuracy: 0.0544
Train Batch [100/219], Loss: 0.6589, Accuracy: 0.0570
Train Batch [110/219], Loss: 0.6537, Accuracy: 0.0600
Train Batch [120/219], Loss: 0.6487, Accuracy: 0.0631
Train Batch [130/219], Loss: 0.6439, Accuracy: 0.0664
Train Batch [140/219], Loss: 0.6390, Accuracy: 0.0698
Train Batch [150/219], Loss: 0.6343, Accuracy: 0.0737
Train Batch [160/219], Loss: 0.6295, Accuracy: 0.0779
Train Batch [170/219], Loss: 0.6250, Accuracy: 0.0822
Train Batch [180/219], Los

In [None]:
## Save the VGG 10000 / 1.0 results (train/val metrics plus test loss and accuracy) in the results CSV
## NOTE(review): this label writes the ratio as "1.0", whereas the 5000-image VGG cells use
## two decimals (e.g. "VGG 5000 1.00") -- confirm nothing matches on the exact label before unifying.
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "VGG 10000 1.0",
    vgg_train_losses_10000_100, vgg_train_accuracies_10000_100,
    vgg_val_losses_10000_100, vgg_val_accuracies_10000_100,
    vgg_test_loss_10000_100, vgg_test_accuracy_10000_100,
)

# ResNet

In [None]:
def init_resnet_hyperparameters():
    """Build a frozen, pretrained ResNet-18 with a new 118-way output layer.

    All pretrained parameters are frozen before the final ``fc`` layer is
    replaced by a freshly initialised ``nn.Linear``, so that layer is the only
    one left with ``requires_grad=True``.

    Returns:
        tuple: ``(resnet, criterion, optimizer)`` -- the model moved to the
        notebook-level ``device``, a ``BCEWithLogitsLoss`` criterion, and an
        SGD optimizer (lr=1e-4, momentum=0.9) built over the model parameters.
    """
    backbone = models.resnet18(weights=ResNet18_Weights.DEFAULT)

    # Freeze every pretrained parameter in one call.
    backbone.requires_grad_(False)

    # New output layer; created after freezing, so it stays trainable.
    # NOTE(review): 118 outputs, presumably one per chemical element -- confirm.
    backbone.fc = nn.Linear(backbone.fc.in_features, 118)

    backbone = backbone.to(device)

    loss_fn = nn.BCEWithLogitsLoss()
    sgd = torch.optim.SGD(backbone.parameters(), lr=1e-4, momentum=0.9)

    return backbone, loss_fn, sgd

## 5000, 0.25

In [None]:
## Data loaders for the Train5000_0.25 / Val5000_0.25 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train5000_0.25",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val5000_0.25",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New ResNet model together with its loss and optimizer
model, criterion, optimizer = init_resnet_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    res_train_losses_5000_25,
    res_train_accuracies_5000_25,
    res_val_losses_5000_25,
    res_val_accuracies_5000_25,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
res_test_loss_5000_25, res_test_accuracy_5000_25 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/resnet_5000_25.pth',
)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:01<00:00, 36.5MB/s]


Epoch [1/10]
Train Batch [0/110], Loss: 0.7331, Accuracy: 0.0377
Train Batch [10/110], Loss: 0.7317, Accuracy: 0.0346
Train Batch [20/110], Loss: 0.7292, Accuracy: 0.0349
Train Batch [30/110], Loss: 0.7256, Accuracy: 0.0353
Train Batch [40/110], Loss: 0.7217, Accuracy: 0.0359
Train Batch [50/110], Loss: 0.7178, Accuracy: 0.0361
Train Batch [60/110], Loss: 0.7139, Accuracy: 0.0365
Train Batch [70/110], Loss: 0.7099, Accuracy: 0.0370
Train Batch [80/110], Loss: 0.7058, Accuracy: 0.0375
Train Batch [90/110], Loss: 0.7017, Accuracy: 0.0381
Train Batch [100/110], Loss: 0.6977, Accuracy: 0.0386
Val Batch [0/32], Loss: 0.6539, Accuracy: 0.0452
Val Batch [10/32], Loss: 0.6506, Accuracy: 0.0433
Val Batch [20/32], Loss: 0.6516, Accuracy: 0.0428
Val Batch [30/32], Loss: 0.6512, Accuracy: 0.0425
Epoch [1/10], Train Loss: 0.6942, Train Accuracy: 0.0390, Val Loss: 0.6514, Val Accuracy: 0.0425
Epoch [2/10]
Train Batch [0/110], Loss: 0.6513, Accuracy: 0.0433
Train Batch [10/110], Loss: 0.6472, Accurac

Testing:   6%|▋         | 1/16 [00:00<00:08,  1.80it/s]

Batch 0/16, Current Test Loss: 0.2774, Current Test Accuracy: 0.6235


Testing: 100%|██████████| 16/16 [00:07<00:00,  2.08it/s]

Test Loss: 0.2784, Test Accuracy: 0.5507





In [None]:
## Save the ResNet 5000 / 0.25 results (train/val metrics plus test loss and accuracy) in the results CSV
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "ResNet 5000 0.25",
    res_train_losses_5000_25, res_train_accuracies_5000_25,
    res_val_losses_5000_25, res_val_accuracies_5000_25,
    res_test_loss_5000_25, res_test_accuracy_5000_25,
)

## 5000, 0.5

In [None]:
## Data loaders for the Train5000_0.5 / Val5000_0.5 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train5000_0.5",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val5000_0.5",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New ResNet model together with its loss and optimizer
model, criterion, optimizer = init_resnet_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    res_train_losses_5000_50,
    res_train_accuracies_5000_50,
    res_val_losses_5000_50,
    res_val_accuracies_5000_50,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
res_test_loss_5000_50, res_test_accuracy_5000_50 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/resnet_5000_50.pth',
)

In [None]:
## Save the ResNet 5000 / 0.50 results (train/val metrics plus test loss and accuracy) in the results CSV
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "ResNet 5000 0.50",
    res_train_losses_5000_50, res_train_accuracies_5000_50,
    res_val_losses_5000_50, res_val_accuracies_5000_50,
    res_test_loss_5000_50, res_test_accuracy_5000_50,
)

## 5000, 0.75

In [None]:
## Data loaders for the Train5000_0.75 / Val5000_0.75 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train5000_0.75",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val5000_0.75",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New ResNet model together with its loss and optimizer
model, criterion, optimizer = init_resnet_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    res_train_losses_5000_75,
    res_train_accuracies_5000_75,
    res_val_losses_5000_75,
    res_val_accuracies_5000_75,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
res_test_loss_5000_75, res_test_accuracy_5000_75 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/resnet_5000_75.pth',
)

In [None]:
## Save the ResNet 5000 / 0.75 results (train/val metrics plus test loss and accuracy) in the results CSV
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "ResNet 5000 0.75",
    res_train_losses_5000_75, res_train_accuracies_5000_75,
    res_val_losses_5000_75, res_val_accuracies_5000_75,
    res_test_loss_5000_75, res_test_accuracy_5000_75,
)

## 5000, 1.0

In [None]:
## Data loaders for the Train5000_1.0 / Val5000_1.0 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train5000_1.0",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val5000_1.0",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New ResNet model together with its loss and optimizer
model, criterion, optimizer = init_resnet_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    res_train_losses_5000_100,
    res_train_accuracies_5000_100,
    res_val_losses_5000_100,
    res_val_accuracies_5000_100,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
res_test_loss_5000_100, res_test_accuracy_5000_100 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/resnet_5000_100.pth',
)

In [None]:
## Save the ResNet 5000 / 1.00 results (train/val metrics plus test loss and accuracy) in the results CSV
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "ResNet 5000 1.00",
    res_train_losses_5000_100, res_train_accuracies_5000_100,
    res_val_losses_5000_100, res_val_accuracies_5000_100,
    res_test_loss_5000_100, res_test_accuracy_5000_100,
)

## 10000, 0.5

In [None]:
## Data loaders for the Train10000_0.5 / Val10000_0.5 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train10000_0.5",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val10000_0.5",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New ResNet model together with its loss and optimizer
model, criterion, optimizer = init_resnet_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    res_train_losses_10000_50,
    res_train_accuracies_10000_50,
    res_val_losses_10000_50,
    res_val_accuracies_10000_50,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
res_test_loss_10000_50, res_test_accuracy_10000_50 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/resnet_10000_50.pth',
)

In [None]:
## Save the ResNet 10000 / 0.50 results (train/val metrics plus test loss and accuracy) in the results CSV
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "ResNet 10000 0.50",
    res_train_losses_10000_50, res_train_accuracies_10000_50,
    res_val_losses_10000_50, res_val_accuracies_10000_50,
    res_test_loss_10000_50, res_test_accuracy_10000_50,
)

## 10000, 0.75

In [None]:
## Data loaders for the Train10000_0.75 / Val10000_0.75 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train10000_0.75",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val10000_0.75",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New ResNet model together with its loss and optimizer
model, criterion, optimizer = init_resnet_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    res_train_losses_10000_75,
    res_train_accuracies_10000_75,
    res_val_losses_10000_75,
    res_val_accuracies_10000_75,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
res_test_loss_10000_75, res_test_accuracy_10000_75 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/resnet_10000_75.pth',
)

In [None]:
## Save the ResNet 10000 / 0.75 results (train/val metrics plus test loss and accuracy) in the results CSV
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "ResNet 10000 0.75",
    res_train_losses_10000_75, res_train_accuracies_10000_75,
    res_val_losses_10000_75, res_val_accuracies_10000_75,
    res_test_loss_10000_75, res_test_accuracy_10000_75,
)

## 10000, 1.0

In [None]:
## Data loaders for the Train10000_1.0 / Val10000_1.0 split and the common test_dataset folder
train_loader, val_loader, test_loader = create_loaders(
    train_path="/content/drive/Shareddrives/CIS5190FinalProj/Train10000_1.0",
    val_path="/content/drive/Shareddrives/CIS5190FinalProj/Val10000_1.0",
    test_path="/content/drive/Shareddrives/CIS5190FinalProj/test_dataset",
)

## New ResNet model together with its loss and optimizer
model, criterion, optimizer = init_resnet_hyperparameters()

## Train the model and keep the returned training/validation losses and accuracies
(
    res_train_losses_10000_100,
    res_train_accuracies_10000_100,
    res_val_losses_10000_100,
    res_val_accuracies_10000_100,
) = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS
)

## Test the model on the test loader
res_test_loss_10000_100, res_test_accuracy_10000_100 = test_model(
    model, test_loader, criterion
)

## Save the trained weights to the shared drive
torch.save(
    model.state_dict(),
    '/content/drive/Shareddrives/CIS5190FinalProj/resnet_10000_100.pth',
)

In [None]:
## Save the ResNet 10000 / 1.00 results (train/val metrics plus test loss and accuracy) in the results CSV
save_to_csv(
    "/content/drive/Shareddrives/CIS5190FinalProj/results2.csv",
    "ResNet 10000 1.00",
    res_train_losses_10000_100, res_train_accuracies_10000_100,
    res_val_losses_10000_100, res_val_accuracies_10000_100,
    res_test_loss_10000_100, res_test_accuracy_10000_100,
)