In [10]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# Colab-only: the google.colab package is not available in other environments.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [15]:
import sys

# Directory on the mounted Drive that is added to the import search path
# (the later `from image_model import *` is presumably resolved from here).
path = '/content/drive/MyDrive/ENEL645-garbage-classification-model'

# Append only once so re-running this cell does not create duplicate entries.
if path not in sys.path:
    sys.path.append(path)

In [16]:
# Show the module search path so the entry appended above can be verified.
print(sys.path)

['/content', '/env/python', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/usr/local/lib/python3.10/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.10/dist-packages/IPython/extensions', '/usr/local/lib/python3.10/dist-packages/setuptools/_vendor', '/root/.ipython', '/content/drive/MyDrive/ENEL645-garbage_classification-model', '/content/drive/MyDrive/ENEL645-garbage-classification-model']


In [17]:
from image_model import *

In [18]:
# Select the compute device: the first CUDA GPU when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device("cpu")
print(device)

cpu


In [None]:
# Locations of the dataset splits.
# Each split folder name carries its own leading '/', so the full path is a
# plain concatenation of the root and the folder name.
root_path = '/work/TALC/enel645_2024f/garbage_data'

train_folder, val_folder, test_folder = (
    '/CVPR_2024_dataset_Train',
    '/CVPR_2024_dataset_Val',
    '/CVPR_2024_dataset_Test',
)

train_path = f'{root_path}{train_folder}'
val_path = f'{root_path}{val_folder}'
test_path = f'{root_path}{test_folder}'


In [19]:
# data transformation
# Per-channel normalisation statistics used by both pipelines
# (original note: empirical numbers for resnet).
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Pipeline with random augmentation: fixed resize/crop to 224, then random
# flips, rotation, affine and colour jitter, then tensor conversion + normalisation.
_augmented_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(90),
    transforms.RandomAffine(60, scale=(1, 1.3)),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
]
data_transform = transforms.Compose(_augmented_steps)

# Deterministic pipeline (no random operations).
# NOTE(review): two resize/crop pairs are applied back to back, so the final
# image is 256x256 while data_transform produces 224x224 - confirm this
# "change input size" step is intentional and the first pair is still wanted.
_deterministic_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Resize(270),       # change input size
    transforms.CenterCrop(256),   # change input size
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
]
data_transform_test = transforms.Compose(_deterministic_steps)

In [None]:
# Load datasets
# Only the training split gets the random augmentation pipeline. Validation and
# test use the deterministic pipeline: random flips/rotations/jitter on the
# validation images would make the validation loss change from run to run and
# would add noise to the "save the best model" decision that depends on it.
train_dataset = ImageFolder(root=train_path, transform=data_transform)
val_dataset = ImageFolder(root=val_path, transform=data_transform_test)
test_dataset = ImageFolder(root=test_path, transform=data_transform_test)

In [None]:
# Loader settings (adjust as needed): samples per batch and number of
# worker processes handed to each DataLoader.
batch_size, num_workers = 32, 4

In [None]:
# Create data loaders
# Only the training loader shuffles; validation and test keep dataset order.
img_train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
img_val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
img_test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

# Number of samples in each split. Taken from the datasets themselves:
# len(loader) * batch_size over-counts whenever the last batch is not full,
# which would skew any per-sample average computed from these sizes.
train_size = len(train_dataset)
val_size = len(val_dataset)
test_size = len(test_dataset)

In [19]:
# print the distribution of the train dataset
# The class index of every sample is read straight from the dataset instead of
# iterating the DataLoader, which would decode and augment every image just to
# obtain its label (and grew an array with np.append on every batch).
# NOTE(review): relies on ImageFolder exposing `.targets` (torchvision's
# ImageFolder does) - confirm the class imported via image_model is that one.
total = np.asarray(train_dataset.targets)
unique_lable, count = np.unique(total, return_counts=True)   # `count` is reused for the class weights
print(dict(zip(unique_lable, count)))

In [None]:
# Class weights for the cross entropy loss: inverse class frequency,
# rescaled so that the weights sum to 1.
_inverse_frequency = 1 / count
class_weights_sum = np.sum(_inverse_frequency)
_normalized_weights = _inverse_frequency / class_weights_sum

# The loss function needs a float32 tensor living on the same device as the model.
class_weights = torch.tensor(_normalized_weights, dtype=torch.float32).to(device)

In [None]:
# Class names as discovered by the training dataset
# (expected: Black, Blue, Green, TTD - TODO confirm against the printed list),
# followed by the size recorded for each split.
class_names = train_dataset.classes
print(class_names)
for _split_name, _split_size in (("Train", train_size), ("Val", val_size), ("Test", test_size)):
    print(f"{_split_name} set:", _split_size)

In [None]:
# Wrap the training loader in an iterator and pull a single batch to
# inspect the sizes of its first two elements.
train_iterator = iter(img_train_loader)
train_batch = next(train_iterator)
for _element in (train_batch[0], train_batch[1]):
    print(_element.size())

In [None]:
# Build the classifier for 4 classes with transfer=True, place it on the
# selected device, and print its layer structure.
image_model = GarbageModel(
    num_classes=4,
    transfer=True,
)
image_model.to(device)
print(image_model)

In [None]:
# set up loss, optimizer, and scheduler
# Class-weighted cross entropy to compensate for the class imbalance.
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Main training phase: AdamW at lr=1e-3, learning rate multiplied by 0.9 at each scheduler step.
optimizer = torch.optim.AdamW(image_model.parameters(), lr=0.001)
scheduler = ExponentialLR(optimizer, gamma=0.9)

# Fine-tuning phase: much smaller learning rate with its own decay schedule.
# The scheduler must wrap finetune_optimizer; binding it to `optimizer` would
# decay the wrong optimizer and leave the fine-tune learning rate constant.
finetune_optimizer = torch.optim.AdamW(image_model.parameters(), lr=1e-6)
finetune_scheduler = ExponentialLR(finetune_optimizer, gamma=0.9)

In [None]:
# Epoch budget for the main training phase and for the fine-tuning phase.
num_epoch, finetune_num_epoch = 15, 5

In [None]:
# Checkpoint file for the best model, and a very large starting value for the best loss.
img_path = './garbage_image_model.pth'
best_loss = 1e+20

# train process
# Runs the main phase (optimizer/scheduler for num_epoch epochs) and the
# fine-tuning phase (finetune_* for finetune_num_epoch epochs); the helper is
# given img_path as the place to save the best model.
_history = image_model.train_multi_epochs_and_save_best_model(
    img_train_loader,
    img_val_loader,
    train_size,
    val_size,
    criterion,
    img_path,
    optimizer,
    scheduler,
    num_epoch,
    finetune_optimizer,
    finetune_scheduler,
    finetune_num_epoch,
)
train_loss, train_acc, val_loss, val_acc = _history

print('\nFinished traning')

In [None]:
# test process
# Rebuild the architecture (transfer=False) and restore the saved best weights.
# map_location makes the checkpoint loadable on the current device even when it
# was written on a different one (e.g. trained on GPU, evaluated on a CPU runtime).
image_model_test = GarbageModel(num_classes=4, transfer=False)
image_model_test.load_state_dict(torch.load(img_path, map_location=device))
image_model_test.to(device)
# Inference mode for layers such as dropout / batch norm; harmless if predict()
# already switches the model to eval mode itself.
image_model_test.eval()

# testing loop
labels_test, predict_test = predict(image_model_test, img_test_loader, test_size)

# confusion matrix
print("confusion matrix:")
cm = confusion_matrix(labels_test, predict_test)
print(cm)