In [1]:
# my own convolutional network implementation

In [122]:
import torch
from torch import nn
from torch.utils.data import DataLoader

# Import torchvision 
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor

# Import matplotlib for visualization
import matplotlib.pyplot as plt

In [128]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
device

'mps'

In [129]:
class_names = ["combat", "building", "fire", "rehab", "military"]

In [105]:
class MyNetwork(torch.nn.Module):
	def __init__(self, numChannels, classes):
		super(MyNetwork, self).__init__()
        
		# initialize first set of CONV => RELU => POOL layers
		self.conv1 = torch.nn.Conv2d(in_channels=numChannels, out_channels=20,
			kernel_size=(5, 5))
		self.relu1 = torch.nn.ReLU()
		self.maxpool1 = torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
		# initialize second set of CONV => RELU => POOL layers
		self.conv2 = torch.nn.Conv2d(in_channels=20, out_channels=50,
			kernel_size=(5, 5))
		self.relu2 = torch.nn.ReLU()
		self.maxpool2 = torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
		# initialize first (and only) set of FC => RELU layers
		self.flatten = torch.nn.Flatten()
		self.fc1 = torch.nn.Linear(in_features=186050, out_features=500)
		self.relu3 = torch.nn.ReLU()
		# initialize our softmax classifier
		self.fc2 = torch.nn.Linear(in_features=500, out_features=classes)
		self.logSoftmax = torch.nn.LogSoftmax(dim=1)

	def forward(self, x):
		# pass the input through our first set of CONV => RELU =>
		# POOL layers
		x = self.conv1(x)
		x = self.relu1(x)
		x = self.maxpool1(x)
		# pass the output from the previous layer through the second
		# set of CONV => RELU => POOL layers
		x = self.conv2(x)
		x = self.relu2(x)
		x = self.maxpool2(x)
		# flatten the output from the previous layer and pass it
		# through our only set of FC => RELU layers
		x = self.flatten(x)
		x = self.fc1(x)
		x = self.relu3(x)
		# pass the output to our softmax classifier to get our output
		# predictions
		x = self.fc2(x)
		output = self.logSoftmax(x)
		# return the output predictions
		return output

In [106]:
# load the image data from "train" folder
train_data = datasets.ImageFolder(root="train", transform=torchvision.transforms.Compose([torchvision.transforms.Resize((256,256)), ToTensor()]))

In [107]:
test_data = datasets.ImageFolder(root="test", transform=torchvision.transforms.Compose([torchvision.transforms.Resize((256,256)), ToTensor()]))

In [108]:
BATCH_SIZE = 32

In [110]:
train_dataloader = DataLoader(train_data, # dataset to turn into iterable
    batch_size=BATCH_SIZE, # how many samples per batch? 
    shuffle=True # shuffle data every epoch?
)

test_dataloader = DataLoader(test_data, # dataset to turn into iterable
    batch_size=BATCH_SIZE, # how many samples per batch? 
    shuffle=True # shuffle data every epoch?
)

In [111]:
# train_features_batch, train_labels_batch = next(iter(train_dataloader))
# train_features_batch.shape, train_labels_batch.shape

In [112]:
# torch.manual_seed(42)
# random_idx = torch.randint(0, len(train_features_batch), size=[1]).item()
# img, label = train_features_batch[random_idx], train_labels_batch[random_idx]

# plt.imshow(img.T)
# plt.title(class_names[label])
# plt.axis("off")
# print(f"Image size: {img.shape}")
# print(f"Label: {label}, label size: {label.shape}")
# plt.show()

In [131]:
torch.manual_seed(42)

model = MyNetwork(numChannels=3, classes=5).to(device)

loss_fn = nn.CrossEntropyLoss() # this is also called "criterion"/"cost function" in some places
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

In [134]:
# next(model.parameters()).device

device(type='mps', index=0)

In [135]:
from helper_functions import accuracy_fn
from timeit import default_timer as timer 
from tqdm.auto import tqdm

In [136]:
def print_train_time(start: float, end: float, device: torch.device = None):
    total_time = end - start
    print(f"Train time on {device}: {total_time:.3f} seconds")
    return total_time

In [144]:
# Set the seed and start the timer
torch.manual_seed(42)
train_time_start_on_cpu = timer()

# Set the number of epochs (we'll keep this small for faster training times)
epochs = 3

# Create training and testing loop
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n-------")
    ### Training
    train_loss, train_acc = 0, 0
    # Add a loop to loop through training batches
    for batch, (X, y) in enumerate(train_dataloader):

        X, y = X.to(device), y.to(device)

        # model.train() 
        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate loss (per batch)
        loss = loss_fn(y_pred, y)
        train_loss += loss # accumulatively add up the loss per epoch 

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        train_acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))
        
        # Print out how many samples have been seen
        # if batch % 160 == 0:
        print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples")

    # Divide total train loss by length of train dataloader (average loss per batch per epoch)
    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader)
    
    ### Testing
    # Setup variables for accumulatively adding up loss and accuracy 
    test_loss, test_acc = 0, 0 
    model.eval()
    with torch.inference_mode():
        for X, y in test_dataloader:

            X, y = X.to(device), y.to(device)
            
            # 1. Forward pass
            test_pred = model(X)
           
            # 2. Calculate loss (accumatively)
            test_loss += loss_fn(test_pred, y) # accumulatively add up the loss per epoch

            # 3. Calculate accuracy (preds need to be same as y_true)
            test_acc += accuracy_fn(y_true=y, y_pred=test_pred.argmax(dim=1))
        
            # Calculations on test metrics need to happen inside torch.inference_mode()
            # Divide total test loss by length of test dataloader (per batch)
            test_loss /= len(test_dataloader)

            # Divide total accuracy by length of test dataloader (per batch)
            test_acc /= len(test_dataloader)

    ## Print out what's happening
    print(f"\nTrain loss: {train_loss:.5f}, Train acc: {train_acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%\n")

# Calculate training time      
train_time_end_on_cpu = timer()
total_train_time_model_0 = print_train_time(start=train_time_start_on_cpu, 
                                           end=train_time_end_on_cpu,
                                           device=str(next(model.parameters()).device))


  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
-------
Looked at 0/9112 samples
Looked at 32/9112 samples
Looked at 64/9112 samples
Looked at 96/9112 samples
Looked at 128/9112 samples
Looked at 160/9112 samples
Looked at 192/9112 samples
Looked at 224/9112 samples
Looked at 256/9112 samples
Looked at 288/9112 samples
Looked at 320/9112 samples
Looked at 352/9112 samples
Looked at 384/9112 samples
Looked at 416/9112 samples
Looked at 448/9112 samples
Looked at 480/9112 samples
Looked at 512/9112 samples
Looked at 544/9112 samples
Looked at 576/9112 samples
Looked at 608/9112 samples
Looked at 640/9112 samples
Looked at 672/9112 samples
Looked at 704/9112 samples
Looked at 736/9112 samples
Looked at 768/9112 samples
Looked at 800/9112 samples
Looked at 832/9112 samples
Looked at 864/9112 samples
Looked at 896/9112 samples
Looked at 928/9112 samples
Looked at 960/9112 samples
Looked at 992/9112 samples
Looked at 1024/9112 samples
Looked at 1056/9112 samples
Looked at 1088/9112 samples
Looked at 1120/9112 samples
Looked at 11