In [1]:
import torch
import torchvision as tv
import glob
import os
import sys
import numpy as np
import pandas as pd
import PIL
import time

import wandb
wandb.init(project="scifair")


Failed to query for notebook name, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable


W&B Run: https://app.wandb.ai/jayinnn/scifair/runs/8rbror2d

In [2]:
model = tv.models.inception_v3(pretrained=True)
model.fc = torch.nn.Linear(2048, 2) # change output layer
model.cuda()

Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, t

In [3]:
class MountainDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir="./datasets/", transforms=None):
        super(MountainDataset, self).__init__()
        if not os.path.isdir(root_dir):
            raise FileNotFoundError("Dir {} Error".format(root_dir))
        
        self._data = glob.glob(os.path.join(root_dir, "*/*.png"))
        self._root = root_dir
        if transforms:
            self._transforms = transforms
        else:
            self._transforms = tv.transforms.ToTensor()
    
    def __getitem__(self, index):
        image = PIL.Image.open(self._data[index])
        data = self._transforms(image)
        label = int(self._data[index].replace(self._root, "").replace("/", "")[0])
        
        return (data, label)
    
    def __len__(self):
        return len(self._data)

In [4]:
def save_weight(model, file_name, directory = "checkpoints"):
    torch.save(model.state_dict(), "{}/{}".format(directory, file_name))
    torch.save(model.state_dict(), os.path.join(wandb.run.dir, 'model.pt'))
    print("Weight saved.")

In [5]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# device = torch.device('cpu')
model.to(device)


def run_one_epoch():    
    # training
    start = time.time()
    model.train()
    corrected = 0
    running_loss = 0
    for inputs, labels in data_loader_training:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        outputs, aux_output = model(inputs)
        predicted = torch.max(outputs.data, 1)[1]
        for i in range(len(predicted)):
            if predicted[i] == labels[i]:
                corrected += 1
        
        # backward
        loss = criterion(outputs, labels)
        loss.backward()
        running_loss += loss.data.item()
        
        optimizer.step()
    
    running_loss = float(running_loss / len(dataset_training))
    training_acc = float(corrected / len(dataset_training))
    corrected = 0
    
    # testing
    model.eval()
    for inputs, labels in data_loader_testing:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = model(inputs)
        predicted = torch.max(outputs.data, 1)[1]
        for i in range(len(predicted)):
            if predicted[i] == labels[i]:
                corrected += 1
    end = time.time()
    
    testing_acc = float(corrected / len(dataset_testing))
    
    return (end-start, training_acc, testing_acc, running_loss)
    
def confusion_mat(model, data_loader):
    all_labels = []
    all_predicted = []
    model.eval()
    
    for inputs, labels in enumerate(data_loader):
        inputs = inputs.to(device)
        labels = inputs.to(device)
        
        all_label = np.concatenate((all_labels, labels.cpu()))
        
        outputs = model(inputs)
        predicted = torch.max(outputs.data, 1)[1]
        corrected += (predicted == labels).sum()
        
        all_predicted = np.concatenate((all_predicted, predicted.cpu()))
    
    confusion_matrix = pd.crosstab(pd.Series(all_labels, name="Actual"), pd.Series(all_predicted, name="Predicted"))


In [6]:
print(torch.__version__)

1.1.0


In [7]:
criterion = torch.nn.modules.CrossEntropyLoss()

In [8]:
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [9]:
root_dir = "./datasets/"


transform_training = tv.transforms.Compose([
    tv.transforms.RandomHorizontalFlip(p=0.5),
    tv.transforms.RandomVerticalFlip(p=0.5),
    tv.transforms.Resize((299, 299)),
    tv.transforms.ToTensor(),
    tv.transforms.Normalize(mean = [0.5, 0.5, 0.5], std = [0.5, 0.5, 0.5])
])

transform_testing = tv.transforms.Compose([
    tv.transforms.Resize((299, 299)),
    tv.transforms.ToTensor(),
    tv.transforms.Normalize(mean = [0.5, 0.5, 0.5], std = [0.5, 0.5, 0.5])
])

dataset_training = MountainDataset(root_dir, transform_training)
dataset_testing = MountainDataset(root_dir, transform_testing)

indices = torch.randperm(len(dataset_training)).tolist()
dataset_training = torch.utils.data.Subset(dataset_training, indices[:2000])
dataset_testing = torch.utils.data.Subset(dataset_testing, indices[2000:])

data_loader_training = torch.utils.data.DataLoader(dataset_training, batch_size = 10, shuffle=True, num_workers=3)
data_loader_testing = torch.utils.data.DataLoader(dataset_testing, batch_size = 10, shuffle=True, num_workers=3)


In [10]:
epoch_number = 100
best_acc = 0
best_loss = sys.float_info.max
wandb.watch(model)

for epoch in range(epoch_number):
    (runtime, training_acc, testing_acc, loss) = run_one_epoch()
    print("[Epoch #{}] runtime:{} sec., train_acc:{}, test_acc:{}, loss:{}".format(epoch, round(runtime, 2), training_acc, testing_acc, loss))
    wandb.log({"Run Time": round(runtime, 2), "Train Accuracy": training_acc, "Testing Accuracy": testing_acc, "Test Loss": loss})

    if training_acc > best_acc or (training_acc + 0.02 > best_acc and loss < best_loss):
        best_acc = training_acc
        best_loss = loss
        save_weight(model, "latest.pth")
    print()


print("Training complete. Best accuracy:{}, Best Loss:{}".format(best_acc, best_loss))

Failed to query for notebook name, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable


[Epoch #0] runtime:30.07 sec., train_acc:0.969, test_acc:1.0, loss:0.008515769877121784
Weight saved.

[Epoch #1] runtime:22.34 sec., train_acc:0.992, test_acc:0.9982174688057041, loss:0.00262675486918306
Weight saved.

[Epoch #2] runtime:22.51 sec., train_acc:0.9985, test_acc:1.0, loss:0.000990352214925224
Weight saved.

[Epoch #3] runtime:22.64 sec., train_acc:1.0, test_acc:1.0, loss:0.0005679102323410916
Weight saved.

[Epoch #4] runtime:22.86 sec., train_acc:0.9995, test_acc:1.0, loss:0.0003267372722730215
Weight saved.

[Epoch #5] runtime:26.49 sec., train_acc:0.9985, test_acc:1.0, loss:0.0004959893483537599

[Epoch #6] runtime:27.75 sec., train_acc:1.0, test_acc:1.0, loss:0.00023991233907509014
Weight saved.

[Epoch #7] runtime:24.56 sec., train_acc:1.0, test_acc:1.0, loss:0.000361804958067296

[Epoch #8] runtime:27.96 sec., train_acc:0.9995, test_acc:1.0, loss:0.00016267597523255973
Weight saved.

[Epoch #9] runtime:27.05 sec., train_acc:1.0, test_acc:1.0, loss:9.040464775171132

Weight saved.

[Epoch #88] runtime:23.13 sec., train_acc:1.0, test_acc:1.0, loss:6.3809991057937055e-06

[Epoch #89] runtime:28.26 sec., train_acc:1.0, test_acc:1.0, loss:9.289121747713125e-06

[Epoch #90] runtime:28.48 sec., train_acc:1.0, test_acc:1.0, loss:1.2046682565340916e-05

[Epoch #91] runtime:27.44 sec., train_acc:1.0, test_acc:1.0, loss:8.88127074480849e-06

[Epoch #92] runtime:28.06 sec., train_acc:1.0, test_acc:1.0, loss:7.974410014483623e-06

[Epoch #93] runtime:28.03 sec., train_acc:1.0, test_acc:1.0, loss:1.3324713819386602e-05

[Epoch #94] runtime:27.72 sec., train_acc:1.0, test_acc:1.0, loss:7.495176772351897e-06

[Epoch #95] runtime:27.97 sec., train_acc:1.0, test_acc:1.0, loss:9.958791821816249e-06

[Epoch #96] runtime:27.65 sec., train_acc:1.0, test_acc:1.0, loss:8.99856689279943e-06

[Epoch #97] runtime:27.74 sec., train_acc:1.0, test_acc:1.0, loss:7.273662102495138e-06

[Epoch #98] runtime:27.59 sec., train_acc:1.0, test_acc:1.0, loss:6.226480028658443e-06

[Epoc