In [1]:
import os
import torch
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import urllib.request
import shutil
import json
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [2]:
class image_data_set(torch.utils.data.Dataset):
    """Map-style dataset that pairs pre-loaded samples with their labels.

    ``data`` and ``labels`` are stored exactly as passed in and are indexed in
    parallel; they are expected to have the same length, but this is not checked.
    """

    def __init__(self, data, labels):
        self.data, self.labels = data, labels

    def __len__(self):
        # The sample sequence alone defines the size of the dataset.
        return len(self.data)

    def __getitem__(self, index):
        # Items are dicts so that batches can be addressed by key
        # ('data' / 'label') instead of by tuple position.
        sample, target = self.data[index], self.labels[index]
        return dict(data=sample, label=target)

In [3]:
def download_data(folder_name, idaho_folder_path, json_file_name):
    """Download the annotation JSON and one image folder with azcopy.

    Nothing is downloaded for an artifact that already exists locally.

    Args:
        folder_name: Name of the image folder under ``public/`` in the blob
            container (e.g. ``"loc_0000"``).
        idaho_folder_path: Local directory that receives the downloads. It is
            created if it does not exist.
        json_file_name: Name of the annotation JSON file; the remote archive
            is expected to be ``json_file_name + ".zip"``.

    Raises:
        FileNotFoundError: If the ``azcopy`` executable is not on the PATH.
        subprocess.CalledProcessError: If azcopy exits with a non-zero status.
    """
    # Local import so this function does not depend on the notebook's import cell.
    import subprocess

    blob_name = "https://lilablobssc.blob.core.windows.net/idaho-camera-traps/"
    os.makedirs(idaho_folder_path, exist_ok=True)

    # The JSON is a regular file, so its presence must be tested with isfile;
    # an isdir check is always False and would force a download on every run.
    json_file_path = os.path.join(idaho_folder_path, json_file_name)
    if not os.path.isfile(json_file_path):
        json_file_zip_name = json_file_name + ".zip"
        json_zip_path = os.path.join(idaho_folder_path, json_file_zip_name)
        # Passing an argument list (no shell) keeps paths containing spaces or
        # quotes intact, and check=True surfaces a failed transfer immediately.
        subprocess.run(
            ["azcopy", "cp", blob_name + json_file_zip_name, idaho_folder_path],
            check=True,
        )
        # Unpack from, and into, the data folder rather than the current
        # working directory.
        shutil.unpack_archive(json_zip_path, idaho_folder_path)
        os.remove(json_zip_path)
    else:
        print("Required json zip already downloaded")

    if not os.path.isdir(os.path.join(idaho_folder_path, folder_name)):
        subprocess.run(
            ["azcopy", "cp", blob_name + "public/" + folder_name, idaho_folder_path, "--recursive"],
            check=True,
        )
    else:
        print("Required folder already downloaded")

In [4]:
def get_image_tensor(file_path):
    """Load an image file and return it as a normalized 3x224x224 tensor.

    Args:
        file_path: Path of the image file to load.

    Returns:
        A float tensor produced by resizing to 224x224, converting with
        ``ToTensor`` and normalizing every channel with mean 0.5 / std 0.5.
    """
    # NOTE(review): the pretrained torchvision weights used later in this
    # notebook are normally paired with ImageNet mean/std rather than 0.5/0.5 —
    # confirm whether this normalization is intentional.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # The context manager closes the underlying file once the pixels have been
    # read. convert("RGB") guarantees three channels, so single-channel or
    # RGBA files do not break the 3-channel Normalize above.
    with Image.open(file_path) as image:
        return transform(image.convert("RGB"))
    
def get_data_sets(folder_name, idaho_folder_path, json_file_name, categories_to_label_dict, random_state=None):
    """Build in-memory training and testing datasets for one image folder.

    Every image listed in the annotation JSON whose ``file_name`` starts with
    ``folder_name`` and exists on disk is loaded as a tensor and labelled from
    its annotation(s).

    Side effect: after the datasets are built, the image folder and the
    annotation JSON are deleted from ``idaho_folder_path``.

    Args:
        folder_name: Prefix (folder) selecting which images to load.
        idaho_folder_path: Directory containing the JSON file and image folder.
        json_file_name: Name of the COCO-style annotation file.
        categories_to_label_dict: Mapping from annotation ``category_id`` to
            the integer training label.
        random_state: Optional seed forwarded to ``train_test_split`` to make
            the split reproducible. ``None`` keeps the split random.

    Returns:
        ``(training_data_set, testing_data_set)`` as ``image_data_set`` objects.

    Raises:
        ValueError: If no annotated image was found for ``folder_name``. The
            downloaded files are left in place in that case.
    """
    json_file_path = os.path.join(idaho_folder_path, json_file_name)
    with open(json_file_path) as json_file:
        coco_key = json.load(json_file)

    # Annotations reference their image through "image_id"; their position in
    # the list is not guaranteed to match the position of the image in
    # "images", so they are indexed by id instead of by list index.
    category_ids_by_image_id = {}
    for annotation in coco_key["annotations"]:
        category_ids_by_image_id.setdefault(annotation["image_id"], []).append(annotation["category_id"])

    data, labels = [], []
    for index, image in enumerate(coco_key["images"]):
        file_name = image["file_name"]
        file_path = os.path.join(idaho_folder_path, file_name)
        if not (file_name.startswith(folder_name) and os.path.isfile(file_path)):
            continue

        category_ids = category_ids_by_image_id.get(image["id"])
        if not category_ids:
            print("Skipping image without annotation: " + file_name)
            continue

        # With several annotations on one image the highest label wins, i.e.
        # for the binary mapping used in this notebook an image counts as
        # "wildlife present" when any of its annotations does.
        label = max(categories_to_label_dict[category_id] for category_id in category_ids)
        data.append(get_image_tensor(file_path))
        labels.append(label)
        print("Preparing image number: " + str(index))

    if not data:
        # Fail before the cleanup below so nothing is deleted on a bad run.
        raise ValueError("No annotated images found for folder '%s' in '%s'" % (folder_name, idaho_folder_path))

    training_data, testing_data, training_labels, testing_labels = train_test_split(
        data, labels, random_state=random_state)

    print("\nNumber of training photos: " + str(len(training_data)))
    print("Number of testing photos: " + str(len(testing_data)))

    training_data_set = image_data_set(training_data, training_labels)
    testing_data_set = image_data_set(testing_data, testing_labels)

    # Everything needed is now in memory; remove the downloaded files.
    shutil.rmtree(os.path.join(idaho_folder_path, folder_name))
    os.remove(json_file_path)

    return training_data_set, testing_data_set

def get_loaders(training_data_set, testing_data_set, batch_size):
    """Wrap the training and testing datasets in DataLoaders.

    Args:
        training_data_set: Dataset used for training; reshuffled every epoch.
        testing_data_set: Dataset used for evaluation; iterated in order.
        batch_size: Number of samples per batch for both loaders.

    Returns:
        ``(training_loader, testing_loader)``.
    """
    training_loader = torch.utils.data.DataLoader(dataset = training_data_set,
                                                  batch_size = batch_size,
                                                  shuffle = True)

    # Evaluation metrics do not depend on sample order, so the test loader is
    # not shuffled; this keeps evaluation passes (and the order in which
    # misclassified images are shown) deterministic.
    testing_loader = torch.utils.data.DataLoader(dataset = testing_data_set,
                                                 batch_size = batch_size,
                                                 shuffle = False)

    return training_loader, testing_loader

In [5]:
def print_image(image_tensor, prediction):
    """Show the first channel of a misclassified image with its predicted class.

    Args:
        image_tensor: Image tensor indexed as ``[channel, ...]``; only channel
            0 is displayed, in grayscale.
        prediction: Predicted label; ``1`` is titled "Wildlife Present", any
            other value "No Wildlife Present".
    """
    prediction_string = "Wildlife Present" if prediction == 1 else "No Wildlife Present"

    #Alternative normalized RGB visualization: plt.imshow(image_tensor.cpu().permute(1, 2, 0).numpy())
    first_channel = image_tensor[0].cpu()
    plt.imshow(first_channel, cmap="gray")
    plt.title("Incorrectly Predicted " + prediction_string)
    plt.show()

def print_testing_analysis(all_labels, all_predictions, title):
    """Plot a confusion matrix and print accuracy, precision, recall and F-score.

    Args:
        all_labels: Ground-truth labels (1 = wildlife present, 0 = not).
        all_predictions: Predicted labels, parallel to ``all_labels``.
        title: Prefix used in the plot title and in every printed metric line.
    """
    class_names = ['Wildlife Present', 'No Wildlife Present']

    axes = plt.subplot()

    # labels=[1, 0] puts the positive class in the first row/column, which is
    # the order the tick labels below are written in.
    matrix = confusion_matrix(all_labels, all_predictions, labels=[1, 0])
    sns.heatmap(matrix, annot=True, fmt='g', cmap='Blues')

    axes.set_xlabel('Predictions')
    axes.set_ylabel('Labels')
    axes.set_title(title + ' Testing Confusion Matrix')
    axes.xaxis.set_ticklabels(list(class_names))
    axes.yaxis.set_ticklabels(list(class_names))
    plt.show()

    print(title + " Accuracy: " + str(accuracy_score(all_labels, all_predictions)))

    # The fourth returned value (support) is not reported.
    scores = precision_recall_fscore_support(all_labels, all_predictions, average='binary')
    for metric_name, value in zip(("Precision", "Recall", "F-Score"), scores):
        print(title + " " + metric_name + ": " + str(value))

def train(model, training_loader, criterion, optimizer, device=None):
    """Run one training epoch and return the average loss and accuracy.

    Args:
        model: Network to train; switched to training mode.
        training_loader: DataLoader yielding dict batches with the keys
            ``'data'`` and ``'label'``.
        criterion: Loss function called as ``criterion(output, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter, so no notebook-level global is needed.

    Returns:
        ``(loss, accuracy)`` where both are averaged over the number of
        samples in ``training_loader.dataset``.
    """
    if device is None:
        first_parameter = next(model.parameters(), None)
        device = first_parameter.device if first_parameter is not None else torch.device("cpu")

    # A mean-reduced criterion returns the per-sample average of a batch. It
    # must be weighted by the batch size before being summed, otherwise the
    # epoch loss is under-reported by roughly a factor of the batch size.
    mean_reduced = getattr(criterion, "reduction", "mean") == "mean"

    model.train()
    running_loss = 0.0
    num_correct = 0
    for i, batch in enumerate(training_loader):
        print("batch number: " + str(i))
        data, labels = batch['data'].to(device), batch['label'].to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * (labels.size(0) if mean_reduced else 1)
        # detach() instead of the deprecated .data; argmax over the class dim.
        predictions = output.detach().argmax(dim=1)
        num_correct += (predictions == labels).sum().item()

    num_samples = len(training_loader.dataset)
    return running_loss / num_samples, num_correct / num_samples

def test(model, testing_loader, criterion, print_incorrect_images):
    model.eval()
    running_loss = 0.0
    num_correct = 0
    all_labels, all_predictions = [], []

    for i, data in enumerate(testing_loader):
        data, labels = data['data'].to(device), data['label'].to(device)
        output = model(data)
        loss = criterion(output, labels)
        running_loss += loss.item()
        _, predictions = torch.max(output.data, 1)
        for index, prediction in enumerate(predictions):
            if(prediction == labels[index]):
                num_correct += 1
            elif(print_incorrect_images):
                print_image(data[index], prediction)

        all_labels.extend(labels.cpu())
        all_predictions.extend(predictions.cpu())
    
    loss = running_loss/len(testing_loader.dataset)
    accuracy = num_correct/len(testing_loader.dataset)
    return loss, accuracy, all_labels, all_predictions

In [6]:
def train_and_test(model, training_loader, testing_loader, device, num_epochs=10, learning_rate=0.001, momentum=0.9):
    """Train a model with SGD and cross-entropy loss, evaluating after each epoch.

    After the last epoch the model is evaluated once more with misclassified
    images displayed, and the confusion matrix and summary metrics are printed.

    Args:
        model: Network to train; moved to ``device`` first.
        training_loader: DataLoader providing the training batches.
        testing_loader: DataLoader providing the evaluation batches.
        device: Device the model is moved to.
        num_epochs: Number of training epochs (default 10).
        learning_rate: SGD learning rate (default 0.001).
        momentum: SGD momentum (default 0.9).
    """
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    for epoch in range(num_epochs):
        print("epoch: " + str(epoch))

        training_loss, training_accuracy = train(model, training_loader, criterion, optimizer)
        print("training loss: " + str(training_loss) + " and training accuracy: " + str(training_accuracy))

        testing_loss, testing_accuracy, _, _ = test(model, testing_loader, criterion, False)
        print("testing loss: " + str(testing_loss) + " and testing accuracy: " + str(testing_accuracy))

    # Final pass: same evaluation, but this time show the misclassified images
    # and keep the labels/predictions for the summary report.
    _, _, labels, predictions = test(model, testing_loader, criterion, True)
    print_testing_analysis(labels, predictions, "Overall")

In [7]:
def train_and_test_ResNet50(training_loader, testing_loader, device, num_classes):
    """Fine-tune and evaluate a pretrained ResNet50 with a ``num_classes`` output head.

    Args:
        training_loader: DataLoader providing the training batches.
        testing_loader: DataLoader providing the evaluation batches.
        device: Device to run on.
        num_classes: Number of output classes of the new classification head.
    """
    print("\nTraining and Testing ResNet50")
    resnet50 = models.resnet50(weights = models.ResNet50_Weights.DEFAULT)
    # Assigning fc.out_features only changes an attribute; the weight matrix
    # keeps its pretrained output size. The layer itself has to be replaced
    # for the network to emit num_classes logits.
    resnet50.fc = nn.Linear(resnet50.fc.in_features, num_classes)
    train_and_test(resnet50, training_loader, testing_loader, device)

def train_and_test_ResNet152(training_loader, testing_loader, device, num_classes):
    """Fine-tune and evaluate a pretrained ResNet152 with a ``num_classes`` output head.

    Args:
        training_loader: DataLoader providing the training batches.
        testing_loader: DataLoader providing the evaluation batches.
        device: Device to run on.
        num_classes: Number of output classes of the new classification head.
    """
    print("\nTraining and Testing ResNet152")
    resnet152 = models.resnet152(weights = models.ResNet152_Weights.DEFAULT)
    # Assigning fc.out_features only changes an attribute; the weight matrix
    # keeps its pretrained output size. The layer itself has to be replaced
    # for the network to emit num_classes logits.
    resnet152.fc = nn.Linear(resnet152.fc.in_features, num_classes)
    train_and_test(resnet152, training_loader, testing_loader, device)

def train_and_test_ViT_L_16(training_loader, testing_loader, device, num_classes):
    """Fine-tune and evaluate a pretrained ViT-L/16 with a ``num_classes`` output head.

    Args:
        training_loader: DataLoader providing the training batches.
        testing_loader: DataLoader providing the evaluation batches.
        device: Device to run on.
        num_classes: Number of output classes of the new classification head.
    """
    print("\nTraining and Testing Vision Transformer Large 16")
    vit_l_16 = models.vit_l_16(weights = models.ViT_L_16_Weights.DEFAULT)
    # `heads` is a container module; setting out_features on it has no effect
    # on the classifier. Replace the final linear layer inside it so the
    # network emits num_classes logits.
    vit_l_16.heads.head = nn.Linear(vit_l_16.heads.head.in_features, num_classes)
    train_and_test(vit_l_16, training_loader, testing_loader, device)

# Orchestration

In [8]:
# Run configuration: everything a reader might want to tune lives in this cell.
num_classes = 2
batch_size = 10
json_file_name = "idaho-camera-traps.json"
folder_name = "loc_0000"
# The data directory can be overridden with the IDAHO_FOLDER_PATH environment
# variable so the notebook is not tied to one machine; the original location
# remains the default.
idaho_folder_path = os.environ.get(
    "IDAHO_FOLDER_PATH",
    "/Users/ChaseIson 1/Documents/Research/Code/oregon_wildlife_identification/model_benchmarking/Idaho",
)

# Mapping canines, big cats, bears, and ungulates to wildlife present and all other categories to no wildlife present
# This is mostly arbitrary and could be reworked, we just need to draw the line somewhere
# Category ids 0-61 are covered; the ids listed here map to label 1
# (wildlife present), every other id maps to label 0.
wildlife_category_ids = {3, 5, 6, 9, 10, 12, 13, 16, 18, 20, 22, 24, 38, 39, 40, 45, 48}
categories_to_label_dict = {
    category_id: int(category_id in wildlife_category_ids)
    for category_id in range(62)
}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()

In [9]:
# Fetch the annotation JSON archive and the `folder_name` image folder into
# `idaho_folder_path` using azcopy (the azcopy log is printed below).
download_data(folder_name, idaho_folder_path, json_file_name)

INFO: Scanning...
INFO: azcopy: A newer version 10.18.0 is available to download

INFO: Any empty folders will not be processed, because source and/or destination doesn't have full folder support

Job c7b53777-c2e8-dc48-60ef-de458ce8cfa5 has started
Log file is located at: /Users/ChaseIson 1/.azcopy/c7b53777-c2e8-dc48-60ef-de458ce8cfa5.log

100.0 %, 1 Done, 0 Failed, 0 Pending, 0 Skipped, 1 Total,                                 


Job c7b53777-c2e8-dc48-60ef-de458ce8cfa5 summary
Elapsed Time (Minutes): 0.0667
Number of File Transfers: 1
Number of Folder Property Transfers: 0
Total Number of Transfers: 1
Number of Transfers Completed: 1
Number of Transfers Failed: 0
Number of Transfers Skipped: 0
TotalBytesTransferred: 26765533
Final Job Status: Completed

INFO: Scanning...
INFO: azcopy: A newer version 10.18.0 is available to download

INFO: Any empty folders will not be processed, because source and/or destination doesn't have full folder support

Job 1297790c-2fd7-994d-4d23-cfadb314

In [10]:
# Load every selected image into memory, split into training/testing sets and
# wrap both in DataLoaders.
# NOTE: get_data_sets deletes the downloaded image folder and JSON file when it
# finishes, so re-running this cell requires re-running the download cell first.
training_data_set, testing_data_set = get_data_sets(folder_name, idaho_folder_path, json_file_name, categories_to_label_dict)
training_loader, testing_loader = get_loaders(training_data_set, testing_data_set, batch_size)

Preparing image number: 0
Preparing image number: 1
Preparing image number: 2
Preparing image number: 3
Preparing image number: 4
Preparing image number: 5
Preparing image number: 6
Preparing image number: 7
Preparing image number: 8
Preparing image number: 9
Preparing image number: 10
Preparing image number: 11
Preparing image number: 12
Preparing image number: 13
Preparing image number: 14
Preparing image number: 15
Preparing image number: 16
Preparing image number: 17
Preparing image number: 18
Preparing image number: 19
Preparing image number: 20
Preparing image number: 21
Preparing image number: 22
Preparing image number: 23
Preparing image number: 24
Preparing image number: 25
Preparing image number: 26
Preparing image number: 27
Preparing image number: 28
Preparing image number: 29
Preparing image number: 30
Preparing image number: 31
Preparing image number: 32
Preparing image number: 33
Preparing image number: 34
Preparing image number: 35
Preparing image number: 36
Preparing i

Preparing image number: 301
Preparing image number: 302
Preparing image number: 303
Preparing image number: 304
Preparing image number: 305
Preparing image number: 306
Preparing image number: 307
Preparing image number: 308
Preparing image number: 309
Preparing image number: 310
Preparing image number: 311
Preparing image number: 312
Preparing image number: 313
Preparing image number: 314
Preparing image number: 315
Preparing image number: 316
Preparing image number: 317
Preparing image number: 318
Preparing image number: 319
Preparing image number: 320
Preparing image number: 321
Preparing image number: 322
Preparing image number: 323
Preparing image number: 324
Preparing image number: 325
Preparing image number: 326
Preparing image number: 327
Preparing image number: 328
Preparing image number: 329
Preparing image number: 330
Preparing image number: 331
Preparing image number: 332
Preparing image number: 333
Preparing image number: 334
Preparing image number: 335
Preparing image numb

Preparing image number: 596
Preparing image number: 597
Preparing image number: 598
Preparing image number: 599
Preparing image number: 600
Preparing image number: 601
Preparing image number: 602
Preparing image number: 603
Preparing image number: 604
Preparing image number: 605
Preparing image number: 606
Preparing image number: 607
Preparing image number: 608
Preparing image number: 609
Preparing image number: 610
Preparing image number: 611
Preparing image number: 612
Preparing image number: 613
Preparing image number: 614
Preparing image number: 615
Preparing image number: 616
Preparing image number: 617
Preparing image number: 618
Preparing image number: 619
Preparing image number: 620
Preparing image number: 621
Preparing image number: 622
Preparing image number: 623
Preparing image number: 624
Preparing image number: 625
Preparing image number: 626
Preparing image number: 627
Preparing image number: 628
Preparing image number: 629
Preparing image number: 630
Preparing image numb

Preparing image number: 891
Preparing image number: 892
Preparing image number: 893
Preparing image number: 894
Preparing image number: 895
Preparing image number: 896
Preparing image number: 897
Preparing image number: 898
Preparing image number: 899
Preparing image number: 900
Preparing image number: 901
Preparing image number: 902
Preparing image number: 903
Preparing image number: 904
Preparing image number: 905
Preparing image number: 906
Preparing image number: 907
Preparing image number: 908
Preparing image number: 909
Preparing image number: 910
Preparing image number: 911
Preparing image number: 912
Preparing image number: 913
Preparing image number: 914
Preparing image number: 915
Preparing image number: 916
Preparing image number: 917
Preparing image number: 918
Preparing image number: 919
Preparing image number: 920
Preparing image number: 921
Preparing image number: 922
Preparing image number: 923
Preparing image number: 924
Preparing image number: 925
Preparing image numb

Preparing image number: 1179
Preparing image number: 1180
Preparing image number: 1181
Preparing image number: 1182
Preparing image number: 1183
Preparing image number: 1184
Preparing image number: 1185
Preparing image number: 1186
Preparing image number: 1187
Preparing image number: 1188
Preparing image number: 1189
Preparing image number: 1190
Preparing image number: 1191
Preparing image number: 1192
Preparing image number: 1193
Preparing image number: 1194
Preparing image number: 1195
Preparing image number: 1196
Preparing image number: 1197
Preparing image number: 1198
Preparing image number: 1199
Preparing image number: 1200
Preparing image number: 1201
Preparing image number: 1202
Preparing image number: 1203
Preparing image number: 1204
Preparing image number: 1205
Preparing image number: 1206
Preparing image number: 1207
Preparing image number: 1208
Preparing image number: 1209
Preparing image number: 1210
Preparing image number: 1211
Preparing image number: 1212
Preparing imag

Preparing image number: 1464
Preparing image number: 1465
Preparing image number: 1466
Preparing image number: 1467
Preparing image number: 1468
Preparing image number: 1469
Preparing image number: 1470
Preparing image number: 1471
Preparing image number: 1472
Preparing image number: 1473
Preparing image number: 1474
Preparing image number: 1475
Preparing image number: 1476
Preparing image number: 1477
Preparing image number: 1478
Preparing image number: 1479
Preparing image number: 1480
Preparing image number: 1481
Preparing image number: 1482
Preparing image number: 1483
Preparing image number: 1484
Preparing image number: 1485
Preparing image number: 1486
Preparing image number: 1487
Preparing image number: 1488
Preparing image number: 1489
Preparing image number: 1490
Preparing image number: 1491
Preparing image number: 1492
Preparing image number: 1493
Preparing image number: 1494
Preparing image number: 1495
Preparing image number: 1496
Preparing image number: 1497
Preparing imag

Preparing image number: 1750
Preparing image number: 1751
Preparing image number: 1752
Preparing image number: 1753
Preparing image number: 1754
Preparing image number: 1755
Preparing image number: 1756
Preparing image number: 1757
Preparing image number: 1758
Preparing image number: 1759
Preparing image number: 1760
Preparing image number: 1761
Preparing image number: 1762
Preparing image number: 1763
Preparing image number: 1764
Preparing image number: 1765
Preparing image number: 1766
Preparing image number: 1767
Preparing image number: 1768
Preparing image number: 1769
Preparing image number: 1770
Preparing image number: 1771
Preparing image number: 1772
Preparing image number: 1773
Preparing image number: 1774
Preparing image number: 1775
Preparing image number: 1776
Preparing image number: 1777
Preparing image number: 1778
Preparing image number: 1779
Preparing image number: 1780
Preparing image number: 1781
Preparing image number: 1782
Preparing image number: 1783
Preparing imag

In [11]:
# Benchmark 1: ResNet50.
# NOTE: the saved output of this cell ends in a KeyboardInterrupt, i.e. the
# recorded run was stopped before it completed.
train_and_test_ResNet50(training_loader, testing_loader, device, num_classes)


Training and Testing ResNet50
epoch: 0
batch number: 0
batch number: 1
batch number: 2
batch number: 3
batch number: 4
batch number: 5
batch number: 6
batch number: 7
batch number: 8
batch number: 9
batch number: 10
batch number: 11
batch number: 12
batch number: 13
batch number: 14
batch number: 15
batch number: 16
batch number: 17
batch number: 18
batch number: 19
batch number: 20
batch number: 21
batch number: 22
batch number: 23
batch number: 24
batch number: 25
batch number: 26
batch number: 27
batch number: 28
batch number: 29
batch number: 30
batch number: 31
batch number: 32
batch number: 33
batch number: 34
batch number: 35
batch number: 36
batch number: 37
batch number: 38
batch number: 39
batch number: 40
batch number: 41
batch number: 42
batch number: 43
batch number: 44
batch number: 45
batch number: 46
batch number: 47
batch number: 48
batch number: 49
batch number: 50
batch number: 51
batch number: 52
batch number: 53
batch number: 54
batch number: 55
batch number: 56
b

KeyboardInterrupt: 

In [None]:
# Benchmark 2: ResNet152 (this cell has no execution count or saved output).
train_and_test_ResNet152(training_loader, testing_loader, device, num_classes)

In [None]:
# Benchmark 3: Vision Transformer Large/16 (this cell has no execution count or saved output).
train_and_test_ViT_L_16(training_loader, testing_loader, device, num_classes)