In [1]:
import json
import os

In [2]:
print("metadata loading")
with open("../../../CameraTraps/snapshotserengeti-unzipped/SnapshotSerengeti_S1-6_v2.1_categories_only.json") as f:
    metadata = json.load(f)

print("bounding boxes loading")
with open("../../../CameraTraps/snapshotserengeti-unzipped/SnapshotSerengetiBboxes_20190903.json") as f:
    bboxes = json.load(f)

metadata loading
bounding boxes loading


In [21]:
def get_index(data, image_id):
    for i, image in enumerate(data["annotations"]):
        if image["image_id"] == image_id:
            return i
    return -1


In [22]:
index = get_index(bboxes, metadata["annotations"][0]["image_id"])
if index != -1:
    print(metadata["annotations"][0])
else:
    print("Image not found")

Image not found


In [2]:
import torch
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device.type

'cuda'

In [3]:
from torchvision import datasets
from torchvision.transforms import ToTensor
train_data = datasets.MNIST(
    root = 'data',
    train = True,                         
    transform = ToTensor(), 
    download = True,            
)
test_data = datasets.MNIST(
    root = 'data', 
    train = False, 
    transform = ToTensor()
)
print(train_data)
print(test_data)
print(train_data.data.size())
print(train_data.targets.size())


Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()
Dataset MNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor()
torch.Size([60000, 28, 28])
torch.Size([60000])


In [4]:
from torch.utils.data import DataLoader
loaders = {
    'train' : torch.utils.data.DataLoader(train_data, 
                                          batch_size=100, 
                                          shuffle=True, 
                                          num_workers=4),
    
    'test'  : torch.utils.data.DataLoader(test_data, 
                                          batch_size=100, 
                                          shuffle=True, 
                                          num_workers=4),
}

In [5]:
import torch.nn as nn
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(         
            nn.Conv2d(
                in_channels=1,              
                out_channels=16,            
                kernel_size=5,              
                stride=1,                   
                padding=2,                  
            ),                              
            nn.ReLU(),                      
            nn.MaxPool2d(kernel_size=2),    
        )
        self.conv2 = nn.Sequential(         
            nn.Conv2d(16, 32, 5, 1, 2),     
            nn.ReLU(),                      
            nn.MaxPool2d(2),                
        )
        # add two fully connected layers with ReLU activation
        self.fc1 = nn.Sequential(
            nn.Linear(32 * 7 * 7, 512),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(512, 512),
            nn.ReLU()
        )
        # output layer, output 10 classes
        self.out = nn.Linear(512, 10)    
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  
        x = self.fc1(x)
        x = self.fc2(x)
        output = self.out(x)
        return output, x    # return x for visualization
cnn = CNN().to(device)
loss_func = nn.CrossEntropyLoss().to(device)   
from torch import optim
optimizer = optim.Adam(cnn.parameters(), lr = 0.01)

In [6]:
from torch.autograd import Variable
num_epochs = 10

def train(num_epochs, cnn, loaders):
    
    cnn.train()
        
    # Train the model
    total_step = len(loaders['train'])
        
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(loaders['train']):
            
            # gives batch data, normalize x when iterate train_loader
            b_x = Variable(images).to(device)   # batch x
            b_y = Variable(labels).to(device)   # batch y
            output = cnn(b_x)[0]               
            loss = loss_func(output, b_y)
            
            # clear gradients for this training step   
            optimizer.zero_grad()           
            
            # backpropagation, compute gradients 
            loss.backward()                # apply gradients             
            optimizer.step()                
            
            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                       .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
train(num_epochs, cnn, loaders)

Epoch [1/10], Step [100/600], Loss: 2.3045
Epoch [1/10], Step [200/600], Loss: 2.2967
Epoch [1/10], Step [300/600], Loss: 2.3032
Epoch [1/10], Step [400/600], Loss: 2.3040
Epoch [1/10], Step [500/600], Loss: 2.2976
Epoch [1/10], Step [600/600], Loss: 2.2905
Epoch [2/10], Step [100/600], Loss: 2.3052
Epoch [2/10], Step [200/600], Loss: 2.3032
Epoch [2/10], Step [300/600], Loss: 2.3012
Epoch [2/10], Step [400/600], Loss: 2.3012
Epoch [2/10], Step [500/600], Loss: 2.3008
Epoch [2/10], Step [600/600], Loss: 2.3022
Epoch [3/10], Step [100/600], Loss: 2.3036
Epoch [3/10], Step [200/600], Loss: 2.2933
Epoch [3/10], Step [300/600], Loss: 2.3030
Epoch [3/10], Step [400/600], Loss: 2.2949
Epoch [3/10], Step [500/600], Loss: 2.2993
Epoch [3/10], Step [600/600], Loss: 2.3086
Epoch [4/10], Step [100/600], Loss: 2.3070
Epoch [4/10], Step [200/600], Loss: 2.3061
Epoch [4/10], Step [300/600], Loss: 2.3068
Epoch [4/10], Step [400/600], Loss: 2.3025
Epoch [4/10], Step [500/600], Loss: 2.2886
Epoch [4/10

In [12]:
def test():
    # Test the model
    cnn.eval()    
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in loaders['test']:
            test_output, last_layer = cnn(images.to(device))
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy = (pred_y == labels.to(device)).sum().item() / float(labels.size(0))
        print('Test Accuracy of the model on the 10000 test images: %.2f' % accuracy)
test()

Test Accuracy of the model on the 10000 test images: 0.09


In [19]:
sample = next(iter(loaders['test']))
imgs, lbls = sample
actual_number = lbls[:10].numpy()
test_output, last_layer = cnn(imgs[:10])
pred_y = torch.max(test_output.to(device), 1)[1].data.numpy().squeeze()
print(f'Prediction number:\t {pred_y}')
print(f'Actual number:\t\t {actual_number}')

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor