In [1]:
# FOR RESNEXT ------------------------------------------------
# https://pytorch.org/hub/pytorch_vision_resnext/

# download this file and put it in the same folder as this notebook (it is read later as "imagenet_class_index.json")
# https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json



# FOR MASK RCNN / COCO -----------------------------------------
# run this - needed for the Coco Dataset 
# pip install git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI
# repo for it: https://github.com/philferriere/cocoapi

# http://cocodataset.org/#download
# from above, download 2017 Train/Val annotations
# 2017 Val images
# extract both into a folder named 'data' (so that data/val2017 and data/annotations exist), or edit the paths in the CocoDetection cell (the last cell with code)

import torch
from torchvision import models, transforms
import torchvision

from pycocotools.coco import COCO

import pandas as pd

# Prefer the first CUDA device when one is visible; otherwise fall back to the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

cuda:0


# ResNeXt

In [2]:
# Pretrained ResNeXt-50 (32x4d), switched to inference mode right away.
network = models.resnext50_32x4d(pretrained=True).eval()

# ImageNet is no longer publicly downloadable through torchvision: passing
# download=True raises an error in current releases. The official archives
# have to be obtained manually and placed in the "data" folder beforehand.
imagenet_data = torchvision.datasets.ImageNet("data")
# NOTE(review): no transform is passed to the dataset, so samples are presumably
# raw PIL images that the default collate function cannot batch -- confirm and
# add a transform before iterating over data_loader.
data_loader = torch.utils.data.DataLoader(imagenet_data, batch_size=10, shuffle=True)

In [3]:
# Download the example image used by the PyTorch Hub ResNeXt page.
# `import urllib` alone does not load the `request` submodule, so it is imported
# explicitly here.
import urllib.request
from PIL import Image

url, filename = ("https://github.com/pytorch/hub/raw/master/dog.jpg", "dog.jpg")
# The previous Python 2 `urllib.URLopener` attempt sat behind a bare `except:`,
# which also hid genuine failures. Call the Python 3 API directly and let
# download errors surface.
urllib.request.urlretrieve(url, filename)

image = Image.open(filename)

In [4]:
# Preprocessing applied to images used for testing.
# (transforms.ToPILImage() is left out: it is only commented out in the original pipeline.)
preprocessing_steps = [
    transforms.Resize(256),
    # 224 is presumably the minimum input size for the network -- TODO confirm
    transforms.CenterCrop(224),
    # converts to a tensor and divides pixel values by 255
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
trans = transforms.Compose(preprocessing_steps)

In [5]:
# Run the example image through the preprocessing pipeline.
input_img = trans(image)
print(input_img.shape)
# Prepend a batch axis of size 1 so the tensor has four dimensions.
input_batch = torch.unsqueeze(input_img, 0)
print(input_batch.shape)

torch.Size([3, 224, 224])
torch.Size([1, 3, 224, 224])


In [6]:
# Forward pass of the image batch with gradient tracking disabled.
with torch.no_grad():
    output = network(input_batch)

# The network returns unnormalized scores; a softmax over the first (only)
# batch entry turns them into probabilities (as suggested on the PyTorch Hub page).
preds = torch.softmax(output[0], dim=0)
# Index of the highest-probability class.
torch.argmax(preds, dim=0)

tensor(258)

In [7]:
# Load the class-index file and transpose it so each class id becomes a row;
# column 0 is then dropped and the remaining column is squeezed into a Series
# of class names.
class_index = pd.read_json("imagenet_class_index.json").T
labels = class_index.drop(columns=[0]).squeeze()
labels # Series

0                  tench
1               goldfish
2      great_white_shark
3            tiger_shark
4             hammerhead
             ...        
995            earthstar
996     hen-of-the-woods
997               bolete
998                  ear
999        toilet_tissue
Name: 1, Length: 1000, dtype: object

# Mask R-CNN

In [8]:
# Pretrained Mask R-CNN from torchvision's detection models; note that this
# rebinds `network`, replacing the ResNeXt model loaded earlier.
# Docs: https://pytorch.org/docs/stable/torchvision/models.html#object-detection-instance-segmentation-and-person-keypoint-detection
network = models.detection.maskrcnn_resnet50_fpn(pretrained=True)

In [9]:
# Put the Mask R-CNN model into evaluation mode; as the last expression of the
# cell, the returned module is also displayed as the cell output.
network.eval()

MaskRCNN(
  (transform): GeneralizedRCNNTransform()
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d()
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d()
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d()
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d()
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d()
          )
    

In [10]:
# COCO 2017 validation set: images are read from data/val2017 and annotations
# from the instances JSON file.
# The transform must be an *instance*: passing the bare `transforms.ToTensor`
# class would call the constructor with the image when the dataset is indexed,
# which raises a TypeError.
coco_val = torchvision.datasets.CocoDetection(
    "data/val2017",
    annFile="data/annotations/instances_val2017.json",
    transform=transforms.ToTensor(),
)

loading annotations into memory...
Done (t=1.56s)
creating index...
index created!


In [11]:
# https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
# TODO: read through this tutorial; it has not been studied in detail yet, but it looks likely to help a lot