In [11]:
import importlib
import json
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchvision.models as models
from PIL import Image
from torchvision import transforms
from torch.nn import functional as F

sys.path.append("../src")
import dataloader
import mitdata
from dataloader import MITDataLoader
from mitdata import MITData

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
resnet50 = models.resnet50(pretrained=True)
resnet50.eval()
resnet50.to(device)
class_index = json.load(open('../data/imagenet_class_index.json', 'r'))

In [4]:
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(), normalize
])

In [37]:
train_loader = MITDataLoader(train=True)
test_loader = MITDataLoader(train=False)

'breaking'

In [7]:
def resnet_imagenet(data):
    imgs = data.load_images()
    img_tensor = []
    for img in imgs:
        im = Image.open(img)
        im = preprocess(im)
        img_tensor.append(im)
    tensor = torch.stack(img_tensor, dim=0).to(device)
    result = resnet50(tensor)
    result_ave = torch.mean(result, 0)
    _, label = torch.max(result_ave, 0)
    return label.item()

In [29]:
result = []
train_loader.reset()
for n, data in train_loader:
    #if n == 30:
    #   break
    print("computing for label index: {0:4d}, movie{1}".format(
        n, data.filename))
    label = resnet_imagenet(data)
    clazz = class_index[str(label)][1]
    result.append({
        "index": n,
        "train": "training",
        "path": os.path.join(data.dir_path, data.filename),
        "category": data.category,
        "imagenet_label": clazz,
        "imagenet_id": label
    })

computing for label index:    0, moviev018_0048.mp4
570
computing for label index:    1, moviev018_0099.mp4
796
computing for label index:    2, moviev018_0002.mp4
478
computing for label index:    3, moviev018_0004.mp4
810
computing for label index:    4, moviev018_0036.mp4
825
computing for label index:    5, moviev018_0068.mp4
438
computing for label index:    6, moviev018_0054.mp4
556
computing for label index:    7, moviev018_0090.mp4
471
computing for label index:    8, moviev186_0056.mp4
719
computing for label index:    9, moviev186_0083.mp4
702
computing for label index:   10, moviev186_0055.mp4
906
computing for label index:   11, moviev186_0041.mp4
906
computing for label index:   12, moviev186_0027.mp4
680
computing for label index:   13, moviev186_0019.mp4
697
computing for label index:   14, moviev186_0001.mp4
794
computing for label index:   15, moviev186_0075.mp4
629
computing for label index:   16, moviev225_0045.mp4
594
computing for label index:   17, moviev225_0043.

In [38]:
test_loader.reset()
for n, data in test_loader:
    #if n == 30:
    #   break
    print("computing for label index: {0:4d}, movie{1}".format(
        n, data.filename))
    label = resnet_imagenet(data)
    clazz = class_index[str(label)][1]
    result.append({
        "index": n,
        "train": "test",
        "path": os.path.join(data.dir_path, data.filename),
        "category": data.category,
        "imagenet_label": clazz,
        "imagenet_id": label
    })

computing for label index:    0, moviev277_0047.mp4
570
computing for label index:    1, moviev252_0075.mp4
463
computing for label index:    2, moviev335_0098.mp4
929
computing for label index:    3, moviev256_0076.mp4
543
computing for label index:    4, moviev125_0086.mp4
457
computing for label index:    5, moviev245_0075.mp4
851
computing for label index:    6, moviev139_0091.mp4
745
computing for label index:    7, moviev192_0049.mp4
920
computing for label index:    8, moviev009_0050.mp4
465
computing for label index:    9, moviev224_0001.mp4
105
computing for label index:   10, moviev289_0029.mp4
854
computing for label index:   11, moviev207_0026.mp4
943
computing for label index:   12, moviev047_0082.mp4
851
computing for label index:   13, moviev114_0092.mp4
939
computing for label index:   14, moviev151_0098.mp4
433
computing for label index:   15, moviev133_0018.mp4
461
computing for label index:   16, moviev013_0079.mp4
834
computing for label index:   17, moviev049_0006.

In [41]:
df = pd.DataFrame(result)
df = df.set_index("index")
df.to_csv("../result/imagenet_training.csv", index_label="index")

In [48]:
# th architecture to use
arch = 'resnet50'

# load the pre-trained weights
model_file = '%s_places365.pth.tar' % arch
if not os.access(model_file, os.W_OK):
    weight_url = 'http://places2.csail.mit.edu/models_places365/' + model_file
    os.system('wget ' + weight_url)

model = models.__dict__[arch](num_classes=365)
checkpoint = torch.load(model_file, map_location=lambda storage, loc: storage)
state_dict = {
    str.replace(k, 'module.', ''): v
    for k, v in checkpoint['state_dict'].items()
}
model.load_state_dict(state_dict)
model.to(device)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

In [44]:
# load the image transformer
centre_crop = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [45]:
# load the class label
file_name = 'categories_places365.txt'
if not os.access(file_name, os.W_OK):
    synset_url = 'https://raw.githubusercontent.com/csailvision/places365/master/categories_places365.txt'
    os.system('wget ' + synset_url)
classes = list()
with open(file_name) as class_file:
    for line in class_file:
        classes.append(line.strip().split(' ')[0][3:])
classes = tuple(classes)

In [46]:
def resnet_places365(data):
    imgs = data.load_images()
    img_tensor = []
    for img in imgs:
        im = Image.open(img)
        im = centre_crop(im)
        img_tensor.append(im)
    tensor = torch.stack(img_tensor, dim=0).to(device)
    result = model(tensor)
    result_ave = torch.mean(result, 0)
    _, label = torch.max(result_ave, 0)
    return label.item()

In [66]:
result_places = []
train_loader.reset()
for n, data in train_loader:
    print("computing for label index: {0:4d}, movie{1}".format(
        n, data.filename))
    print(label)
    label = resnet_places365(data)
    clazz = classes[label]
    result_places.append({
        "index": n,
        "train": "training",
        "path": os.path.join(data.dir_path, data.filename),
        "category": data.category,
        "imagenet_label": clazz,
        "imagenet_id": label
    })

computing for label index:    0, moviev018_0048.mp4
135
computing for label index:    1, moviev018_0099.mp4
122
computing for label index:    2, moviev018_0002.mp4
307
computing for label index:    3, moviev018_0004.mp4
98
computing for label index:    4, moviev018_0036.mp4
341
computing for label index:    5, moviev018_0068.mp4
13
computing for label index:    6, moviev018_0054.mp4
191
computing for label index:    7, moviev018_0090.mp4
187
computing for label index:    8, moviev186_0056.mp4
186
computing for label index:    9, moviev186_0083.mp4
238
computing for label index:   10, moviev186_0055.mp4
122
computing for label index:   11, moviev186_0041.mp4
179
computing for label index:   12, moviev186_0027.mp4
248
computing for label index:   13, moviev186_0019.mp4
179
computing for label index:   14, moviev186_0001.mp4
303
computing for label index:   15, moviev186_0075.mp4
179
computing for label index:   16, moviev225_0045.mp4
50
computing for label index:   17, moviev225_0043.mp4

In [68]:
test_loader.reset()
for n, data in test_loader:
    print("computing for label index: {0:4d}, movie{1}".format(
        n, data.filename))
    print(label)
    label = resnet_places365(data)
    clazz = classes[label]
    result_places.append({
        "index": n,
        "train": "test",
        "path": os.path.join(data.dir_path, data.filename),
        "category": data.category,
        "imagenet_label": clazz,
        "imagenet_id": label
    })

computing for label index:    0, moviev277_0047.mp4
356
computing for label index:    1, moviev252_0075.mp4
240
computing for label index:    2, moviev335_0098.mp4
179
computing for label index:    3, moviev256_0076.mp4
248
computing for label index:    4, moviev125_0086.mp4
7
computing for label index:    5, moviev245_0075.mp4
19
computing for label index:    6, moviev139_0091.mp4
177
computing for label index:    7, moviev192_0049.mp4
259
computing for label index:    8, moviev009_0050.mp4
186
computing for label index:    9, moviev224_0001.mp4
346
computing for label index:   10, moviev289_0029.mp4
227
computing for label index:   11, moviev207_0026.mp4
137
computing for label index:   12, moviev047_0082.mp4
124
computing for label index:   13, moviev114_0092.mp4
137
computing for label index:   14, moviev151_0098.mp4
277
computing for label index:   15, moviev133_0018.mp4
223
computing for label index:   16, moviev013_0079.mp4
303
computing for label index:   17, moviev049_0006.mp4

In [71]:
df = pd.DataFrame(result_places)
df = df.set_index("index")
df.to_csv("../result/places365_training.csv", index_label="index")

In [70]:
len(result_places)

1250

In [73]:
df["imagenet_id"].unique().shape

(246,)