In [1]:
import os
import torch
import math
import numpy as np
import torchvision
import torch.nn as nn
from PIL import Image
import torch.utils.model_zoo as model_zoo
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
from torch.autograd import Variable
from tqdm import tqdm
import torch.backends.cudnn as cudnn
from torchvision import datasets, models, transforms

cuda = torch.cuda.is_available()

In [10]:


__all__ = ['ResNet', 'resnet']


model_urls = {
    'resnet18': 'http://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'http://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'http://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'http://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'http://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

def cfg(depth):
    depth_lst = [18, 34, 50, 101, 152]
    assert (depth in depth_lst), "Error : ResNet depth should be either 18, 34, 50, 101, 152"
    cf_dict = {
        '18' : (BasicBlock, [2,2, 2,2]),
        '34' : (BasicBlock, [3,4, 6,3]),
        '50' : (Bottleneck, [3,4, 6,3]),
        '101': (Bottleneck, [3,4,23,3]),
        '152': (Bottleneck, [3,8,36,3]),
    }

    return cf_dict[str(depth)]


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

def resnet(pretrained=False, depth=18, **kwargs):
    """Constructs ResNet models for various depths
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        depth (int) : Integer input of either 18, 34, 50, 101, 152
    """
    block, num_blocks = cfg(depth)
    model = ResNet(block, num_blocks, **kwargs)
    if (pretrained):
        print("| Downloading ImageNet fine-tuned ResNet-%d..." %depth)
        model.load_state_dict(model_zoo.load_url(model_urls['resnet%d' %depth]))
    return model

In [11]:
# model = resnet(True, 50)
# num_ftrs = model.fc.in_features
# model.fc = nn.Linear(57344, 8)

# if cuda:
#     model = model.cuda()

| Downloading ImageNet fine-tuned ResNet-50...


In [6]:
len(list(model.children()))

10

In [7]:
transform = transforms.Compose(
    [
     transforms.Resize((127,223)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
     
     ])

In [8]:
# test_data_dir = "F:/dataset/test"
# train_data_dir = "F:/dataset/train"

In [7]:
# test_data = torchvision.datasets.ImageFolder(root=test_data_dir,transform=transform)
# testloader = torch.utils.data.DataLoader(test_data,batch_size = 1, shuffle=True)

# train_data = torchvision.datasets.ImageFolder(root=train_data_dir,transform=transform)
# trainloader = torch.utils.data.DataLoader(train_data)

In [22]:
pre_formed = "WebService/spectrograms"

In [23]:
dummy_data = torchvision.datasets.ImageFolder(root=pre_formed,transform=transform)
dummyloader = torch.utils.data.DataLoader(dummy_data)

In [30]:
# classes = [d 
#            for d in os.listdir(train_data_dir) if os.path.isdir(os.path.join(train_data_dir, d))]

In [28]:
classes = ['dance',
 'electronic',
 'heavy_metal',
 'hip-hop',
 'jazz',
 'rock',
 'romantic',
 'sufi']

In [29]:
classes

['dance',
 'electronic',
 'heavy_metal',
 'hip-hop',
 'jazz',
 'rock',
 'romantic',
 'sufi']

In [None]:
train_data

In [None]:
dummy_data

In [None]:
losss = nn.CrossEntropyLoss()
criterion = nn.CrossEntropyLoss()
optimizer= optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)

In [34]:
check = torch.load("Gtorch")

In [36]:
model.load

AttributeError: 'ResNet' object has no attribute 'load'

<bound method Module.load_state_dict of ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, k

In [33]:
model.load_state_dict(check)

AttributeError: 'ResNet' object has no attribute 'copy'

In [None]:
for epoch in tqdm(range(3)):
    curr_loss = 0.0
    
    for i, data in enumerate(trainloader, 0):
        # takes labels from first sub directory of main folder
        # inputs are each images in these sub directory
        inputs, labels  = data
        # gete data in cuda format!!
        inputs, labels  = Variable(inputs.cuda()), Variable(labels.cuda())
        
#         print(inputs.shape)
        # must make all gradients 0
        optimizer.zero_grad()
#         input.s

        outputs = model(inputs)
        _, preds = torch.max(outputs.data, 1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        
        curr_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, curr_loss / 2000))
            curr_loss = 0.
            
            

            
print('Finished Training')

In [None]:
torch.save(model, "GNtorch")

In [None]:
# for epoch in tqdm(range(2)):
#     curr_loss = 0.0
    
#     for i, data in enumerate(trainloader, 0):
#         # takes labels from first sub directory of main folder
#         # inputs are each images in these sub directory
#         inputs, labels  = data
#         # gete data in cuda format!!
#         inputs, labels  = Variable(inputs.cuda()), Variable(labels.cuda())
        
# #         print(inputs.shape)
#         # must make all gradients 0
#         optimizer.zero_grad()
# #         input.s

#         outputs = model(inputs)
#         _, preds = torch.max(outputs.data, 1)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()
        
        
#         curr_loss += loss.item()
#         if i % 2000 == 1999:    # print every 2000 mini-batches
#             print('[%d, %5d] loss: %.3f' %
#                   (epoch + 1, i + 1, curr_loss / 2000))
#             curr_loss = 0.
            
            

            
# print('Finished Training')

In [None]:
torch.save(model, "GNtorch")

In [None]:
# for epoch in tqdm(range(2)):
#     curr_loss = 0.0
    
#     for i, data in enumerate(trainloader, 0):
#         # takes labels from first sub directory of main folder
#         # inputs are each images in these sub directory
#         inputs, labels  = data
#         # gete data in cuda format!!
#         inputs, labels  = Variable(inputs.cuda()), Variable(labels.cuda())
        
# #         print(inputs.shape)
#         # must make all gradients 0
#         optimizer.zero_grad()
# #         input.s

#         outputs = model(inputs)
#         _, preds = torch.max(outputs.data, 1)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()
        
        
#         curr_loss += loss.item()
#         if i % 2000 == 1999:    # print every 2000 mini-batches
#             print('[%d, %5d] loss: %.3f' %
#                   (epoch + 1, i + 1, curr_loss / 2000))
#             curr_loss = 0.
            
            

            
# print('Finished Training')

In [46]:
model = torch.load("Gtorch")

In [47]:
model.load_state_dict

<bound method Module.load_state_dict of ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, k

In [43]:
 checkpoint = torch.load("Gtorch")

In [49]:
model.state_dict = checkpoint.state_dict

In [50]:
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        model = resnet(True, 50)
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(57344, 8) 
        
        checkpoint = torch.load("Gtorch")
#         model.load_state_dict(checkpoint)
        model.state_dict = checkpoint.state_dict
        
        num_final_in = model.fc.in_features
        model.fc = nn.Linear(num_final_in, 8)
        self.fmodel = nn.Sequential(*list(model.modules()))
        self.sig = nn.Sigmoid()
        
    def forward(self, x):
        print(x.shape)
        x = self.fmodel(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.sig(x)
        return x

model = MyModel()

| Downloading ImageNet fine-tuned ResNet-50...


In [13]:
model

MyModel(
  (fmodel): Sequential(
    (0): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace)
        

In [51]:
model

MyModel(
  (fmodel): Sequential(
    (0): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace)
        

In [52]:
model = model.cuda()

In [53]:
from collections import Counter
correct = 0
total = 0
all_pred = []
genre_output = []
genre_label = []
with torch.no_grad():
    for data in dummyloader:
        images, labels = data 
        
        if cuda:
            images, labels  = images.cuda(), labels.cuda()
#         print(images.shape)
        outputs = model(images)
        genre_output.append(outputs.cpu().data)
#         print(outputs.shape)
#         print(torch.softmax(outputs))
        
    genre_output = torch.cat(genre_output)
    _, max_indices = genre_output.max(dim=1)
    max_index = Counter(max_indices).most_common(1)[0] 
    print(max_index )
    
        
#         _, predicted = torch.max(outputs.data, 1)
#         print(predicted)
#         predicted_genre = classes[np.array(predicted)[0]]
#         all_pred.append(predicted_genre)
#     final_pred =Counter(all_pred)
#     print(final_pred)
# print('Accuracy of the network on the  test images: %d %%' % (
#     100 * correct / total))

torch.Size([1, 3, 127, 223])


RuntimeError: Given input size: (2048x4x7). Calculated output size: (2048x0x1). Output size is too small at c:\programdata\miniconda3\conda-bld\pytorch_1524549877902\work\aten\src\thcunn\generic/SpatialAveragePooling.cu:63

In [None]:
[ 0,  0,  0,  0,  7,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  4,  7,  7,
         7,  0,  7,  0,  0,  7,  0,  0,  0,  0,  0,  0,  1,  7,
         0,  0].count(0)