In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from PIL import Image
%matplotlib inline

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torchvision import datasets, models, transforms

In [3]:
import os

In [5]:
torch.cuda.is_available()

True

In [4]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomSizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

image_datasets = {x: datasets.ImageFolder(x,
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=50,
                                             shuffle=True, num_workers=6)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

use_gpu = torch.cuda.is_available()

In [5]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in dataloaders[phase]:
                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.data[0]
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [6]:
df_train = pd.read_csv('labels.csv')
df_test = pd.read_csv('sample_submission.csv')

In [5]:
df_train.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [6]:
typelist = df_test.columns
typelist = typelist[1:]

mapping = {typelist[i]:i for i in range(120)}

df_train['breed'] = df_train['breed'].map(mapping)

y_train = df_train['breed'].as_matrix()

In [7]:
name_test = df_train['id'].iloc[0]

In [8]:
df_test.columns

Index(['id', 'affenpinscher', 'afghan_hound', 'african_hunting_dog',
       'airedale', 'american_staffordshire_terrier', 'appenzeller',
       'australian_terrier', 'basenji', 'basset',
       ...
       'toy_poodle', 'toy_terrier', 'vizsla', 'walker_hound', 'weimaraner',
       'welsh_springer_spaniel', 'west_highland_white_terrier', 'whippet',
       'wire-haired_fox_terrier', 'yorkshire_terrier'],
      dtype='object', length=121)

In [9]:
name_test

'000bec180eb18c7604dcecc8fe0dba07'

# Skip to Load File

In [39]:
M = Image.open('train/{}.jpg'.format(name_test))

In [40]:
M_mirror = np.flip(M,1)

In [41]:
!PIL.__version__

/bin/sh: 1: PIL.__version__: not found


In [42]:
M.size

(500, 375)

In [43]:
imsize = 224

In [11]:
imsize = 224
X_train = np.zeros((N,imsize,imsize,3))

# Load File

In [11]:
X_data = np.load('X_train.npy')

In [22]:
#normalize
X_data = X_data/256
#y_train = np.concatenate([y_train,y_train],axis=0)
X_train, X_val, y_train, y_val = train_test_split(X_data, y_train, test_size=0.20, random_state=42)
del X_data

In [37]:
#del X_train, X_val, y_train, y_val

In [27]:
#Combine Data
train_data = [X_train, y_train]
val_data = [X_val, y_val]

In [9]:
# model_conv = models.resnet50(pretrained=True)
# for param in model_conv.parameters():
#     param.requires_grad = False
# num_ftrs = model_conv.fc.in_features
# model_conv.fc = nn.Sequential(
#     nn.Linear(num_ftrs, 1000),
#     nn.ReLU(),
#     nn.Dropout(p=0.5),
#     nn.Linear(1000, 120) 
# )
model_conv = models.resnet152()
for param in model_conv.parameters():
    param.requires_grad = False
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Sequential(
    nn.Linear(num_ftrs, 1000),
    nn.ReLU(),
    nn.Linear(1000, 120) 
) 

model_conv.load_state_dict(torch.load('model/test1'))

model_conv = model_conv.cuda()
criterion = nn.CrossEntropyLoss()
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

In [10]:
import time
model_conv = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=1)

Epoch 0/0
----------
train Loss: 0.0278 Acc: 0.7246
val Loss: 0.0165 Acc: 0.8450

Training complete in 5m 9s
Best val Acc: 0.845000


In [141]:
torch.save(model_conv.state_dict(), 'model/test2')

In [19]:
# data_transforms = {
#     'train': transforms.Compose([
#         transforms.RandomSizedCrop(224),
#         transforms.RandomHorizontalFlip(),
#         transforms.ToTensor(),
#         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#     ]),
#     'val': transforms.Compose([
#         transforms.Scale(256),
#         transforms.CenterCrop(224),
#         transforms.ToTensor(),
#         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
#     ]),
# }
# image_datasets = {x: datasets.ImageFolder(x,
#                                           data_transforms[x])
#                   for x in ['train', 'val']}
# dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=100,
#                                              shuffle=True, num_workers=6)
#               for x in ['train', 'val']}
# dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
# class_names = image_datasets['train'].classes
test_transform = transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])


In [48]:
st = time.time()
file_list = os.listdir('test')
example = file_list[0]
m = nn.Softmax()
image = Image.open('test/'+example)
image = test_transform(image).view(1,3,224,224)
image = Variable(image)
image = image.cuda()
# image
outputs = model_conv(image)
# outputs = m(outputs).data.cpu().numpy()
# df_out.iloc[1:2,1:121] = outputs
# tse = time.time() - st
# tse

RuntimeError: argument 2 (training) must be bool, not str

In [197]:
model_conv.cuda()

ResNet (
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
  (relu): ReLU (inplace)
  (maxpool): MaxPool2d (size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1))
  (layer1): Sequential (
    (0): Bottleneck (
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      (relu): ReLU (inplace)
      (downsample): Sequential (
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (1): Bott

In [203]:
_, pred = torch.max(outputs.data,1)
pred


 49
[torch.cuda.LongTensor of size 1 (GPU 0)]

In [None]:
_, preds = torch.max(outputs.data, 1)
preds

In [17]:
def load_image(place):
    image = Image.open(place)
    image = test_transform(image)
#     image = Variable(image)
    return image

In [63]:
def load_image_batch(model,place, file_list, batch_size=100):
    batch_count = 0
    image_counts = len(file_list)
    loops = np.int(np.ceil(image_counts/batch_size))
    inputs = Variable(torch.zeros(batch_size,3,224,224))
    model.train(False)
    m = nn.Softmax()
    for i in range(loops):
        st = batch_size*i
        ed = st + batch_size
        for index, file_name in enumerate(file_list[st:ed]):
            inputs[index] = load_image(os.path.join(place, file_name))
        inputs = inputs.cuda()
        outputs = model(inputs)
        if ed > image_counts:
            ed = image_counts
        df_out.iloc[st:ed,1:121] = m(outputs).data.cpu().numpy()
        

In [64]:
file_list = os.listdir('test')
file_list.sort()

In [65]:
load_image_batch(model_conv,'test',file_list)

ValueError: could not broadcast input array from shape (100,120) into shape (57,120)

In [133]:
m = nn.Softmax()
for index, name in enumerate(file_list):
    place = 'test/'+name
    image = load_image(place)
    outputs = model_conv(image)
    df_out.iloc[index:index+1,1:121] = m(outputs).data.cpu().numpy()
    count += 1

In [124]:
for x in class_names:
    df_test[x] = 0

In [55]:
df_out = df_test.copy()

['000621fb3cbb32d8935728e48679680e.jpg',
 '00102ee9d8eb90812350685311fe5890.jpg',
 '0012a730dfa437f5f3613fb75efcd4ce.jpg',
 '001510bc8570bbeee98c8d80c8a95ec1.jpg',
 '001a5f3114548acdefa3d4da05474c2e.jpg',
 '00225dcd3e4d2410dd53239f95c0352f.jpg',
 '002c2a3117c2193b4d26400ce431eebd.jpg',
 '002c58d413a521ae8d1a5daeb35fc803.jpg',
 '002f80396f1e3db687c5932d7978b196.jpg',
 '0036c6bcec6031be9e62a257b1c3c442.jpg',
 '0041940322116ae58c38130f5a6f71f9.jpg',
 '0042d6bf3e5f3700865886db32689436.jpg',
 '004476c96f575879af4af471af65cae8.jpg',
 '00485d47de966a9437ad3b33ac193b6f.jpg',
 '00496f65de6cc319145ce97bd6e90360.jpg',
 '004bf14426d1a830d459a9e0c0721309.jpg',
 '004c3721eb88358f462cdcec6b2380b7.jpg',
 '00559f56aab7e0a7749220f6aed65162.jpg',
 '005b281f1a4d6f29d527c9585e9bd33c.jpg',
 '005b6c6c76fefd6b458ef6fb6e54da6e.jpg',
 '006870b49353779b25eeb91fed43c31a.jpg',
 '0068f3a21b159ece126a28580cdad7a0.jpg',
 '0069b1cc4546fc98f84f981bf9a0696a.jpg',
 '0077bc3c63486ff09d3774d956af8f76.jpg',
 '00780e5d2bf4f7

In [36]:
for index, row in df_test.iterrows():
    name = row['id']
    chosen_class = out[name+'.jpg']
    df_out.iloc[index][chosen_class] = 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [44]:
file_list

['000621fb3cbb32d8935728e48679680e.jpg',
 '00102ee9d8eb90812350685311fe5890.jpg',
 '0012a730dfa437f5f3613fb75efcd4ce.jpg',
 '001510bc8570bbeee98c8d80c8a95ec1.jpg',
 '001a5f3114548acdefa3d4da05474c2e.jpg',
 '00225dcd3e4d2410dd53239f95c0352f.jpg',
 '002c2a3117c2193b4d26400ce431eebd.jpg',
 '002c58d413a521ae8d1a5daeb35fc803.jpg',
 '002f80396f1e3db687c5932d7978b196.jpg',
 '0036c6bcec6031be9e62a257b1c3c442.jpg',
 '0041940322116ae58c38130f5a6f71f9.jpg',
 '0042d6bf3e5f3700865886db32689436.jpg',
 '004476c96f575879af4af471af65cae8.jpg',
 '00485d47de966a9437ad3b33ac193b6f.jpg',
 '00496f65de6cc319145ce97bd6e90360.jpg',
 '004bf14426d1a830d459a9e0c0721309.jpg',
 '004c3721eb88358f462cdcec6b2380b7.jpg',
 '00559f56aab7e0a7749220f6aed65162.jpg',
 '005b281f1a4d6f29d527c9585e9bd33c.jpg',
 '005b6c6c76fefd6b458ef6fb6e54da6e.jpg',
 '006870b49353779b25eeb91fed43c31a.jpg',
 '0068f3a21b159ece126a28580cdad7a0.jpg',
 '0069b1cc4546fc98f84f981bf9a0696a.jpg',
 '0077bc3c63486ff09d3774d956af8f76.jpg',
 '00780e5d2bf4f7

In [68]:
for index, row in df_test.iterrows():
    name = row['id']
    chosen_class = out[name+'.jpg']
    df_out.set_value(index, chosen_class, 1)

In [122]:
df_out.iloc[2:4,2:4] = 5

In [68]:
def load_image(place):
    image = Image.open(place)
    image = test_transform(image).view(1,3,224,224)
    image = Variable(image)
    return image

In [74]:
model_conv.train(False)
for i in range(10000,10357):
    inputs = load_image(os.path.join('test',file_list[i]))
    inputs = inputs.cuda()
    outputs = model_conv(inputs)
    df_out.iloc[i:i+1,1:121] = m(outputs).data.cpu().numpy()

In [75]:
df_out

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.000412113,0.000325203,7.83019e-06,1.44187e-06,6.72905e-06,3.07938e-05,6.41483e-05,1.2849e-05,4.15843e-05,...,0.00022263,7.87111e-05,4.68433e-06,2.79176e-05,3.82708e-06,0.000359385,5.05694e-05,4.59121e-06,1.73485e-05,0.000573537
1,00102ee9d8eb90812350685311fe5890,6.648e-05,4.41944e-05,4.56934e-05,4.68478e-06,2.12042e-05,0.000204795,5.87201e-05,2.81313e-05,3.12394e-06,...,0.000288497,6.31082e-05,2.44688e-06,4.93184e-06,1.01473e-05,2.78094e-05,0.00234421,8.51379e-06,4.00264e-06,4.45752e-05
2,0012a730dfa437f5f3613fb75efcd4ce,6.75317e-05,0.0468661,0.00066804,0.000188932,0.000365711,0.000294171,3.87663e-05,3.64403e-05,0.0041606,...,0.000160579,5.17317e-05,0.000139504,0.00145361,0.00213458,0.028577,0.000245883,0.00563717,0.000909941,2.98392e-05
3,001510bc8570bbeee98c8d80c8a95ec1,0.00811981,0.00160797,0.00389652,0.000287018,0.0367351,0.0163212,0.000333977,0.000617165,0.00134678,...,0.00101727,0.00170711,0.000170364,0.000496616,0.000833314,8.97397e-05,0.000257868,0.0105883,0.000109499,0.000304195
4,001a5f3114548acdefa3d4da05474c2e,0.0265716,0.00344845,2.39726e-05,5.25203e-05,2.36173e-05,1.00301e-05,0.00109599,1.00492e-05,2.83951e-05,...,0.00248881,3.30456e-05,8.3313e-06,1.09312e-05,1.07719e-05,3.21764e-05,0.00153821,1.48202e-05,0.000752224,0.00466033
5,00225dcd3e4d2410dd53239f95c0352f,0.00323672,0.0169006,0.00235996,0.0468842,0.00032974,0.000203301,0.000975596,7.35083e-05,0.000370136,...,0.00664814,0.000186188,0.000148549,0.000419417,0.000449283,0.000252631,0.000200434,0.000831194,0.0265102,0.000503353
6,002c2a3117c2193b4d26400ce431eebd,0.00116885,0.000424934,0.00023366,0.00022898,5.37972e-05,5.64445e-05,0.817974,0.000808987,2.17723e-05,...,0.000152145,0.000707631,3.31116e-05,1.15153e-05,1.3587e-05,9.41757e-05,0.00230156,4.22128e-05,0.000228635,0.0380181
7,002c58d413a521ae8d1a5daeb35fc803,0.000612381,0.000291766,0.000439428,5.85706e-05,7.75547e-05,0.000492569,0.000614867,0.000144452,2.07521e-05,...,0.00128322,0.000274336,2.0832e-05,3.76929e-05,7.00342e-05,0.000169824,0.00922756,8.68754e-05,0.000129092,0.000393808
8,002f80396f1e3db687c5932d7978b196,6.80764e-05,0.00065326,0.000234155,0.002435,2.95855e-05,8.19261e-06,0.000267163,5.54494e-06,7.30107e-06,...,5.96046e-06,6.62364e-06,1.83431e-05,1.72018e-05,4.5758e-05,1.56643e-05,0.000181361,0.000845729,0.00422231,1.19674e-05
9,0036c6bcec6031be9e62a257b1c3c442,0.000335494,0.000971824,0.000108775,0.000468418,0.000213483,2.82987e-05,0.000241618,2.47664e-06,0.000804391,...,0.000943987,7.72381e-06,0.000664377,5.0678e-05,0.000760008,0.00284111,2.4969e-05,1.64496e-05,6.36624e-05,0.00021627


In [73]:
df_out.iloc[10000]

id                                f785b2499ca08a8ec3f20972c358cfd7
affenpinscher                                           4.1274e-05
afghan_hound                                           1.75809e-05
african_hunting_dog                                      0.0011192
airedale                                               1.95235e-05
american_staffordshire_terrier                         0.000184334
appenzeller                                            0.000614734
australian_terrier                                     0.000807914
basenji                                                 0.00564422
basset                                                 0.000141584
beagle                                                   0.0039017
bedlington_terrier                                     1.68431e-05
bernese_mountain_dog                                   0.000169538
black-and-tan_coonhound                                1.02494e-05
blenheim_spaniel                                        0.0012

In [69]:
df_out.sum()

id                                000621fb3cbb32d8935728e48679680e00102ee9d8eb90...
affenpinscher                                                                    74
afghan_hound                                                                    144
african_hunting_dog                                                              84
airedale                                                                        140
american_staffordshire_terrier                                                   53
appenzeller                                                                       2
australian_terrier                                                              139
basenji                                                                         123
basset                                                                           83
beagle                                                                          201
bedlington_terrier                                                          

In [137]:
df_out.sum()

id                                000621fb3cbb32d8935728e48679680e00102ee9d8eb90...
affenpinscher                                                               73.6057
afghan_hound                                                                134.227
african_hunting_dog                                                         77.3733
airedale                                                                    118.365
american_staffordshire_terrier                                              68.1432
appenzeller                                                                 64.1888
australian_terrier                                                          115.613
basenji                                                                     123.842
basset                                                                      84.4502
beagle                                                                      105.391
bedlington_terrier                                                          

In [76]:
df_out.to_csv('submit.csv')

In [77]:
file_list

['000621fb3cbb32d8935728e48679680e.jpg',
 '00102ee9d8eb90812350685311fe5890.jpg',
 '0012a730dfa437f5f3613fb75efcd4ce.jpg',
 '001510bc8570bbeee98c8d80c8a95ec1.jpg',
 '001a5f3114548acdefa3d4da05474c2e.jpg',
 '00225dcd3e4d2410dd53239f95c0352f.jpg',
 '002c2a3117c2193b4d26400ce431eebd.jpg',
 '002c58d413a521ae8d1a5daeb35fc803.jpg',
 '002f80396f1e3db687c5932d7978b196.jpg',
 '0036c6bcec6031be9e62a257b1c3c442.jpg',
 '0041940322116ae58c38130f5a6f71f9.jpg',
 '0042d6bf3e5f3700865886db32689436.jpg',
 '004476c96f575879af4af471af65cae8.jpg',
 '00485d47de966a9437ad3b33ac193b6f.jpg',
 '00496f65de6cc319145ce97bd6e90360.jpg',
 '004bf14426d1a830d459a9e0c0721309.jpg',
 '004c3721eb88358f462cdcec6b2380b7.jpg',
 '00559f56aab7e0a7749220f6aed65162.jpg',
 '005b281f1a4d6f29d527c9585e9bd33c.jpg',
 '005b6c6c76fefd6b458ef6fb6e54da6e.jpg',
 '006870b49353779b25eeb91fed43c31a.jpg',
 '0068f3a21b159ece126a28580cdad7a0.jpg',
 '0069b1cc4546fc98f84f981bf9a0696a.jpg',
 '0077bc3c63486ff09d3774d956af8f76.jpg',
 '00780e5d2bf4f7