In [13]:
import os
from json import load
import numpy as np
import cupy as cp
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

import chainer
import chainer.links as L
import chainer.functions as F
from chainer import cuda
from chainer import optimizers
from chainer import iterators
from chainer import training
from chainer.training import extensions
from chainer import datasets
from chainer.datasets import TransformDataset

from functools import partial
from chainercv import transforms

In [14]:
from chainer.datasets import LabeledImageDataset

# Root of the cropped dataset; each split keeps a label CSV and an images/ directory.
DATA_ROOT = '/home/ubuntu/intern-winter-2019/dataset/simple_crop'


def _load_split(split):
    """Build the LabeledImageDataset for ``split`` ('train' or 'test')."""
    split_dir = os.path.join(DATA_ROOT, split + '_data')
    return LabeledImageDataset(
        os.path.join(split_dir, split + '_label.csv'),
        # The trailing '' keeps the final path separator on the image root.
        os.path.join(split_dir, 'images', ''),
        dtype=np.float32)


train = _load_split('train')
test = _load_split('test')

In [15]:
from chainer.datasets import TransformDataset

def transform_train(data, train=True):
    """Augment (optionally) and VGG-preprocess one ``(image, label)`` sample.

    Args:
        data: ``(img, label)`` pair as yielded by the wrapped dataset.
            Assumes ``img`` is a float array in CHW layout with values in
            [0, 255] -- TODO confirm against the dataset.
        train: When truthy, apply PCA lighting noise, a random horizontal
            flip, a random expansion (ratio up to 1.5) and a random
            200x200 crop before preprocessing.

    Returns:
        ``(img, label)`` where ``img`` has been passed through
        ``vgg.prepare`` and ``label`` is returned untouched.
    """
    img, label = data
    img = img.copy()  # never modify the array owned by the dataset

    if train:
        # Color augmentation
        img = transforms.pca_lighting(img, 76.5)
        # Random flip & crop
        img = transforms.random_flip(img, x_random=True)
        img = transforms.random_expand(img, max_ratio=1.5)
        img = transforms.random_crop(img, (200, 200))

    img = np.array(img, dtype=np.float32)
    # The lighting noise can push values outside [0, 255]. vgg.prepare is
    # understood to go through an 8-bit image, where such values would wrap
    # around, so clamp them first (a no-op for images already in range).
    img = np.clip(img, 0, 255)
    img = L.model.vision.vgg.prepare(img)

    return img, label

def transform_test(data):
    """Preprocess one ``(image, label)`` sample for VGG without augmentation.

    The image is converted to a float32 array and passed through
    ``vgg.prepare``; the label is returned unchanged.
    """
    image, target = data
    as_float = np.array(image, dtype=np.float32)
    return L.model.vision.vgg.prepare(as_float), target

# Pair each raw dataset with its per-sample transform.
augment = partial(transform_train, train=True)
train_dataset = TransformDataset(train, augment)
test_dataset = TransformDataset(test, transform_test)

In [16]:
# Number of training samples exposed by the transformed dataset.
len(train_dataset)

1967

In [17]:
class Model(chainer.Chain):
    """VGG16 feature extractor followed by a small two-layer head.

    The ``fc7`` activations of ``L.VGG16Layers`` are fed through a
    500-unit linear layer with ReLU, L2-normalized and scaled by 50, and
    finally projected to ``dim`` outputs.

    Args:
        dim: Number of output units of the final linear layer.
    """

    def __init__(self, dim=2):
        super(Model, self).__init__()
        with self.init_scope():
            self.vgg = L.VGG16Layers()
            self.fc1 = L.Linear(None, 500)
            # Honor `dim` instead of a hard-coded 2 (the default is unchanged).
            self.fc2 = L.Linear(None, dim)

    def __call__(self, x, train=False):
        """Compute the ``dim`` output scores for a batch ``x``.

        Args:
            x: Input batch handed to ``VGG16Layers``.
            train: Value used for chainer's ``train`` config flag while the
                forward pass runs.

        Returns:
            The output of the final linear layer.
        """
        with chainer.using_config('train', train):
            h = self.vgg(x, layers=['fc7'])['fc7']
            h = F.relu(self.fc1(h))
            # Unit-normalize the features, then rescale by a fixed factor.
            h = F.normalize(h) * 50
            # The final layer is registered as `fc2`; `self.fc` does not exist.
            y = self.fc2(h)
        return y
    
class SoftMaxEntoropyLoss(chainer.Chain):
    """Loss wrapper reporting softmax cross entropy and per-class metrics.

    Args:
        model: Callable link mapping an input batch to class scores.
    """

    def __init__(self, model):
        super(SoftMaxEntoropyLoss, self).__init__()
        with self.init_scope():
            self.model = model

    def __call__(self, x, t):
        """Return the softmax cross-entropy loss of ``model(x)`` against ``t``.

        Reports ``loss``, ``accuracy`` and, for every class index ``i``,
        ``precision_i``, ``recall_i`` and ``f_value_i`` through
        ``chainer.report``.
        """
        # NOTE(review): the model is called without `train=`; if it is the
        # Model class from this notebook, its default train=False applies
        # here even during training -- confirm this is intended.
        y = self.model(x)
        loss = F.softmax_cross_entropy(y, t)
        accuracy = F.accuracy(y, t)

        # classification_summary yields (precision, recall, f-score, support).
        precision, recall, f_value, _support = F.classification_summary(
            y, t, beta=1.0)

        main_dict = {'loss': loss, 'accuracy': accuracy}
        per_class = (('precision_%d', precision),
                     ('recall_%d', recall),
                     ('f_value_%d', f_value))
        for key_format, values in per_class:
            for i, val in enumerate(values):
                main_dict[key_format % i] = val

        chainer.report(main_dict, self)
        return loss

In [18]:
# Classifier network and the loss wrapper that is later handed to the optimizer.
model = Model()
loss = SoftMaxEntoropyLoss(model)

In [19]:
# Initialize VGG16 / create the model.
# NOTE(review): `vgg` is not referenced by any other cell visible here, and
# Model() builds its own L.VGG16Layers() -- confirm this extra instance is needed.
vgg = L.VGG16Layers()

In [20]:
# Training hyper-parameters and output location.
n_epoch = 10
batchsize = 128
out_dir = './result/'
report_interval = (10, 'iteration')

# The training iterator repeats and shuffles; the test iterator does neither.
train_iter = iterators.MultithreadIterator(
    train_dataset, batchsize, repeat=True, shuffle=True)
test_iter = iterators.MultithreadIterator(
    test_dataset, batchsize, repeat=False, shuffle=False)

In [21]:
# MomentumSGD is used because this is fine-tuning.
optimizer = optimizers.MomentumSGD(lr=1e-6)
optimizer.setup(loss)

<chainer.optimizers.momentum_sgd.MomentumSGD at 0x7f08d115e9e8>

In [22]:
# device=0 selects the GPU, device=-1 the CPU.
updater = training.StandardUpdater(train_iter, optimizer, device=0)

In [23]:
# Trainer that stops after `n_epoch` epochs and writes its artifacts to `out_dir`.
trainer = training.Trainer(updater, (n_epoch, 'epoch'), out=out_dir)

# Log and print the reported training metrics every `report_interval`.
trainer.extend(extensions.LogReport(trigger=report_interval))
trainer.extend(
    extensions.PrintReport([
        'epoch',
        'iteration',
        'main/loss',
        'main/accuracy',
        'main/precision_0',
        'main/precision_1',
        'main/recall_0',
        'main/recall_1',
    ]),
    trigger=report_interval)

# Give each plot its own file: with the default file name the accuracy
# plot would overwrite the loss plot.
trainer.extend(extensions.PlotReport(
    y_keys='main/loss', trigger=report_interval, file_name='loss.png'))
trainer.extend(extensions.PlotReport(
    y_keys='main/accuracy', trigger=report_interval, file_name='accuracy.png'))

In [24]:
# Start the training loop configured on `trainer`.
trainer.run()

Exception in main training loop: [Errno 2] No such file or directory: '/home/ubuntu/intern-winter-2019/dataset/simple_crop/train_data/images/Abyssinian_175.jpg'
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/chainer_p36/lib/python3.6/site-packages/chainer/training/trainer.py", line 315, in run
    update()
  File "/home/ubuntu/anaconda3/envs/chainer_p36/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 165, in update
    self.update_core()
  File "/home/ubuntu/anaconda3/envs/chainer_p36/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 170, in update_core
    batch = iterator.next()
  File "/home/ubuntu/anaconda3/envs/chainer_p36/lib/python3.6/site-packages/chainer/iterators/multithread_iterator.py", line 112, in __next__
    batch = self._get()
  File "/home/ubuntu/anaconda3/envs/chainer_p36/lib/python3.6/site-packages/chainer/iterators/multithread_iterator.py", line 184, in _get
    batch = [data fo

FileNotFoundError: [Errno 2] No such file or directory: '/home/ubuntu/intern-winter-2019/dataset/simple_crop/train_data/images/Abyssinian_175.jpg'

In [None]:
# Save the parameters of `model` to an NPZ file.
chainer.serializers.save_npz('./intern2019_finetune2.model', model)

In [None]:
# Materialize the preprocessed test set: images go to a CuPy array,
# labels stay in a NumPy array.
X, T = [], []
for sample, target in test_dataset:
    X.append(sample)
    T.append(target)

X = cp.array(X, dtype=np.float32).reshape(-1, 3, 224, 224)
T = np.array(T, dtype=np.int32)

In [None]:
# Compute the first two output scores for every test image.
# Each image goes through the network once, in mini-batches, without keeping
# the computational graph; results are copied to host memory explicitly so
# that Y is a plain NumPy array of shape (len(X), 2).
eval_batchsize = 32
score_chunks = []
with chainer.no_backprop_mode():
    for start in range(0, len(X), eval_batchsize):
        scores = model(X[start:start + eval_batchsize])
        score_chunks.append(cuda.to_cpu(scores.data)[:, :2])

if score_chunks:
    Y = np.concatenate(score_chunks, axis=0)
else:
    # No test samples: keep the (N, 2) layout that later cells index into.
    Y = np.empty((0, 2), dtype=np.float32)

In [None]:
# Scatter of the two output scores, colored by the ground-truth label.
fig, ax = plt.subplots(figsize=(15, 10))
points = ax.scatter(Y[:, 0], Y[:, 1], c=T)
fig.colorbar(points, ax=ax)

In [None]:
from sklearn import cluster

In [None]:
# Two-cluster k-means on the output scores. The seed is fixed so that the
# cluster assignment is reproducible from run to run.
KMEANS_SEED = 0
clf = cluster.KMeans(n_clusters=2, random_state=KMEANS_SEED)
pred = clf.fit_predict(Y)

In [None]:
# Scatter of the two output scores, colored by the k-means cluster index.
fig, ax = plt.subplots(figsize=(15, 10))
points = ax.scatter(Y[:, 0], Y[:, 1], c=pred, cmap='tab10')
fig.colorbar(points, ax=ax)

In [None]:
# Predicted label per sample: False only when the first score is strictly
# greater than the second, True otherwise (ties included).
# Vectorized so that `_` (IPython's last-output name) is not overwritten.
Y_label = ~(Y[:, 0] > Y[:, 1])

In [None]:
# Boolean ground truth: True where the label equals 1.
T_label = (T == 1)

In [None]:
# Number of test samples whose predicted label agrees with the ground truth.
cnt = int(np.sum(Y_label == T_label))
# NOTE(review): 475 is an unexplained constant, and len(T == 1) is the length
# of the boolean mask (i.e. len(T)), not the number of positives -- confirm
# what this ratio is meant to measure.
(cnt - 475) / len(T == 1)

In [None]:
# Chance rate (per the original note: the precision obtained when every
# sample is judged positive). Computed as the share of labels other than 1.
np.count_nonzero(T != 1) / len(T)

In [None]:
# Ground-truth scatter with the mean position of the samples predicted
# False (Y_label == False) overlaid in red.
plt.figure(figsize=(15, 10))
plt.scatter(Y[:, 0], Y[:, 1], c=T, cmap='tab10')
predicted_false = Y[~Y_label]
center_x = float(predicted_false[:, 0].sum() / len(predicted_false))
center_y = float(predicted_false[:, 1].sum() / len(predicted_false))
plt.scatter(center_x, center_y, c='r')
