# AIM

* ニューラルネットワークでアイリスデータ分類 with PyTorch

In [7]:
import datetime
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

In [8]:
from pkg_resources import get_distribution
import platform
print("python", platform.python_version())
print("")
libs = ["numpy", "pandas", "scikit-learn", "torch", "torchvision"]
for lib in libs:
    version = get_distribution(lib).version
    print(lib, version)

python 3.5.2

numpy 1.13.1
pandas 0.20.3
scikit-learn 0.18.2
torch 0.2.0.post1
torchvision 0.1.9


In [9]:
# モデルクラス定義

class NN(torch.nn.Module):
    def __init__(self, in_size, hidden_size, out_size):
        # クラスの初期化
        # :param in_size: 入力層のサイズ
        # :param hidden_size: 隠れ層のサイズ
        # :param out_size: 出力層のサイズ
        super(NN, self).__init__()
        self.xh = torch.nn.Linear(in_size, hidden_size)
        self.hh = torch.nn.Linear(hidden_size, hidden_size)
        self.hy = torch.nn.Linear(hidden_size, out_size)
    
    def __call__(self, x):
        # 順伝播を計算する関数
        # :param x: 入力値
        h = F.relu(self.xh(x))
        h = F.relu(self.hh(h))
        y = F.log_softmax(self.hy(h))
        return y

In [60]:
# 学習

EPOCH_NUM = 100
HIDDEN_SIZE = 20
BATCH_SIZE = 20

 # データ
N = 100
in_size = 4
out_size = 3
iris = load_iris()
data = pd.DataFrame(data= np.c_[iris["data"], iris["target"]], columns= iris["feature_names"] + ["target"])
data = np.array(data.values)
perm = np.random.permutation(len(data))
data = data[perm]
train, test = np.split(data, [N])
train_x, train_y, test_x, test_y = [], [], [], []
for t in train:
    train_x.append(t[0:4])
    train_y.append(t[4])
for t in test:
    test_x.append(t[0:4])
    test_y.append(t[4])
train_x = np.array(train_x, dtype="float32")
train_y = np.array(train_y, dtype="int32")
test_x = np.array(test_x, dtype="float32")
test_y = np.array(test_y, dtype="int32")
train_x = torch.from_numpy(train_x)
train_y = torch.from_numpy(train_y)
test_x = torch.from_numpy(test_x)
test_y = torch.from_numpy(test_y)

# DataLoader化
train = torch.utils.data.TensorDataset(train_x, train_y)
train_loader = torch.utils.data.DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
 
# モデルの定義
model = NN(in_size=in_size, hidden_size=HIDDEN_SIZE, out_size=out_size)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
 
# 学習開始
print("Train")
st = datetime.datetime.now()
for epoch in range(EPOCH_NUM):
    # ミニバッチ学習
    total_loss = 0
    for i, data in enumerate(train_loader):
        x, y = data
        x, y = Variable(x), Variable(y)
        optimizer.zero_grad()
        y_ = model(x)
        loss = criterion(y_, y)
        total_loss += loss.data[0]
        loss.backward()
        optimizer.step()
    if (epoch+1) % 10 == 0:
        # accuracy
        x, y = Variable(train_x), Variable(train_y)
        _, y_ = torch.max(model(x).data, 1)
        accuracy = sum(y.data.numpy() == y_.numpy()) / N
        # test accuracy
        x, y = Variable(test_x), Variable(test_y)
        _, y_ = torch.max(model(x).data, 1)
        test_accuracy = sum(y.data.numpy() == y_.numpy()) / len(y.data.numpy())
        ed = datetime.datetime.now()
        print("epoch:\t{}\ttotal loss:\t{}\taccuracy:\t{}\tvaridation accuracy\t{}\ttime:\t{}".format(epoch+1, total_loss, accuracy, test_accuracy, ed-st))
        st = datetime.datetime.now()

Train
epoch:	10	total loss:	4.7144821882247925	accuracy:	0.69	varidation accuracy	0.62	time:	0:00:00.070227
epoch:	20	total loss:	3.4004125595092773	accuracy:	0.82	varidation accuracy	0.88	time:	0:00:00.055588
epoch:	30	total loss:	2.4579378366470337	accuracy:	0.95	varidation accuracy	0.98	time:	0:00:00.057831
epoch:	40	total loss:	1.9144966900348663	accuracy:	0.96	varidation accuracy	0.98	time:	0:00:00.060857
epoch:	50	total loss:	1.5091854631900787	accuracy:	0.97	varidation accuracy	0.96	time:	0:00:00.053361
epoch:	60	total loss:	1.1891621351242065	accuracy:	0.98	varidation accuracy	0.94	time:	0:00:00.053629
epoch:	70	total loss:	0.9551442116498947	accuracy:	0.97	varidation accuracy	0.98	time:	0:00:00.055144
epoch:	80	total loss:	0.792793445289135	accuracy:	0.98	varidation accuracy	0.98	time:	0:00:00.056565
epoch:	90	total loss:	0.6855632364749908	accuracy:	0.98	varidation accuracy	0.98	time:	0:00:00.057540
epoch:	100	total loss:	0.5890189185738564	accuracy:	0.98	varidation accuracy	

In [61]:
# 予測

print("Predict")
print("x\ty\tpredict")
idx = np.random.choice(len(iris.data)-N, 10)
for i in idx:
    x, y = test_x[i], test_y[i]
    y_ = model(x = Variable(x.view(1,len(x)))).data
    _, y_ = torch.max(y_, 1)
    print(x.numpy(), "\t", y, "\t", y_[0])

Predict
x	y	predict
[ 7.5999999  3.         6.5999999  2.0999999] 	 2 	 2
[ 6.69999981  3.0999999   4.4000001   1.39999998] 	 1 	 1
[ 5.5         2.4000001   3.79999995  1.10000002] 	 1 	 1
[ 4.80000019  3.4000001   1.89999998  0.2       ] 	 0 	 0
[ 4.80000019  3.4000001   1.89999998  0.2       ] 	 0 	 0
[ 7.69999981  2.79999995  6.69999981  2.        ] 	 2 	 2
[ 4.80000019  3.0999999   1.60000002  0.2       ] 	 0 	 0
[ 7.69999981  2.5999999   6.9000001   2.29999995] 	 2 	 2
[ 6.          3.4000001   4.5         1.60000002] 	 1 	 1
[ 5.          3.29999995  1.39999998  0.2       ] 	 0 	 0
