In [1]:
# ライブラリ読み込み
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

import pandas as pd

In [2]:
# データセットの読み込み
wine = load_wine()
wine

 'data': array([[  1.42300000e+01,   1.71000000e+00,   2.43000000e+00, ...,
           1.04000000e+00,   3.92000000e+00,   1.06500000e+03],
        [  1.32000000e+01,   1.78000000e+00,   2.14000000e+00, ...,
           1.05000000e+00,   3.40000000e+00,   1.05000000e+03],
        [  1.31600000e+01,   2.36000000e+00,   2.67000000e+00, ...,
           1.03000000e+00,   3.17000000e+00,   1.18500000e+03],
        ..., 
        [  1.32700000e+01,   4.28000000e+00,   2.26000000e+00, ...,
           5.90000000e-01,   1.56000000e+00,   8.35000000e+02],
        [  1.31700000e+01,   2.59000000e+00,   2.37000000e+00, ...,
           6.00000000e-01,   1.62000000e+00,   8.40000000e+02],
        [  1.41300000e+01,   4.10000000e+00,   2.74000000e+00, ...,
           6.10000000e-01,   1.60000000e+00,   5.60000000e+02]]),
 'feature_names': ['alcohol',
  'malic_acid',
  'ash',
  'alcalinity_of_ash',
  'magnesium',
  'total_phenols',
  'flavanoids',
  'nonflavanoid_phenols',
  'proanthocyanins',
  'color_

In [3]:
# DataFrameで説明変数を表示
pd.DataFrame(wine.data, columns=wine.feature_names).head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [4]:
# 目的変数の表示
wine.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [5]:
# 説明変数と目的変数を格納
wine_data = wine.data[0:130]
wine_target = wine.target[0:130]

In [6]:
# データセットを訓練用とテスト用に分割
train_X, test_X, train_Y, test_Y = train_test_split(wine_data, wine_target, test_size=0.2)

# データの長さを確認
print(len(train_X))
print(len(test_X))

104
26


In [7]:
# 訓練用のテンソル作成
train_X = torch.from_numpy(train_X).float()
train_Y = torch.from_numpy(train_Y).long()

# テスト用のテンソル作成
test_X = torch.from_numpy(test_X).float()
test_Y = torch.from_numpy(test_Y).long()

# テンソルの件数を表示
print(train_X.shape)
print(train_Y.shape)

torch.Size([104, 13])
torch.Size([104])


In [8]:
# 説明変数と目的変数のテンソルをまとめる
train = TensorDataset(train_X, train_Y)

# 一つ目のテンソルを確認
print(train[0])

# ミニバッチに分ける
train_loader = DataLoader(train, batch_size=16, shuffle=True)

(tensor([  13.0500,    1.7700,    2.1000,   17.0000,  107.0000,    3.0000,
           3.0000,    0.2800,    2.0300,    5.0400,    0.8800,    3.3500,
         885.0000]), tensor(0))


In [9]:
# ネットワークの作成
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(13, 96)
        self.fc2 = nn.Linear(96, 96)
        self.fc3 = nn.Linear(96, 96)
        self.fc4 = nn.Linear(96, 96)
        self.fc5 = nn.Linear(96, 96)
        self.fc6 = nn.Linear(96, 2)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc5(x))
        x = self.fc6(x)
        return F.log_softmax(x)
    
# インスタンスの生成
model = Net()

In [10]:
# 誤差関数のセット
criterion = nn.CrossEntropyLoss()

# 最適化関数のセット
optimizer = optim.SGD(model.parameters(), lr=0.01)

# 学習開始
for epoch in range(300):
    total_loss = 0
    # 分類したデータの取り出し
    for train_x, train_y in train_loader:
        # 計算グラフの構築
        train_x, train_y = Variable(train_x), Variable(train_y)
        # 勾配をリセットする
        optimizer.zero_grad()
        # 順伝播の計算
        output = model(train_x)
        # 誤差の計算
        loss = criterion(output, train_y)
        # 逆伝播の計算
        loss.backward()
        # 重みの更新
        optimizer.step()
        # 誤差の累積
        total_loss += loss.data[0]
    # 累積誤差を50回ごとに表示
    if (epoch+1) % 50 == 0:
        print(epoch+1, total_loss)



50 tensor(3.5128)
100 tensor(1.9565)
150 tensor(2.1512)
200 tensor(1.8958)
250 tensor(1.6826)
300 tensor(1.6133)


In [11]:
# 計算グラフの構築
test_x, test_y = Variable(test_X), Variable(test_Y)
# 出力が0と1のどちらか
result = torch.max(model(test_x).data, 1)[1]
# モデルの精度を計算
accurancy = sum(test_y.data.numpy() == result.numpy()) / len(test_y.data.numpy())

# モデル精度の表示
accurancy



0.96153846153846156