<a href="https://colab.research.google.com/github/dokeyuka/deep-learning-practice/blob/main/DL_practice3_activation_func.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 活性化関数の工夫
### 出力層における活性化関数 = 確率を出力する関数でなければならない(Sigmoid, softmax)
### 隠れ層は別の関数でもよい→勾配消失問題を解決


## 1 tanh
### Sigmoidよりaccuracy上がった！！
0.861→0.953

In [2]:
import os
import numpy as np
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn 
import torch.optim as optimizers
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

'''
  (1) modelの実装
'''
class DNN(nn.Module):
    """Fully connected network: three tanh hidden layers and a linear output.

    The last entry of ``self.layers`` is a plain ``nn.Linear``, so
    ``forward`` returns the un-activated output of that layer.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width shared by the three hidden layers.
        output_dim: Number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Every sub-module is bound as an attribute (that is what registers it
        # with nn.Module); the plain list built below only records call order.
        self.l1 = nn.Linear(input_dim, hidden_dim)
        self.a1 = nn.Tanh()
        self.l2 = nn.Linear(hidden_dim, hidden_dim)
        self.a2 = nn.Tanh()
        self.l3 = nn.Linear(hidden_dim, hidden_dim)
        self.a3 = nn.Tanh()
        self.l4 = nn.Linear(hidden_dim, output_dim)

        pipeline = []
        for index in (1, 2, 3):
            pipeline.append(getattr(self, 'l{}'.format(index)))
            pipeline.append(getattr(self, 'a{}'.format(index)))
        pipeline.append(self.l4)
        self.layers = pipeline

    def forward(self, x):
        """Feed ``x`` through each entry of ``self.layers`` in order."""
        out = x
        for module in self.layers:
            out = module(out)
        return out



# Fix the NumPy and PyTorch seeds.
# These statements used to sit under ``if __name__ == '__main__':`` while the
# code further down uses ``device`` unconditionally, so importing this file
# (instead of running it directly) failed with NameError. They now always run;
# behaviour when executed as a script or notebook cell is unchanged.
np.random.seed(123)
torch.manual_seed(123)

# Choose the execution device once so the same code runs on CPU or GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
'''
  1 データの準備
'''
# Directory handed to torchvision as the MNIST dataset root.
root = os.path.join('~', '.torch', 'mnist')

# Preprocessing: ToTensor() followed by view(-1), which flattens each sample
# into a 1-D tensor (the author's note gives the shapes as (28, 28) -> (784,)).
transform = transforms.Compose([
    transforms.ToTensor(),
    lambda x: x.view(-1),
])

mnist_train = datasets.MNIST(root=root, download=True, train=True,
                             transform=transform)
mnist_test = datasets.MNIST(root=root, download=True, train=False,
                            transform=transform)

# Wrap both splits in DataLoaders that yield mini-batches of 100; only the
# training loader has shuffling enabled.
train_dataloader = DataLoader(mnist_train, batch_size=100, shuffle=True)
test_dataloader = DataLoader(mnist_test, batch_size=100, shuffle=False)

# Section 2: build the model (the bare string below is a no-op marker).
'''
  2 モデルの構築
'''
model = DNN(784, 200, 10).to(device)

# Section 3: training setup (the bare string below is a no-op marker).
'''
  3　モデルの学習
'''
criterion = nn.CrossEntropyLoss()
optimizer = optimizers.SGD(model.parameters(), lr=0.01)

def compute_loss(t, y):
    """Return ``criterion(y, t)``.

    Note the argument order: this helper takes ``t`` first, whereas the
    module-level ``criterion`` is called with ``y`` first.
    """
    loss = criterion(y, t)
    return loss

def train_step(x, t):
    """Run one optimization step on a mini-batch.

    Puts the module-level ``model`` in training mode, computes predictions
    and the loss via ``compute_loss``, backpropagates, and applies one
    ``optimizer`` update.

    Returns:
        A ``(loss, preds)`` tuple: the value from ``compute_loss`` and the
        output of ``model(x)``.
    """
    model.train()
    # Clearing gradients before the forward pass is equivalent to clearing
    # them right before backward(): nothing in between reads them.
    optimizer.zero_grad()
    preds = model(x)
    loss = compute_loss(t, preds)
    loss.backward()
    optimizer.step()
    return loss, preds

epochs = 30

for epoch in range(epochs):
    # Running sums over mini-batches; turned into per-batch means below.
    train_loss = train_acc = 0.

    for x, t in train_dataloader:
        x = x.to(device)
        t = t.to(device)
        loss, preds = train_step(x, t)
        train_loss += loss.item()
        # accuracy_score is fed plain Python lists (via .tolist()) rather
        # than tensors.
        train_acc += accuracy_score(t.tolist(),
                                    preds.argmax(dim=-1).tolist())

    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader)

    # NOTE: the accuracy label below is spelled with U+33C4 ("㏄"), not ASCII
    # "cc"; it is kept byte-for-byte so the printed log stays unchanged.
    print('epoch:{}, loss: {:.3f}, a㏄:{:.3f}'.format(
        epoch + 1, train_loss, train_acc))

'''
  4 モデルの評価
'''

def test_step(x, t):
    """Evaluate one mini-batch without tracking gradients.

    Switches the module-level ``model`` to evaluation mode and runs the
    forward pass and ``criterion`` inside ``torch.no_grad()``: this function
    never calls ``backward()``, so building the autograd graph would only
    cost memory and time.

    Args:
        x: Input batch passed to ``model``.
        t: Targets passed to ``criterion`` as its second argument.

    Returns:
        A ``(loss, preds)`` tuple; neither tensor requires grad.
    """
    model.eval()
    with torch.no_grad():
        preds = model(x)
        loss = criterion(preds, t)
    return loss, preds

# Running sums over the test mini-batches.
test_loss = test_acc = 0.

for x, t in test_dataloader:
    x = x.to(device)
    t = t.to(device)
    loss, preds = test_step(x, t)
    test_loss += loss.item()
    test_acc += accuracy_score(t.tolist(),
                               preds.argmax(dim=-1).tolist())

# Convert the sums into per-batch means.
test_loss /= len(test_dataloader)
test_acc /= len(test_dataloader)

# ``epoch`` is whatever value the training loop left behind, so the number
# printed here is that of the last training epoch.
print('epoch:{}, loss: {:.3f}, acc:{:.3f}'.format(
    epoch + 1, test_loss, test_acc))


epoch:1, loss: 1.835, a㏄:0.562
epoch:2, loss: 0.755, a㏄:0.809
epoch:3, loss: 0.487, a㏄:0.870
epoch:4, loss: 0.402, a㏄:0.889
epoch:5, loss: 0.363, a㏄:0.897
epoch:6, loss: 0.339, a㏄:0.903
epoch:7, loss: 0.322, a㏄:0.908
epoch:8, loss: 0.308, a㏄:0.912
epoch:9, loss: 0.297, a㏄:0.915
epoch:10, loss: 0.287, a㏄:0.917
epoch:11, loss: 0.278, a㏄:0.919
epoch:12, loss: 0.270, a㏄:0.922
epoch:13, loss: 0.263, a㏄:0.924
epoch:14, loss: 0.256, a㏄:0.926
epoch:15, loss: 0.249, a㏄:0.928
epoch:16, loss: 0.243, a㏄:0.930
epoch:17, loss: 0.236, a㏄:0.931
epoch:18, loss: 0.230, a㏄:0.933
epoch:19, loss: 0.224, a㏄:0.935
epoch:20, loss: 0.217, a㏄:0.937
epoch:21, loss: 0.211, a㏄:0.939
epoch:22, loss: 0.205, a㏄:0.941
epoch:23, loss: 0.199, a㏄:0.942
epoch:24, loss: 0.193, a㏄:0.945
epoch:25, loss: 0.187, a㏄:0.946
epoch:26, loss: 0.181, a㏄:0.948
epoch:27, loss: 0.176, a㏄:0.950
epoch:28, loss: 0.170, a㏄:0.951
epoch:29, loss: 0.165, a㏄:0.953
epoch:30, loss: 0.160, a㏄:0.955
epoch:30, loss: 0.163, acc:0.953


## 2 ReLU
x > 0 では導関数が常に1 → 勾配が消失しにくい<br>
学習が早い<br>
but<br>
一度不活性になるとずっと不活性<br>
学習率を大きい値に設定すると、最初の誤差逆伝播でニューロンの値が小さくなりすぎてしまう

### Sigmoid,Tanhよりaccuracy上がった！！
0.861→0.953→0.965

In [3]:
import os
import numpy as np
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn 
import torch.optim as optimizers
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

'''
  (1) modelの実装
'''
class DNN(nn.Module):
    """Fully connected network: three ReLU hidden layers and a linear output.

    The last entry of ``self.layers`` is a plain ``nn.Linear``, so
    ``forward`` returns the un-activated output of that layer.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width shared by the three hidden layers.
        output_dim: Number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Every sub-module is bound as an attribute (that is what registers it
        # with nn.Module); the plain list built below only records call order.
        self.l1 = nn.Linear(input_dim, hidden_dim)
        self.a1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_dim, hidden_dim)
        self.a2 = nn.ReLU()
        self.l3 = nn.Linear(hidden_dim, hidden_dim)
        self.a3 = nn.ReLU()
        self.l4 = nn.Linear(hidden_dim, output_dim)

        pipeline = []
        for index in (1, 2, 3):
            pipeline.append(getattr(self, 'l{}'.format(index)))
            pipeline.append(getattr(self, 'a{}'.format(index)))
        pipeline.append(self.l4)
        self.layers = pipeline

    def forward(self, x):
        """Feed ``x`` through each entry of ``self.layers`` in order."""
        out = x
        for module in self.layers:
            out = module(out)
        return out



# Fix the NumPy and PyTorch seeds.
# These statements used to sit under ``if __name__ == '__main__':`` while the
# code further down uses ``device`` unconditionally, so importing this file
# (instead of running it directly) failed with NameError. They now always run;
# behaviour when executed as a script or notebook cell is unchanged.
np.random.seed(123)
torch.manual_seed(123)

# Choose the execution device once so the same code runs on CPU or GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
'''
  1 データの準備
'''
# Directory handed to torchvision as the MNIST dataset root.
root = os.path.join('~', '.torch', 'mnist')

# Preprocessing: ToTensor() followed by view(-1), which flattens each sample
# into a 1-D tensor (the author's note gives the shapes as (28, 28) -> (784,)).
transform = transforms.Compose([
    transforms.ToTensor(),
    lambda x: x.view(-1),
])

mnist_train = datasets.MNIST(root=root, download=True, train=True,
                             transform=transform)
mnist_test = datasets.MNIST(root=root, download=True, train=False,
                            transform=transform)

# Wrap both splits in DataLoaders that yield mini-batches of 100; only the
# training loader has shuffling enabled.
train_dataloader = DataLoader(mnist_train, batch_size=100, shuffle=True)
test_dataloader = DataLoader(mnist_test, batch_size=100, shuffle=False)

# Section 2: build the model (the bare string below is a no-op marker).
'''
  2 モデルの構築
'''
model = DNN(784, 200, 10).to(device)

# Section 3: training setup (the bare string below is a no-op marker).
'''
  3　モデルの学習
'''
criterion = nn.CrossEntropyLoss()
optimizer = optimizers.SGD(model.parameters(), lr=0.01)

def compute_loss(t, y):
    """Return ``criterion(y, t)``.

    Note the argument order: this helper takes ``t`` first, whereas the
    module-level ``criterion`` is called with ``y`` first.
    """
    loss = criterion(y, t)
    return loss

def train_step(x, t):
    """Run one optimization step on a mini-batch.

    Puts the module-level ``model`` in training mode, computes predictions
    and the loss via ``compute_loss``, backpropagates, and applies one
    ``optimizer`` update.

    Returns:
        A ``(loss, preds)`` tuple: the value from ``compute_loss`` and the
        output of ``model(x)``.
    """
    model.train()
    # Clearing gradients before the forward pass is equivalent to clearing
    # them right before backward(): nothing in between reads them.
    optimizer.zero_grad()
    preds = model(x)
    loss = compute_loss(t, preds)
    loss.backward()
    optimizer.step()
    return loss, preds

epochs = 30

for epoch in range(epochs):
    # Running sums over mini-batches; turned into per-batch means below.
    train_loss = train_acc = 0.

    for x, t in train_dataloader:
        x = x.to(device)
        t = t.to(device)
        loss, preds = train_step(x, t)
        train_loss += loss.item()
        # accuracy_score is fed plain Python lists (via .tolist()) rather
        # than tensors.
        train_acc += accuracy_score(t.tolist(),
                                    preds.argmax(dim=-1).tolist())

    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader)

    # NOTE: the accuracy label below is spelled with U+33C4 ("㏄"), not ASCII
    # "cc"; it is kept byte-for-byte so the printed log stays unchanged.
    print('epoch:{}, loss: {:.3f}, a㏄:{:.3f}'.format(
        epoch + 1, train_loss, train_acc))

'''
  4 モデルの評価
'''

def test_step(x, t):
    """Evaluate one mini-batch without tracking gradients.

    Switches the module-level ``model`` to evaluation mode and runs the
    forward pass and ``criterion`` inside ``torch.no_grad()``: this function
    never calls ``backward()``, so building the autograd graph would only
    cost memory and time.

    Args:
        x: Input batch passed to ``model``.
        t: Targets passed to ``criterion`` as its second argument.

    Returns:
        A ``(loss, preds)`` tuple; neither tensor requires grad.
    """
    model.eval()
    with torch.no_grad():
        preds = model(x)
        loss = criterion(preds, t)
    return loss, preds

# Running sums over the test mini-batches.
test_loss = test_acc = 0.

for x, t in test_dataloader:
    x = x.to(device)
    t = t.to(device)
    loss, preds = test_step(x, t)
    test_loss += loss.item()
    test_acc += accuracy_score(t.tolist(),
                               preds.argmax(dim=-1).tolist())

# Convert the sums into per-batch means.
test_loss /= len(test_dataloader)
test_acc /= len(test_dataloader)

# ``epoch`` is whatever value the training loop left behind, so the number
# printed here is that of the last training epoch.
print('epoch:{}, loss: {:.3f}, acc:{:.3f}'.format(
    epoch + 1, test_loss, test_acc))


epoch:1, loss: 2.273, a㏄:0.242
epoch:2, loss: 1.764, a㏄:0.617
epoch:3, loss: 0.690, a㏄:0.815
epoch:4, loss: 0.462, a㏄:0.871
epoch:5, loss: 0.391, a㏄:0.888
epoch:6, loss: 0.355, a㏄:0.898
epoch:7, loss: 0.330, a㏄:0.905
epoch:8, loss: 0.309, a㏄:0.911
epoch:9, loss: 0.291, a㏄:0.917
epoch:10, loss: 0.275, a㏄:0.921
epoch:11, loss: 0.259, a㏄:0.926
epoch:12, loss: 0.245, a㏄:0.930
epoch:13, loss: 0.232, a㏄:0.934
epoch:14, loss: 0.220, a㏄:0.938
epoch:15, loss: 0.208, a㏄:0.940
epoch:16, loss: 0.198, a㏄:0.943
epoch:17, loss: 0.188, a㏄:0.946
epoch:18, loss: 0.179, a㏄:0.949
epoch:19, loss: 0.170, a㏄:0.951
epoch:20, loss: 0.162, a㏄:0.953
epoch:21, loss: 0.155, a㏄:0.955
epoch:22, loss: 0.148, a㏄:0.958
epoch:23, loss: 0.142, a㏄:0.959
epoch:24, loss: 0.135, a㏄:0.961
epoch:25, loss: 0.130, a㏄:0.962
epoch:26, loss: 0.124, a㏄:0.964
epoch:27, loss: 0.119, a㏄:0.965
epoch:28, loss: 0.114, a㏄:0.967
epoch:29, loss: 0.110, a㏄:0.968
epoch:30, loss: 0.106, a㏄:0.969
epoch:30, loss: 0.114, acc:0.965


## 3 Leaky ReLU
f(x) = max(αx, x)（0 < α < 1、下のコードでは α = 0.01）<br>
x < 0 でも勾配が α となり 0 にならないので学習が進む！！
### accuracyはReLUとほぼ同じ
0.861→0.953→0.965→0.964

In [4]:
import os
import numpy as np
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn 
import torch.optim as optimizers
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

'''
  (1) modelの実装
'''
class DNN(nn.Module):
    """Fully connected network: three LeakyReLU hidden layers and a linear output.

    Each activation is ``nn.LeakyReLU(0.01)``. The last entry of
    ``self.layers`` is a plain ``nn.Linear``, so ``forward`` returns the
    un-activated output of that layer.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width shared by the three hidden layers.
        output_dim: Number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Every sub-module is bound as an attribute (that is what registers it
        # with nn.Module); the plain list built below only records call order.
        self.l1 = nn.Linear(input_dim, hidden_dim)
        self.a1 = nn.LeakyReLU(0.01)
        self.l2 = nn.Linear(hidden_dim, hidden_dim)
        self.a2 = nn.LeakyReLU(0.01)
        self.l3 = nn.Linear(hidden_dim, hidden_dim)
        self.a3 = nn.LeakyReLU(0.01)
        self.l4 = nn.Linear(hidden_dim, output_dim)

        pipeline = []
        for index in (1, 2, 3):
            pipeline.append(getattr(self, 'l{}'.format(index)))
            pipeline.append(getattr(self, 'a{}'.format(index)))
        pipeline.append(self.l4)
        self.layers = pipeline

    def forward(self, x):
        """Feed ``x`` through each entry of ``self.layers`` in order."""
        out = x
        for module in self.layers:
            out = module(out)
        return out



# Fix the NumPy and PyTorch seeds.
# These statements used to sit under ``if __name__ == '__main__':`` while the
# code further down uses ``device`` unconditionally, so importing this file
# (instead of running it directly) failed with NameError. They now always run;
# behaviour when executed as a script or notebook cell is unchanged.
np.random.seed(123)
torch.manual_seed(123)

# Choose the execution device once so the same code runs on CPU or GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
'''
  1 データの準備
'''
# Directory handed to torchvision as the MNIST dataset root.
root = os.path.join('~', '.torch', 'mnist')

# Preprocessing: ToTensor() followed by view(-1), which flattens each sample
# into a 1-D tensor (the author's note gives the shapes as (28, 28) -> (784,)).
transform = transforms.Compose([
    transforms.ToTensor(),
    lambda x: x.view(-1),
])

mnist_train = datasets.MNIST(root=root, download=True, train=True,
                             transform=transform)
mnist_test = datasets.MNIST(root=root, download=True, train=False,
                            transform=transform)

# Wrap both splits in DataLoaders that yield mini-batches of 100; only the
# training loader has shuffling enabled.
train_dataloader = DataLoader(mnist_train, batch_size=100, shuffle=True)
test_dataloader = DataLoader(mnist_test, batch_size=100, shuffle=False)

# Section 2: build the model (the bare string below is a no-op marker).
'''
  2 モデルの構築
'''
model = DNN(784, 200, 10).to(device)

# Section 3: training setup (the bare string below is a no-op marker).
'''
  3　モデルの学習
'''
criterion = nn.CrossEntropyLoss()
optimizer = optimizers.SGD(model.parameters(), lr=0.01)

def compute_loss(t, y):
    """Return ``criterion(y, t)``.

    Note the argument order: this helper takes ``t`` first, whereas the
    module-level ``criterion`` is called with ``y`` first.
    """
    loss = criterion(y, t)
    return loss

def train_step(x, t):
    """Run one optimization step on a mini-batch.

    Puts the module-level ``model`` in training mode, computes predictions
    and the loss via ``compute_loss``, backpropagates, and applies one
    ``optimizer`` update.

    Returns:
        A ``(loss, preds)`` tuple: the value from ``compute_loss`` and the
        output of ``model(x)``.
    """
    model.train()
    # Clearing gradients before the forward pass is equivalent to clearing
    # them right before backward(): nothing in between reads them.
    optimizer.zero_grad()
    preds = model(x)
    loss = compute_loss(t, preds)
    loss.backward()
    optimizer.step()
    return loss, preds

epochs = 30

for epoch in range(epochs):
    # Running sums over mini-batches; turned into per-batch means below.
    train_loss = train_acc = 0.

    for x, t in train_dataloader:
        x = x.to(device)
        t = t.to(device)
        loss, preds = train_step(x, t)
        train_loss += loss.item()
        # accuracy_score is fed plain Python lists (via .tolist()) rather
        # than tensors.
        train_acc += accuracy_score(t.tolist(),
                                    preds.argmax(dim=-1).tolist())

    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader)

    # NOTE: the accuracy label below is spelled with U+33C4 ("㏄"), not ASCII
    # "cc"; it is kept byte-for-byte so the printed log stays unchanged.
    print('epoch:{}, loss: {:.3f}, a㏄:{:.3f}'.format(
        epoch + 1, train_loss, train_acc))

'''
  4 モデルの評価
'''

def test_step(x, t):
    """Evaluate one mini-batch without tracking gradients.

    Switches the module-level ``model`` to evaluation mode and runs the
    forward pass and ``criterion`` inside ``torch.no_grad()``: this function
    never calls ``backward()``, so building the autograd graph would only
    cost memory and time.

    Args:
        x: Input batch passed to ``model``.
        t: Targets passed to ``criterion`` as its second argument.

    Returns:
        A ``(loss, preds)`` tuple; neither tensor requires grad.
    """
    model.eval()
    with torch.no_grad():
        preds = model(x)
        loss = criterion(preds, t)
    return loss, preds

# Running sums over the test mini-batches.
test_loss = test_acc = 0.

for x, t in test_dataloader:
    x = x.to(device)
    t = t.to(device)
    loss, preds = test_step(x, t)
    test_loss += loss.item()
    test_acc += accuracy_score(t.tolist(),
                               preds.argmax(dim=-1).tolist())

# Convert the sums into per-batch means.
test_loss /= len(test_dataloader)
test_acc /= len(test_dataloader)

# ``epoch`` is whatever value the training loop left behind, so the number
# printed here is that of the last training epoch.
print('epoch:{}, loss: {:.3f}, acc:{:.3f}'.format(
    epoch + 1, test_loss, test_acc))


epoch:1, loss: 2.273, a㏄:0.245
epoch:2, loss: 1.756, a㏄:0.620
epoch:3, loss: 0.686, a㏄:0.816
epoch:4, loss: 0.461, a㏄:0.871
epoch:5, loss: 0.390, a㏄:0.888
epoch:6, loss: 0.355, a㏄:0.898
epoch:7, loss: 0.329, a㏄:0.905
epoch:8, loss: 0.309, a㏄:0.912
epoch:9, loss: 0.291, a㏄:0.917
epoch:10, loss: 0.275, a㏄:0.921
epoch:11, loss: 0.260, a㏄:0.925
epoch:12, loss: 0.246, a㏄:0.929
epoch:13, loss: 0.233, a㏄:0.934
epoch:14, loss: 0.221, a㏄:0.937
epoch:15, loss: 0.209, a㏄:0.940
epoch:16, loss: 0.199, a㏄:0.943
epoch:17, loss: 0.189, a㏄:0.945
epoch:18, loss: 0.180, a㏄:0.948
epoch:19, loss: 0.171, a㏄:0.951
epoch:20, loss: 0.163, a㏄:0.953
epoch:21, loss: 0.156, a㏄:0.955
epoch:22, loss: 0.149, a㏄:0.958
epoch:23, loss: 0.143, a㏄:0.959
epoch:24, loss: 0.136, a㏄:0.961
epoch:25, loss: 0.131, a㏄:0.962
epoch:26, loss: 0.125, a㏄:0.964
epoch:27, loss: 0.120, a㏄:0.965
epoch:28, loss: 0.115, a㏄:0.967
epoch:29, loss: 0.111, a㏄:0.968
epoch:30, loss: 0.107, a㏄:0.969
epoch:30, loss: 0.114, acc:0.964


## 4 Swish
f(x) = x σ(βx) <br>
全範囲で微分可能<br>
but<br>
計算コスト

In [6]:
import os
import numpy as np
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn 
import torch.optim as optimizers
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

'''
  (1) modelの実装
'''
class Swish(nn.Module):
    """Swish activation: ``x * sigmoid(beta * x)``.

    Args:
        beta: Scale applied to the input before the sigmoid. It is stored as
            a plain attribute, so it is a fixed value and not a parameter.
    """

    def __init__(self, beta=1.):
        super().__init__()
        self.beta = beta

    def forward(self, x):
        """Return ``x`` multiplied element-wise by ``sigmoid(beta * x)``."""
        gate = torch.sigmoid(self.beta * x)
        return x * gate

class DNN(nn.Module):
    """Fully connected network: three Swish hidden layers and a linear output.

    Each activation is a default-constructed ``Swish()``. The last entry of
    ``self.layers`` is a plain ``nn.Linear``, so ``forward`` returns the
    un-activated output of that layer.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width shared by the three hidden layers.
        output_dim: Number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Every sub-module is bound as an attribute (that is what registers it
        # with nn.Module); the plain list built below only records call order.
        self.l1 = nn.Linear(input_dim, hidden_dim)
        self.a1 = Swish()
        self.l2 = nn.Linear(hidden_dim, hidden_dim)
        self.a2 = Swish()
        self.l3 = nn.Linear(hidden_dim, hidden_dim)
        self.a3 = Swish()
        self.l4 = nn.Linear(hidden_dim, output_dim)

        pipeline = []
        for index in (1, 2, 3):
            pipeline.append(getattr(self, 'l{}'.format(index)))
            pipeline.append(getattr(self, 'a{}'.format(index)))
        pipeline.append(self.l4)
        self.layers = pipeline

    def forward(self, x):
        """Feed ``x`` through each entry of ``self.layers`` in order."""
        out = x
        for module in self.layers:
            out = module(out)
        return out



# Fix the NumPy and PyTorch seeds.
# These statements used to sit under ``if __name__ == '__main__':`` while the
# code further down uses ``device`` unconditionally, so importing this file
# (instead of running it directly) failed with NameError. They now always run;
# behaviour when executed as a script or notebook cell is unchanged.
np.random.seed(123)
torch.manual_seed(123)

# Choose the execution device once so the same code runs on CPU or GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
'''
  1 データの準備
'''
# Directory handed to torchvision as the MNIST dataset root.
root = os.path.join('~', '.torch', 'mnist')

# Preprocessing: ToTensor() followed by view(-1), which flattens each sample
# into a 1-D tensor (the author's note gives the shapes as (28, 28) -> (784,)).
transform = transforms.Compose([
    transforms.ToTensor(),
    lambda x: x.view(-1),
])

mnist_train = datasets.MNIST(root=root, download=True, train=True,
                             transform=transform)
mnist_test = datasets.MNIST(root=root, download=True, train=False,
                            transform=transform)

# Wrap both splits in DataLoaders that yield mini-batches of 100; only the
# training loader has shuffling enabled.
train_dataloader = DataLoader(mnist_train, batch_size=100, shuffle=True)
test_dataloader = DataLoader(mnist_test, batch_size=100, shuffle=False)

# Section 2: build the model (the bare string below is a no-op marker).
'''
  2 モデルの構築
'''
model = DNN(784, 200, 10).to(device)

# Section 3: training setup (the bare string below is a no-op marker).
'''
  3　モデルの学習
'''
criterion = nn.CrossEntropyLoss()
optimizer = optimizers.SGD(model.parameters(), lr=0.01)

def compute_loss(t, y):
    """Return ``criterion(y, t)``.

    Note the argument order: this helper takes ``t`` first, whereas the
    module-level ``criterion`` is called with ``y`` first.
    """
    loss = criterion(y, t)
    return loss

def train_step(x, t):
    """Run one optimization step on a mini-batch.

    Puts the module-level ``model`` in training mode, computes predictions
    and the loss via ``compute_loss``, backpropagates, and applies one
    ``optimizer`` update.

    Returns:
        A ``(loss, preds)`` tuple: the value from ``compute_loss`` and the
        output of ``model(x)``.
    """
    model.train()
    # Clearing gradients before the forward pass is equivalent to clearing
    # them right before backward(): nothing in between reads them.
    optimizer.zero_grad()
    preds = model(x)
    loss = compute_loss(t, preds)
    loss.backward()
    optimizer.step()
    return loss, preds

epochs = 30

for epoch in range(epochs):
    # Running sums over mini-batches; turned into per-batch means below.
    train_loss = train_acc = 0.

    for x, t in train_dataloader:
        x = x.to(device)
        t = t.to(device)
        loss, preds = train_step(x, t)
        train_loss += loss.item()
        # accuracy_score is fed plain Python lists (via .tolist()) rather
        # than tensors.
        train_acc += accuracy_score(t.tolist(),
                                    preds.argmax(dim=-1).tolist())

    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader)

    # NOTE: the accuracy label below is spelled with U+33C4 ("㏄"), not ASCII
    # "cc"; it is kept byte-for-byte so the printed log stays unchanged.
    print('epoch:{}, loss: {:.3f}, a㏄:{:.3f}'.format(
        epoch + 1, train_loss, train_acc))

'''
  4 モデルの評価
'''

def test_step(x, t):
    """Evaluate one mini-batch without tracking gradients.

    Switches the module-level ``model`` to evaluation mode and runs the
    forward pass and ``criterion`` inside ``torch.no_grad()``: this function
    never calls ``backward()``, so building the autograd graph would only
    cost memory and time.

    Args:
        x: Input batch passed to ``model``.
        t: Targets passed to ``criterion`` as its second argument.

    Returns:
        A ``(loss, preds)`` tuple; neither tensor requires grad.
    """
    model.eval()
    with torch.no_grad():
        preds = model(x)
        loss = criterion(preds, t)
    return loss, preds

# Running sums over the test mini-batches.
test_loss = test_acc = 0.

for x, t in test_dataloader:
    x = x.to(device)
    t = t.to(device)
    loss, preds = test_step(x, t)
    test_loss += loss.item()
    test_acc += accuracy_score(t.tolist(),
                               preds.argmax(dim=-1).tolist())

# Convert the sums into per-batch means.
test_loss /= len(test_dataloader)
test_acc /= len(test_dataloader)

# ``epoch`` is whatever value the training loop left behind, so the number
# printed here is that of the last training epoch.
print('epoch:{}, loss: {:.3f}, acc:{:.3f}'.format(
    epoch + 1, test_loss, test_acc))


epoch:1, loss: 2.300, a㏄:0.123
epoch:2, loss: 2.289, a㏄:0.180
epoch:3, loss: 2.268, a㏄:0.261
epoch:4, loss: 2.154, a㏄:0.291
epoch:5, loss: 1.684, a㏄:0.518
epoch:6, loss: 0.819, a㏄:0.759
epoch:7, loss: 0.559, a㏄:0.840
epoch:8, loss: 0.454, a㏄:0.872
epoch:9, loss: 0.405, a㏄:0.885
epoch:10, loss: 0.377, a㏄:0.893
epoch:11, loss: 0.356, a㏄:0.898
epoch:12, loss: 0.340, a㏄:0.904
epoch:13, loss: 0.326, a㏄:0.908
epoch:14, loss: 0.314, a㏄:0.910
epoch:15, loss: 0.303, a㏄:0.914
epoch:16, loss: 0.294, a㏄:0.917
epoch:17, loss: 0.285, a㏄:0.918
epoch:18, loss: 0.278, a㏄:0.920
epoch:19, loss: 0.271, a㏄:0.923
epoch:20, loss: 0.264, a㏄:0.924
epoch:21, loss: 0.260, a㏄:0.925
epoch:22, loss: 0.254, a㏄:0.927
epoch:23, loss: 0.249, a㏄:0.928
epoch:24, loss: 0.243, a㏄:0.930
epoch:25, loss: 0.237, a㏄:0.932
epoch:26, loss: 0.232, a㏄:0.934
epoch:27, loss: 0.227, a㏄:0.934
epoch:28, loss: 0.221, a㏄:0.936
epoch:29, loss: 0.215, a㏄:0.938
epoch:30, loss: 0.210, a㏄:0.940
epoch:30, loss: 0.211, acc:0.938
