### Module Import & Set Device

In [5]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import time

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)  # 랜덤 시드 고정

if device == 'cuda':
    torch.cuda.manual_seed_all(777) # GPU 사용 가능할경우 랜덤 시드 고정

# Activation Function 구현

### Abstract Class for each Activation Funtion

In [7]:
class BaseActivation:
    def __init__(self):
        pass
    
    def __call__(self, x):
        return x
    
    def parameters(self):
        return []

### Sigmoid

In [8]:
# Hint: torch.exp함수를 사용해보세요.
# Hint: https://pytorch.org/docs/stable/generated/torch.nn.Sigmoid.html

class sigmoid(BaseActivation):
    def __call__(self, x):
        ################ TODO ################
        ret = 1/(1+torch.exp(-x))
        return ret
        ################ TODO ################

### ReLU
Relu(x) = max(0, x)

In [9]:
zero_tensor = torch.Tensor([0]).to(device)
print(zero_tensor)

tensor([0.], device='cuda:0')


In [10]:
# Hint: torch.maximum과 zero_tensor를 활용해보세요.
# Hint: https://pytorch.org/docs/stable/generated/torch.nn.ReLU.html?highlight=relu#torch.nn.ReLU
class relu(BaseActivation):
    def __call__(self, x):
        ################ TODO ################
        ret = torch.maximum(x,zero_tensor)
        return ret
        ################ TODO ################

### Leaky ReLU

In [11]:
# Hint 1: torch.Tensor를 활용해 negative slope를 정의하세요
# Hint 2: torch.where를 활용해 조건에 따른 return값을 설정해보세요
# Hint: https://pytorch.org/docs/stable/generated/torch.nn.LeakyReLU.html?highlight=leakyrelu

class leakyrelu(BaseActivation):
    def __init__(self):
        ################ TODO ################
        self.negative_slope = torch.Tensor([1e-2]).to(device)
        ################ TODO ################
        
    def __call__(self, x):
        ################ TODO ################
        ret = torch.where(x>0,x,self.negative_slope*x)
        return ret
        ################ TODO ################

### PReLU

In [12]:
# Hint: 위와 마찬가지로 조건에 따른 return값을 설정해보세요.
# Hint: https://pytorch.org/docs/stable/generated/torch.nn.PReLU.html?highlight=prelu#torch.nn.PReLU

class prelu(BaseActivation):
    def __init__(self):
        self.alpha = torch.empty(1).fill_(0.25).to(device).requires_grad_()
        
    def __call__(self, x):
        ################ TODO ################
        ret = torch.where(x>=0, x, self.alpha*x)
        return ret
        ################ TODO ################
        
    def parameters(self):
        return [self.alpha]

### ELU

In [13]:
# Hint: https://pytorch.org/docs/stable/generated/torch.nn.ELU.html?highlight=elu#torch.nn.ELU

class elu(BaseActivation):
    def __init__(self):
        self.alpha = torch.empty(1).fill_(1.0).to(device).requires_grad_()
        
    def __call__(self, x):
        ################ TODO ################
        ret = torch.where(x>0,x,self.alpha*(torch.exp(x)-1))
        return ret
        ################ TODO ################
    
    def parameters(self):
        return [self.alpha]

###Your Custom Activation Function

In [14]:
# Custom activation을 만들어보세요.
class my_activation_1(BaseActivation):
    def __init__(self):
        self.alpha=1.01
        self.negative_slope =0.01
        
    def __call__(self, x):
        ################ TODO ################
        return torch.where(x>=0, self.alpha*x,self.negative_slope*x)
        ################ TODO ################

In [15]:
# Custom activation을 만들어보세요.
class my_activation_2(BaseActivation):
    def __init__(self):
        self.alpha =1.00
        self.negative_slope = 0.1
        
    def __call__(self, x):
        ################ TODO ################
        return torch.where(x>=0, self.alpha*x, self.negative_slope*torch.exp(x))
        ################ TODO ################

In [16]:
# Custom activation을 만들어보세요.
class my_activation_3(BaseActivation):
    def __init__(self):
        pass
        
    def __call__(self, x):
        ################ TODO ################
        return torch.mul(x,torch.div(1,torch.add(1,torch.exp(torch.negative(-x)))))
        ################ TODO ################

# MNIST 손글씨 이미지 분류하기


### 하이퍼파라미터 세팅

In [17]:
learning_rate = 0.01
epochs = 10
batch_size = 1024

### 데이터셋, 데이터 로더 정의

In [18]:
#손글씨 분류를 위한 데이터셋인 MNIST 데이터셋 다운로드

mnist_train = dsets.MNIST(root='MNIST_data/', # 다운로드 경로 지정
                          train=True, # True로 지정하면 훈련 데이터로 다운로드
                          transform=transforms.ToTensor(), # 텐서 데이터로 변환
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/', # 다운로드 경로 지정
                         train=False, # False로 지정하면 테스트 데이터로 다운로드
                         transform=transforms.ToTensor(), # 텐서 데이터로 변환
                         download=True)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [19]:
# 다운로드 한 데이터셋을 이용해 데이터 로더 정의
# 데이터 로더는 iterable type으로, 반복문을 사용하여 한 번에 배치 사이즈만큼 데이터를 불러올 수 있음

data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

total_batch = len(data_loader)  # Mini-batch의 개수 = (전체 데이터 수 / batch size)

### 모델(신경망) 구조 정의

In [20]:
class Net(torch.nn.Module):
    def __init__(self, use):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(28*28, 64) # input image shape가 가로 28, 세로 28이므로 28*28
        self.fc2 = torch.nn.Linear(64, 32)
        self.fc3 = torch.nn.Linear(32, 16)
        self.fc4 = torch.nn.Linear(16, 10) # 최종 예측 클래스 종류는 0~9, 10가지이므로 10개
        
        # 파라미터로 주어진 activation 종다르게 설정
        if use == 'relu':
            self.activation = relu()
        elif use == 'sigmoid':
            self.activation = sigmoid()
        elif use == 'prelu':
            self.activation = prelu()
        elif use == 'leaky_relu':
            self.activation = leakyrelu()
        elif use == 'elu':
            self.activation = elu()
        elif use == 'custom_1':
            self.activation = my_activation_1()
        elif use == 'custom_2':
            self.activation = my_activation_2()
        elif use == 'custom_3':
            self.activation = my_activation_3()
            
    # fc1 -> activation -> fc2
    def forward(self, x):
        x = x.view(-1, 784)
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x

### 모델 Training

In [21]:
# 모델을 생성하고 학습시킨 후, 성능을 performance_dict에 저장하는 함수
def train_eval(use):
    global performance_dict
    model = Net(use).to(device)
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(list(model.parameters()) + model.activation.parameters(), lr=learning_rate)

    start = time.time(); test_accs = []
    print(f"\n###### Using Activation Function: {use.upper()} ######")
    for epoch in range(epochs):
        avg_loss = 0
        avg_acc = 0

        for X, Y in data_loader:
            X = X.to(device)
            Y = Y.to(device)

            optimizer.zero_grad()
            prediction = model(X)
            loss = criterion(prediction, Y)
            acc = (torch.argmax(prediction, 1) == Y).float().mean().item()
            loss.backward()
            optimizer.step()

            avg_loss += loss / total_batch
            avg_acc += acc / total_batch

        print(f'[Epoch: {epoch+1:>2}] train_loss: {avg_loss:.4f}, train_acc: {avg_acc:.4f}, ', end="")

        with torch.no_grad():
            X_test = mnist_test.data.view(len(mnist_test), 1, 28, 28).float().to(device)
            Y_test = mnist_test.targets.to(device)

            test_prediction = model(X_test)
            correct_prediction = torch.argmax(test_prediction, 1) == Y_test
            test_accuracy = correct_prediction.float().mean().item()
            test_accs.append(test_accuracy)
            print(f"test_acc: {test_accuracy:.4f}, best_acc: {max(test_accs):.4f}")
    
    performance_dict[use] = {'time': time.time()-start, 'accuracy' : max(test_accs)}

### 성능 확인

In [22]:
performance_dict = {}
for use in ['relu','sigmoid','prelu', 'leaky_relu', 'elu','custom_1','custom_2','custom_3']:   #위에서 정의한 각 활성화함수를 이용해 훈련
    train_eval(use)
performance_df = pd.DataFrame(performance_dict).T


###### Using Activation Function: RELU ######
[Epoch:  1] train_loss: 0.8153, train_acc: 0.7293, test_acc: 0.9053, best_acc: 0.9053
[Epoch:  2] train_loss: 0.2387, train_acc: 0.9306, test_acc: 0.9416, best_acc: 0.9416
[Epoch:  3] train_loss: 0.1682, train_acc: 0.9501, test_acc: 0.9548, best_acc: 0.9548
[Epoch:  4] train_loss: 0.1317, train_acc: 0.9600, test_acc: 0.9496, best_acc: 0.9548
[Epoch:  5] train_loss: 0.1042, train_acc: 0.9693, test_acc: 0.9622, best_acc: 0.9622
[Epoch:  6] train_loss: 0.0889, train_acc: 0.9728, test_acc: 0.9662, best_acc: 0.9662
[Epoch:  7] train_loss: 0.0782, train_acc: 0.9756, test_acc: 0.9622, best_acc: 0.9662
[Epoch:  8] train_loss: 0.0691, train_acc: 0.9785, test_acc: 0.9689, best_acc: 0.9689
[Epoch:  9] train_loss: 0.0624, train_acc: 0.9804, test_acc: 0.9681, best_acc: 0.9689
[Epoch: 10] train_loss: 0.0530, train_acc: 0.9832, test_acc: 0.9667, best_acc: 0.9689

###### Using Activation Function: SIGMOID ######
[Epoch:  1] train_loss: 1.7322, train_acc: 

In [23]:
performance_df

Unnamed: 0,time,accuracy
relu,48.611004,0.9689
sigmoid,44.401778,0.9557
prelu,45.749871,0.9682
leaky_relu,46.419384,0.9705
elu,44.519744,0.9288
custom_1,44.594411,0.9671
custom_2,44.576229,0.971
custom_3,44.784685,0.9682


In [None]:
print(f"Best Accuracy Activation Function was: {performance_df['accuracy'].max():.4f}, {performance_df.index[performance_df['accuracy'].argmax()]}")
print(f"Custom Activation Function Accuracy Rank: {int(performance_df['accuracy'].rank(method='max', ascending=False)['custom_1'])}")
print(f"Custom Activation Function Accuracy Rank: {int(performance_df['accuracy'].rank(method='max', ascending=False)['custom_2'])}")
print(f"Custom Activation Function Accuracy Rank: {int(performance_df['accuracy'].rank(method='max', ascending=False)['custom_3'])}")
print(f"Custom Activation Function Time Complexity Rank: {int(performance_df['time'].rank(method='min')['custom_1'])}")
print(f"Custom Activation Function Time Complexity Rank: {int(performance_df['time'].rank(method='min')['custom_2'])}")
print(f"Custom Activation Function Time Complexity Rank: {int(performance_df['time'].rank(method='min')['custom_3'])}")

plt.figure(figsize=(8, 6))
sns.set_style('darkgrid')
plt.rcParams.update({'font.size': 15})
plt.title("Activation Function Compare Plot")
sns.scatterplot(data=performance_df, x='time', y='accuracy', hue=performance_df.index, s=100)
plt.show()

Best Accuracy Activation Function was: 0.9710, custom_2
Custom Activation Function Accuracy Rank: 6
Custom Activation Function Accuracy Rank: 1
Custom Activation Function Accuracy Rank: 5
Custom Activation Function Time Complexity Rank: 4
Custom Activation Function Time Complexity Rank: 3
Custom Activation Function Time Complexity Rank: 5
