In [1]:
import numpy as np

def load_data_small():
    """ Load small training and validation dataset

        Returns a tuple of length 4 with the following objects:
        X_train: An N_train-x-M ndarray containing the training data (N_train examples, M features each)
        y_train: An N_train-x-1 ndarray contraining the labels
        X_val: An N_val-x-M ndarray containing the training data (N_val examples, M features each)
        y_val: An N_val-x-1 ndarray contraining the labels
    """
    train_all = np.loadtxt('HW4/data/smallTrain.csv', dtype=int, delimiter=',')
    valid_all = np.loadtxt('HW4/data/smallValidation.csv', dtype=int, delimiter=',')

    X_train = train_all[:, 1:]
    y_train = train_all[:, 0]
    X_val = valid_all[:, 1:]
    y_val = valid_all[:, 0]

    return (X_train, y_train, X_val, y_val)


def load_data_medium():
    """ Load medium training and validation dataset

        Returns a tuple of length 4 with the following objects:
        X_train: An N_train-x-M ndarray containing the training data (N_train examples, M features each)
        y_train: An N_train-x-1 ndarray contraining the labels
        X_val: An N_val-x-M ndarray containing the training data (N_val examples, M features each)
        y_val: An N_val-x-1 ndarray contraining the labels
    """
    train_all = np.loadtxt('HW4/data/mediumTrain.csv', dtype=int, delimiter=',')
    valid_all = np.loadtxt('HW4/data/mediumValidation.csv', dtype=int, delimiter=',')

    X_train = train_all[:, 1:]
    y_train = train_all[:, 0]
    X_val = valid_all[:, 1:]
    y_val = valid_all[:, 0]

    return (X_train, y_train, X_val, y_val)


def load_data_large():
    """ Load large training and validation dataset

        Returns a tuple of length 4 with the following objects:
        X_train: An N_train-x-M ndarray containing the training data (N_train examples, M features each)
        y_train: An N_train-x-1 ndarray contraining the labels
        X_val: An N_val-x-M ndarray containing the training data (N_val examples, M features each)
        y_val: An N_val-x-1 ndarray contraining the labels
    """
    train_all = np.loadtxt('HW4/data/largeTrain.csv', dtype=int, delimiter=',')
    valid_all = np.loadtxt('HW4/data/largeValidation.csv', dtype=int, delimiter=',')

    X_train = train_all[:, 1:]
    y_train = train_all[:, 0]
    X_val = valid_all[:, 1:]
    y_val = valid_all[:, 0]

    return (X_train, y_train, X_val, y_val)

X_train, y_train, X_val, y_val = load_data_small()

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
%matplotlib inline
%config InlineBackend.figure_format='retina' # 레티나 설정 - 폰트 주변이 흐릿하게 보이는 것을 방지해 글씨가 좀 더 선명하게 보임

print ("PyTorch version:[%s]."%(torch.__version__)) # 토치 버전 확인

# device에 일반 GPU or M1 GPU or CPU를 할당해주는 코드
if torch.cuda.is_available() : # 일반 GPU 사용시
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
elif torch.backends.mps.is_available(): # 맥 M1 GPU 사용시
    device = torch.device('mps:0' if torch.backends.mps.is_available() else 'cpu')
else:
    device = torch.device('cpu')
print ("device:[%s]."%(device))

# Make our np array to dataloader
from torch.utils.data import TensorDataset, DataLoader

my_x = X_train # a list of numpy arrays
my_y = y_train# another list of numpy arrays (targets)

tensor_x = torch.Tensor(my_x) # transform to torch tensor
tensor_y = torch.Tensor(my_y)

train = TensorDataset(tensor_x,tensor_y) # create your datset
train_iter = DataLoader(train) # create your dataloader


my_x = X_val # a list of numpy arrays
my_y = y_val# another list of numpy arrays (targets)

tensor_x = torch.Tensor(my_x) # transform to torch tensor
tensor_y = torch.Tensor(my_y)

test = TensorDataset(tensor_x,tensor_y) # create your datset
test_iter = DataLoader(train) # create your dataloader

PyTorch version:[2.1.1].
device:[mps:0].


In [None]:
class MultiLayerPerceptronClass(nn.Module):
    """
        Multilayer Perceptron (MLP) Class - nn.Module을 상속하는 클래스임
        __init__ : 변수 초기화
            name : 모델명
            xdim : input 데이터 크기
            hdim : 히든레이어 크기
            ydim : output 데이터 크기
            lin_1 : input - hidden1 선형변환
            lin_2 : hidden1 - output 선형변환
            init_param : 파라미터 초기화

        init_param : 파라미터 초기화
            nn.init.kaiming_normal_(weight) : 가중치 텐서에 정규분포 N(0, std^2) 를 따르는 He 초기화를 실행함
            nn.init.zeros_(bia)             : 편향 텐서에 스칼라 0으로 채움

        forward : 순전파 실행
            input - 선형변환1 - 활성화함수(렐루) - 선형변환2 - output
    """
    def __init__(self,name='mlp',xdim=128,hdim=10,ydim=10,init_flag = True):
        super(MultiLayerPerceptronClass,self).__init__()
        self.name = name
        self.xdim = xdim
        self.hdim = hdim
        self.ydim = ydim
        self.lin_1 = nn.Linear(self.xdim, self.hdim)
        self.lin_2 = nn.Linear(self.hdim, self.ydim)
        self.init_param(init_flag) # initialize parameters

    def init_param(self,init_falg):
        # nn.init.kaiming_normal_(self.lin_1.weight)
        # nn.init.zeros_(self.lin_1.bias)
        # nn.init.kaiming_normal_(self.lin_2.weight)
        # nn.init.zeros_(self.lin_2.bias)
        if init_falg:
            nn.init.uniform_(self.lin_1.weight,[-0.1,0.1])
            nn.init.uniform_(self.lin_1.bias,[-0.1,0.1])
            nn.init.uniform_(self.lin_2.weight,[-0.1,0.1])
            nn.init.uniform_(self.lin_2.bias,[-0.1,0.1])
        else:
            nn.init.zeros_(self.lin_1.weight)
            nn.init.zeros_(self.lin_1.bias)
            nn.init.zeros_(self.lin_2.weight)
            nn.init.zeros_(self.lin_2.bias)

    def forward(self,x):
        net = x
        net = self.lin_1(net)
        net = F.sigmoid(net) # sigmoid activation
        net = self.lin_2(net)
        return net
    
def func_eval(model,data_iter,device):
    '''
    model
        모델 변수 지정
    data_iter
        torch.utils.data.DataLoader로 지정된 변수 지정
    device
        GPU or CPU 지정
    '''
    with torch.no_grad():
    # with torch.no_grad()의 주된 목적은 autograd을 끔으로서 메모리 사용량 줄이고 연산속도 향상시킴
        model.eval() # evaluate (affects DropOut and BN)
        n_total,n_correct = 0,0
        for batch_in,batch_out in data_iter:
        # 데이터셋을 batch_size 갯수만큼 feed -> X.shape : (256,1,28,28), Y.shape : (256)
            y_trgt = batch_out.to(device)
            model_pred = model(batch_in.view(-1,16*8).to(device))
            # model(x값) (256, 1*28*28)으로 변환, 1,28,28인 데이터를 한줄로 펼친다고 생각하면 된다
            _,y_pred = torch.max(model_pred.data,1)
            # 예측값 중 가장 높은 값 1개를 반환
            n_correct += (y_trgt == y_pred).sum().item()
            # 정답과 예측값이 경우만 카운트
            n_total += batch_in.size(0)# feed된 데이터 수 카운트
        val_accr = (n_correct/n_total) # 정확도 : 일치값 수 / feed된 데이터 수
        model.train() # back to train mode -> 역전파실행
    return val_accr

M = MultiLayerPerceptronClass(name='mlp',xdim=128,hdim=10,ydim=10).to(device)
loss = nn.CrossEntropyLoss() # 교차 엔트로피 손실
optm = optim.Adam(M.parameters(),lr=1e-3) # 옵티마이저 : 아담, 학습률 1e-3

print ("Start training.")
M.init_param() # initialize parameters
M.train()
EPOCHS,print_every = 10,1 # 학습 횟수, 출력 조건
for epoch in range(EPOCHS):
    loss_val_sum = 0
    for batch_in,batch_out in train_iter:
    # batch_in - X, batch_out - Y
        # Forward path
        y_pred = M.forward(batch_in.view(-1, 16*8).to(device)) # 예측값 추출
        loss_out = loss(y_pred,batch_out.to(device)) #정답값과 예측값의 loss 계산

        # Update - backward path
        optm.zero_grad()    # reset gradient - 새로 계산한 미분값을 넣어주기 전에 기존에 구한 미분값을 reset
        loss_out.backward() # backpropagate - 미분값 계산
        optm.step()         # optimizer update - 구해진 미분값으로 가중치 업데이트

        # loss 저장
        loss_val_sum += loss_out
    loss_val_avg = loss_val_sum/len(train_iter)
    # print_every 단위로 loss와 정확도 Print
    if ((epoch%print_every)==0) or (epoch==(EPOCHS-1)):
        train_accr = func_eval(M,train_iter,device)
        test_accr = func_eval(M,test_iter,device)
        print ("epoch:[%d] loss:[%.3f] train_accr:[%.3f] test_accr:[%.3f]."%
               (epoch,loss_val_avg,train_accr,test_accr))
print ("Done")

In [5]:
X_train

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 1, ..., 1, 1, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [7]:
for x,y in train_iter:
    print(x.view(-1,8,16))
    print(y)
    print

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0.],
         [0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 1.],
         [0., 1., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1.],
         [1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 1.],
         [1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 1., 1., 0., 0.],
         [0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]])
tensor([6.])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1.],
         [1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1.],
         [1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 1.],
         [

In [None]:
num = [1,2,3,4,5,6,7,8]
tensor = torch.tensor(num)
a = tensor.view(-1,1,2,2) # a.shape : (2, 1, 2 ,2)
print('변환 전')
print(a)
print('변환 후')
a.view(-1,2*2*1)
