In [30]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

### 1. **XOR**

In [74]:
X = torch.FloatTensor([[0,0],[0,1],[1,0],[1,1]]).to(device)
Y = torch.FloatTensor([[1],[0],[0],[1]]).to(device) #XOR의 답

linear = torch.nn.Linear(2, 1, bias = True) #퍼셉트론은 한 개의 레이어
sigmoid = torch.nn.Sigmoid() #ACTIVATION FUNCTION
model = torch.nn.Sequential(linear, sigmoid).to(device)

criterion = torch.nn.BCELoss().to(device) #loss = binary cross entropy loss
optimizer = torch.optim.SGD(model.parameters(), lr = 1) 
#optimizer = stochastic gradient descent optimizer

for step in range(10001):
  optimizer.zero_grad()
  hypothesis = model(X)
  
  cost = criterion(hypothesis, Y) #hypothesis와 Y가 같아지도록 학습
  cost.backward()
  optimizer.step()

  if step % 100 == 0:
    print(step, cost.item()) #loss값이 출력됨

with torch.no_grad():
  hypothesis = model(X)
  predicted = (hypothesis > 0.5).float()
  accuracy = (predicted == Y).float().mean()
  print('\nHypothesis: ', hypothesis.detach().cpu().numpy(), 
        '\nCorrect:', predicted.detach().cpu().numpy(),
        '\nAccuracy: ', accuracy.item())

0 0.7315453290939331
100 0.6931489706039429
200 0.6931471824645996
300 0.6931471228599548
400 0.6931471824645996
500 0.6931471824645996
600 0.6931471824645996
700 0.6931471824645996
800 0.6931471824645996
900 0.6931471824645996
1000 0.6931471824645996
1100 0.6931471824645996
1200 0.6931471824645996
1300 0.6931471824645996
1400 0.6931471824645996
1500 0.6931471824645996
1600 0.6931471824645996
1700 0.6931471824645996
1800 0.6931471824645996
1900 0.6931471824645996
2000 0.6931471824645996
2100 0.6931471824645996
2200 0.6931471824645996
2300 0.6931471824645996
2400 0.6931471824645996
2500 0.6931471824645996
2600 0.6931471824645996
2700 0.6931471824645996
2800 0.6931471824645996
2900 0.6931471824645996
3000 0.6931471824645996
3100 0.6931471824645996
3200 0.6931471824645996
3300 0.6931471824645996
3400 0.6931471824645996
3500 0.6931471824645996
3600 0.6931471824645996
3700 0.6931471824645996
3800 0.6931471824645996
3900 0.6931471824645996
4000 0.6931471824645996
4100 0.6931471824645996
4200

### 2. **XOR 완전 처음부터** 

In [56]:
X = torch.FloatTensor([[0,0],[0,1],[1,0],[1,1]]).to(device)
Y = torch.FloatTensor([[0],[1],[1],[0]]).to(device)

w1 = torch.Tensor(2,2).to(device)
b1 = torch.Tensor(2).to(device)
w2 = torch.Tensor(2,1).to(device)
b2 = torch.Tensor(1).to(device)

def sigmoid(x):
  return 1.0 / (1.0 + torch.exp(-x))

def sigmoid_prime(x):
  return sigmoid(x) * (1 - sigmoid(x))

#backpropagation
for step in range(10001):

  #[forward]
  l1 = torch.add(torch.matmul(X, w1), b1)
  a1 = sigmoid(l1)
  l2 = torch.add(torch.matmul(a1, w2), b2)
  Y_pred = sigmoid(l2)

  cost = -torch.mean(Y * torch.log(Y_pred) + (1-Y) * torch.log(1 - Y_pred))
  #위 cost는 binary cross entropy loss

  #[back prop(chain rule)]
  #Loss derivative
  d_Y_pred = (Y_pred - Y) / (Y_pred * (1.0 - Y_pred) + 1e-7)
  #1e-7은 0으로 나누어지는 것을 막기 위함

  #layer2
  d_l2 = d_Y_pred * sigmoid_prime(l2)
  d_b2 = d_l2
  d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_b2)
  '''차원의 축을 지정해줄 수 있음. 예를 들어 10, 5 matrix에서
  0번째 축과 1번째 축을 transpose하면 5, 10이 된다.'''

  #layer1
  d_a1 = torch.matmul(d_b2, torch.transpose(w2, 0, 1))
  d_l1 = d_a1 * sigmoid_prime(l1)
  d_b1 = d_l1
  d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_b1)

  #[weight update]
  learning_rate = 1
  w1 = w1 - learning_rate * d_w1
  b1 = b1 - learning_rate * torch.mean(d_b1, 0)
  w2 = w2 - learning_rate * d_w2
  b2 = b2 - learning_rate * torch.mean(d_b2, 0)

  if step % 100 == 0:
    print(step, cost.item())

with torch.no_grad():
  hypothesis = Y_pred
  predicted = (Y_pred > 0.5).float()
  accuracy = (predicted == Y).float().mean()
  print('\nHypotheses: ', hypothesis.detach().cpu().numpy(),
        '\nCorrect: ', predicted.detach().cpu().numpy(),
        '\nAccuracy: ', accuracy.item())

0 0.6931471824645996
100 0.6931471824645996
200 0.6931471824645996
300 0.6931471824645996
400 0.6931471824645996
500 0.6931471824645996
600 0.6931470632553101
700 0.6931451559066772
800 0.6930591464042664
900 0.6374746561050415
1000 0.515807569026947
1100 0.03073531948029995
1200 0.016541240736842155
1300 0.011155912652611732
1400 0.008369732648134232
1500 0.006678675301373005
1600 0.005547248758375645
1700 0.004738787189126015
1800 0.004133100155740976
1900 0.0036629480309784412
2000 0.0032875738106667995
2100 0.002981178928166628
2200 0.0027263634838163853
2300 0.002511260099709034
2400 0.002327246591448784
2500 0.0021681180223822594
2600 0.002029110211879015
2700 0.0019067481625825167
2800 0.0017980983247980475
2900 0.0017011238960549235
3000 0.0016139850486069918
3100 0.001535230316221714
3200 0.001463752705603838
3300 0.0013985652476549149
3400 0.0013388905208557844
3500 0.0012840854469686747
3600 0.0012335223145782948
3700 0.001186782494187355
3800 0.0011434027692303061
3900 0.00

### 3. **XOR NN**

In [0]:
X = torch.FloatTensor([[0,0],[0,1],[1,0],[1,1]]).to(device)
Y = torch.FloatTensor([[0],[1],[1],[0]]).to(device)

linear1 = torch.nn.Linear(2, 2, bias = True)
linear2 = torch.nn.Linear(2, 1, bias = True)
sigmoid = torch.nn.Sigmoid()

In [0]:
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)

criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr = 0.5)

In [59]:
for step in range(10001):
  optimizer.zero_grad()
  hypothesis = model(X)

  cost = criterion(hypothesis, Y)
  cost.backward()
  optimizer.step()

  if step % 100 == 0:
    print(step, cost.item())

0 0.7323375940322876
100 0.6930650472640991
200 0.6926268935203552
300 0.6916780471801758
400 0.6885766983032227
500 0.6766101717948914
600 0.6400720477104187
700 0.5736432075500488
800 0.49891355633735657
900 0.3934364318847656
1000 0.25150004029273987
1100 0.1506940871477127
1200 0.09846918284893036
1300 0.07062216848134995
1400 0.05419902503490448
1500 0.043618351221084595
1600 0.03632316738367081
1700 0.031026707962155342
1800 0.027024593204259872
1900 0.02390354312956333
2000 0.0214068740606308
2100 0.0193674024194479
2200 0.017672164365649223
2300 0.016242144629359245
2400 0.015020443126559258
2500 0.013965299353003502
2600 0.013045280240476131
2700 0.012236267328262329
2800 0.01151962485164404
2900 0.01088055968284607
3000 0.010307222604751587
3100 0.009790109470486641
3200 0.00932142324745655
3300 0.008894712664186954
3400 0.008504701778292656
3500 0.008146882057189941
3600 0.007817414589226246
3700 0.007513113785535097
3800 0.007231179624795914
3900 0.006969384849071503
4000 0

In [61]:
with torch.no_grad():
  hypothesis = model(X)
  predicted = (hypothesis > 0.5).float()
  accuracy = (predicted == Y).float().mean()
  print('\nHypothesis: ', hypothesis.detach().cpu().numpy(),
        '\nCorrect: ', predicted.detach().cpu().numpy(),
        '\nAccuracy: ', accuracy.item())


Hypothesis:  [[0.00182415]
 [0.9980762 ]
 [0.9980773 ]
 [0.00288997]] 
Correct:  [[0.]
 [1.]
 [1.]
 [0.]] 
Accuracy:  1.0


### 4. **XOR NN WIDE DEEP**

In [0]:
X = torch.FloatTensor([[0,0],[0,1],[1,0],[1,1]]).to(device)
Y = torch.FloatTensor([[0],[1],[1],[0]]).to(device)

linear1 = torch.nn.Linear(2,10, bias = True)
linear2 = torch.nn.Linear(10,10, bias = True)
linear3 = torch.nn.Linear(10,10, bias = True)
linear4 = torch.nn.Linear(10,1, bias = True)
sigmoid = torch.nn.Sigmoid()

model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid, linear3, sigmoid, linear4, sigmoid).to(device)

criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr = 1)

In [78]:
for step in range(10001):
  optimizer.zero_grad()
  hypothesis = model(X)

  cost = criterion(hypothesis, Y)
  cost.backward()
  optimizer.step()

  if step % 100 == 0:
    print(step, cost.item())

0 0.6931946873664856
100 0.6931470036506653
200 0.6931459903717041
300 0.6931450366973877
400 0.6931441426277161
500 0.6931431293487549
600 0.6931421160697937
700 0.6931411027908325
800 0.6931400299072266
900 0.6931388974189758
1000 0.6931377649307251
1100 0.6931365728378296
1200 0.6931352615356445
1300 0.6931338310241699
1400 0.6931325197219849
1500 0.6931309700012207
1600 0.6931293606758118
1700 0.6931277513504028
1800 0.69312584400177
1900 0.6931238174438477
2000 0.6931217908859253
2100 0.6931194067001343
2200 0.6931169033050537
2300 0.693114161491394
2400 0.6931110620498657
2500 0.6931077241897583
2600 0.6931040287017822
2700 0.6930999159812927
2800 0.6930952668190002
2900 0.6930901408195496
3000 0.6930842995643616
3100 0.6930776238441467
3200 0.6930700540542603
3300 0.6930612325668335
3400 0.6930508613586426
3500 0.6930388808250427
3600 0.6930246353149414
3700 0.6930074691772461
3800 0.6929863691329956
3900 0.6929605007171631
4000 0.6929278373718262
4100 0.6928856372833252
4200 0.

In [80]:
with torch.no_grad():
  htpothesis = model(X)
  predicted = (hypothesis > 0.5).float()
  accuracy = (predicted == Y).float().mean()

  print('\nHypothesis: ', hypothesis.detach().cpu().numpy(),
        '\nCorrect: ', predicted.detach().cpu().numpy(),
        '\nAccuracy: ', accuracy.item())


Hypothesis:  [[1.0525438e-04]
 [9.9988246e-01]
 [9.9986696e-01]
 [1.4131620e-04]] 
Correct:  [[0.]
 [1.]
 [1.]
 [0.]] 
Accuracy:  1.0


### 5. **MNIST BACKPROP**

In [0]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [0]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device == 'cuda':
  torch.cuda.manual_seed_all(777)

In [0]:
learning_rate = 0.5
batch_size = 10

In [84]:
mnist_train = dsets.MNIST(root = 'MNIST_data/',
                          train = True,
                          transform = transforms.ToTensor(),
                          download = True)

mnist_test = dsets.MNIST(root = 'MNIST_data/',
                         train = False,
                         transform = transforms.ToTensor(),
                         download = True)

0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:01, 8151340.64it/s]                            


Extracting MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw


  0%|          | 0/28881 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 120978.82it/s]           
  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 2021703.30it/s]                            
0it [00:00, ?it/s]

Extracting MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 46822.99it/s]            


Extracting MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw
Processing...
Done!


In [0]:
data_loader = torch.utils.data.DataLoader(dataset = mnist_train,
                                          batch_size = batch_size,
                                          shuffle = True,
                                          drop_last = True)

In [0]:
w1 = torch.nn.Parameter(torch.Tensor(784, 30)).to(device)
b1 = torch.nn.Parameter(torch.Tensor(30)).to(device)
w2 = torch.nn.Parameter(torch.Tensor(30, 10)).to(device)
b2 = torch.nn.Parameter(torch.Tensor(10)).to(device)

In [91]:
torch.nn.init.normal_(w1)
torch.nn.init.normal_(b1)
torch.nn.init.normal_(w2)
torch.nn.init.normal_(b2)

tensor([-1.7207,  0.4899,  0.6213,  0.5817,  0.2904, -0.0281,  0.2706,  0.7244,
        -0.0918,  0.7207], device='cuda:0', grad_fn=<CopyBackwards>)

In [0]:
def sigmoid(x):
  return 1.0 / (1.0 + torch.exp(-x))

def sigmoid_prime(x):
  return sigmoid(x) * (1 - sigmoid(x))

In [93]:
X_test = mnist_test.test_data.view(-1, 28*28).float().to(device)[:1000]
Y_test = mnist_test.test_labels.to(device)[:1000]
i = 0

while not i == 10000:
  for X, Y in data_loader:
    i += 1

    X = X.view(-1, 28*28).to(device)
    Y = torch.zeros((batch_size, 10)).scatter_(1, Y.unsqueeze(1), 1).to(device)
    l1 = torch.add(torch.matmul(X, w1), b1)
    a1 = sigmoid(l1)
    l2 = torch.add(torch.matmul(a1, w2), b2)
    y_pred = sigmoid(l2)

    diff = y_pred - Y

    #backprop(chain rule)
    d_l2 = diff * sigmoid_prime(l2)
    d_b2 = d_l2
    d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_l2)

    d_a1 = torch.matmul(d_l2, torch.transpose(w2, 0, 1))
    d_l1 = d_a1 * sigmoid_prime(l1)
    d_b1 = d_l1
    d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_l1)

    w1 = w1 - learning_rate * d_w1
    b1 = b1 - learning_rate * torch.mean(d_b1, 0)
    w2 = w2 - learning_rate * d_w2
    b2 = b2 - learning_rate * torch.mean(d_b2, 0)

    if i% 1000 == 0:
      l1 = torch.add(torch.matmul(X_test, w1), b1)
      a1 = sigmoid(l1)
      l2 = torch.add(torch.matmul(a1, w2), b2)
      y_pred = sigmoid(l2)
      acct_mat = torch.argmax(y_pred, 1) == Y_test
      acct_res = acct_mat.sum()
      print(acct_res.item())

    if i == 10000:
      break



798
868
884
896
907
898
902
897
910
918
