<a href="https://colab.research.google.com/github/Zawo1995/pytorch-TPU_test/blob/master/pytorch_CPU_GPU_TPU%E9%80%9F%E5%BA%A6%E6%B5%8B%E8%AF%95.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## PyTorch/TPU MNIST Demo

This colab example corresponds to the implementation under [test_train_mnist.py](https://github.com/pytorch/xla/blob/master/test/test_train_mnist.py) and is TF/XRT 1.15 compatible.

<h3>  &nbsp;&nbsp;Use Colab Cloud TPU&nbsp;&nbsp; <a href="https://cloud.google.com/tpu/"><img valign="middle" src="https://raw.githubusercontent.com/GoogleCloudPlatform/tensorflow-without-a-phd/master/tensorflow-rl-pong/images/tpu-hexagon.png" width="50"></a></h3>

* On the main menu, click Runtime and select **Change runtime type**. Set "TPU" as the hardware accelerator.
* The cell below makes sure you have access to a TPU on Colab.


In [0]:
import os
# Use .get() so a missing variable trips the assertion (and shows its hint)
# instead of raising a bare KeyError before the message can be displayed.
assert os.environ.get('COLAB_TPU_ADDR'), 'Make sure to select TPU from Edit > Notebook settings > Hardware accelerator'

### [RUNME] Install Colab TPU compatible PyTorch/TPU wheels and dependencies
This may take up to 2 minutes.

In [0]:
DIST_BUCKET="gs://tpu-pytorch/wheels"
TORCH_WHEEL="torch-1.15-cp36-cp36m-linux_x86_64.whl"
TORCH_XLA_WHEEL="torch_xla-1.15-cp36-cp36m-linux_x86_64.whl"
TORCHVISION_WHEEL="torchvision-0.3.0-cp36-cp36m-linux_x86_64.whl"

# Install Colab TPU compat PyTorch/TPU wheels and dependencies
!pip uninstall -y torch torchvision
!gsutil cp "$DIST_BUCKET/$TORCH_WHEEL" .
!gsutil cp "$DIST_BUCKET/$TORCH_XLA_WHEEL" .
!gsutil cp "$DIST_BUCKET/$TORCHVISION_WHEEL" .
!pip install "$TORCH_WHEEL"
!pip install "$TORCH_XLA_WHEEL"
!pip install "$TORCHVISION_WHEEL"
!sudo apt-get install libomp5

### Define parameters, load MNIST, build the CNN, and train


In [10]:

import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision

from tqdm import tqdm

# Presumably the TPU core count; nothing in the visible notebook reads it.
num_cores = 8


# --- Run configuration -------------------------------------------------------
data_dir = "/tmp/mnist"   # where torchvision stores / looks for MNIST
EPOCH = 10                # full passes over the training set
BATCH_SIZE = 50
LR = 0.001                # learning rate handed to the optimizer
# Fetch MNIST into data_dir when it is missing. torchvision skips the download
# if the files already exist, so this stays cheap on re-runs while still
# working on a fresh runtime where /tmp/mnist is empty.
DOWNLOAD_MNIST = True
device_type = 1  # 0: cpu, 1: gpu, 2: tpu

# --- Device selection --------------------------------------------------------
if device_type == 0:
    device = torch.device("cpu")
elif device_type == 1:
    # Fail here with a clear message rather than later at the first .to(device).
    if not torch.cuda.is_available():
        raise RuntimeError("device_type == 1 (gpu) was selected but CUDA is not available; "
                           "switch the runtime to GPU or change device_type")
    device = torch.device("cuda:0")
elif device_type == 2:
    # torch_xla is only importable after the TPU wheels have been installed,
    # so it is imported lazily in this branch.
    import torch_xla.distributed.data_parallel as dp
    import torch_xla.core.xla_model as xm
    device = xm.xla_device()
else:
    # Without this branch an unknown value would leave `device` undefined.
    raise ValueError("device_type must be 0 (cpu), 1 (gpu) or 2 (tpu), got %r" % (device_type,))


# --- Data --------------------------------------------------------------------
train_data = torchvision.datasets.MNIST(
    root=data_dir,
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

test_data = torchvision.datasets.MNIST(root=data_dir, train=False, download=DOWNLOAD_MNIST)

# Fixed evaluation subset: the first 2000 test samples, kept on `device`.
# The raw image tensor gets a channel axis at dim 1, is cast to float and is
# divided by 255. `.data` / `.targets` replace the deprecated
# `.test_data` / `.test_labels` accessors.
test_x = torch.unsqueeze(test_data.data, dim=1).type(torch.FloatTensor)[:2000].to(device) / 255.
test_y = test_data.targets[:2000].to(device)


class CNN(nn.Module):
    """Small two-stage convolutional classifier that emits 10 scores per image.

    Each stage is a 5x5 convolution (stride 1, padding 2), a ReLU and a 2x2
    max-pool. The final linear layer expects 32 * 7 * 7 features, i.e. a
    single-channel 28x28 input halved twice by the pooling layers.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 16 channels, then 2x2 max-pooling.
        stage_one = [
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        self.conv1 = nn.Sequential(*stage_one)

        # Stage 2: 16 -> 32 channels, same kernel/stride/padding, then pooling.
        stage_two = [nn.Conv2d(16, 32, 5, 1, 2), nn.ReLU(), nn.MaxPool2d(2)]
        self.conv2 = nn.Sequential(*stage_two)

        # Classifier head over the flattened stage-2 feature map.
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return the (batch, 10) output of the linear head for input batch `x`."""
        features = self.conv2(self.conv1(x))
        # Collapse everything except the batch axis before the linear layer.
        flattened = features.view(features.size(0), -1)
        return self.out(flattened)

# Build the model and place its parameters on the selected device.
cnn = CNN()
cnn = cnn.to(device)
print(cnn)


optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()

for epoch in tqdm(range(EPOCH)):
    for step, (x, y) in enumerate(train_loader):

        # Move the mini-batch to the same device as the model.
        b_x = x.to(device)
        b_y = y.to(device)

        output = cnn(b_x)
        loss = loss_func(output, b_y)
        optimizer.zero_grad()
        loss.backward()
        if device_type == 2:
            # On the XLA device the optimizer update goes through torch_xla.
            xm.optimizer_step(optimizer, barrier=True)
        else:
            optimizer.step()

        # Every 50 steps, report accuracy on the held-out evaluation subset.
        if step % 50 == 0:
            # Evaluation needs no gradients; no_grad avoids building an
            # autograd graph over the whole evaluation batch.
            with torch.no_grad():
                test_output = cnn(test_x)
                # Index of the largest score per row = predicted class.
                pred_y = torch.max(test_output, 1)[1]
                accuracy = torch.sum(pred_y == test_y).type(torch.FloatTensor) / test_y.size(0)
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(), '| test accuracy: %.2f' % accuracy)


# Compare predictions with the labels for the first 10 evaluation samples.
with torch.no_grad():
    test_output = cnn(test_x[:10])
    pred_y = torch.max(test_output, 1)[1]

print(pred_y, 'prediction number')
print(test_y[:10], 'real number')





  0%|          | 0/10 [00:00<?, ?it/s][A[A[A[A

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)
Epoch:  0 | train loss: 2.3069 | test accuracy: 0.16
Epoch:  0 | train loss: 0.7145 | test accuracy: 0.81
Epoch:  0 | train loss: 0.2710 | test accuracy: 0.87
Epoch:  0 | train loss: 0.3454 | test accuracy: 0.92
Epoch:  0 | train loss: 0.2372 | test accuracy: 0.94
Epoch:  0 | train loss: 0.3531 | test accuracy: 0.93
Epoch:  0 | train loss: 0.0897 | test accuracy: 0.94
Epoch:  0 | train loss: 0.1182 | test accuracy: 0.96
Epoch:  0 | train loss: 0.1455 | test accuracy: 0.95
Epoch:  0 | train loss: 0.1310 | test accuracy: 0.9





 10%|█         | 1/10 [00:09<01:25,  9.55s/it][A[A[A[A

Epoch:  1 | train loss: 0.0403 | test accuracy: 0.98
Epoch:  1 | train loss: 0.1028 | test accuracy: 0.97
Epoch:  1 | train loss: 0.0147 | test accuracy: 0.98
Epoch:  1 | train loss: 0.0292 | test accuracy: 0.98
Epoch:  1 | train loss: 0.0219 | test accuracy: 0.98
Epoch:  1 | train loss: 0.0235 | test accuracy: 0.97
Epoch:  1 | train loss: 0.0650 | test accuracy: 0.98


KeyboardInterrupt: ignored