In [2]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import Omniglot
from torchvision.models import resnet18
from tqdm import tqdm

from easyfsl.samplers import TaskSampler
from easyfsl.utils import plot_images, sliding_average

In [3]:
image_size = 28

# Omniglot images are single-channel, whereas the ResNet backbone used in this
# notebook has a 3-channel input layer, so both pipelines replicate the
# grayscale channel three times before converting to a tensor.
train_transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=3),
        # Random crop rescaled to image_size x image_size (training augmentation).
        transforms.RandomResizedCrop(image_size),
        transforms.ToTensor(),
    ]
)
test_transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=3),
        # Deterministic pipeline: resize 15% larger, then keep the central
        # image_size x image_size crop.
        transforms.Resize([int(image_size * 1.15), int(image_size * 1.15)]),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
    ]
)

# download=True lets the notebook run from a fresh checkout: the archives are
# fetched into ../data when missing and the download is skipped when they are
# already present. With download=False a missing dataset raises instead.
train_set = Omniglot(
    root="../data",
    background=True,  # "background" split, used for training
    transform=train_transform,
    download=True,
)
test_set = Omniglot(
    root="../data",
    background=False,  # "evaluation" split, used for testing
    transform=test_transform,
    download=True,
)
                     

Downloading https://raw.githubusercontent.com/brendenlake/omniglot/master/python/images_background.zip to ../data/omniglot-py/images_background.zip


  0%|          | 0/9464212 [00:00<?, ?it/s]

Extracting ../data/omniglot-py/images_background.zip to ../data/omniglot-py
Downloading https://raw.githubusercontent.com/brendenlake/omniglot/master/python/images_evaluation.zip to ../data/omniglot-py/images_evaluation.zip


  0%|          | 0/6462886 [00:00<?, ?it/s]

Extracting ../data/omniglot-py/images_evaluation.zip to ../data/omniglot-py


In [29]:
# Draw a 2x3 matrix of standard-normal samples to illustrate a reduction over dim 0.
x = torch.randn(size=(2, 3))
print("x = ", x)
# Averaging over dim 0 collapses the two rows, leaving one mean per column.
print("x.mean = ", torch.mean(x, dim=0))


x =  tensor([[-1.5697, -0.6454, -0.6744],
        [-0.7699, -2.1498,  1.0295]])
x.mean =  tensor([-1.1698, -1.3976,  0.1776])


In [31]:
# `a` holds three 2-D points (shape 3x2).
a = torch.tensor(
    [
        [0.9041, 0.0196],
        [-0.3108, -2.4423],
        [-0.4821, 1.059],
    ]
)
# `b` holds two 2-D points (shape 2x2).
b = torch.tensor(
    [
        [-2.1763, -0.4713],
        [-0.6986, 1.3702],
    ]
)
# Pairwise Euclidean (p=2) distances: entry (i, j) is the distance between
# a[i] and b[j], so the result has shape 3x2.
torch.cdist(x1=a, x2=b, p=2)


tensor([[3.1193, 2.0959],
        [2.7138, 3.8322],
        [2.2830, 0.3791]])

In [33]:
# Matrix product of the 3x2 matrix `a` with the 2x2 matrix `b` (result is 3x2),
# shown for contrast with the pairwise distances computed by torch.cdist.
a @ b

tensor([[-1.9813, -0.3992],
        [ 2.3826, -3.2000],
        [ 0.3094,  1.6783]])

In [40]:
# Pairwise distances between the rows of `a` and the rows of `b`
# (torch.cdist uses p=2, i.e. Euclidean distance, when p is not given).
dist_xa = torch.cdist(x1=a, x2=b)
# Negate so that a smaller distance yields a larger (less negative) value.
torch.neg(dist_xa)

tensor([[-3.1193, -2.0959],
        [-2.7138, -3.8322],
        [-2.2830, -0.3791]])

In [39]:
# Sanity check on a single pair of points: the first row of `a` against the
# first row of `b`, which should reproduce entry (0, 0) of the full distance matrix.
torch.cdist(
    torch.tensor([[0.9041, 0.0196]]),
    torch.tensor([[-2.1763, -0.4713]]),
    p=2,
)


tensor([[3.1193]])

Hàm `torch.cdist(x1, x2)` nhận đầu vào là hai tensor 2 chiều hoặc 3 chiều (có thêm chiều batch B), trong đó mỗi hàng là một vector.  
Tensor `x1` có kích thước B×P×M.  
Tensor `x2` có kích thước B×R×M.  
Đầu ra là tensor có kích thước B×P×R; mỗi hàng trong `x1` và `x2` là một vector M chiều.  
Khoảng cách Euclid (với `p=2`) được tính lần lượt giữa từng cặp vector, và kết quả được lưu lại dưới dạng
ma trận P×R: phần tử (i, j) là khoảng cách giữa hàng i của `x1` và hàng j của `x2`.




In [43]:
class PrototypicalNetworks(nn.Module):
    """
    Prototypical network for few-shot classification.

    A backbone embeds the support and query images. Each class is represented
    by the mean embedding (its "prototype") of that class's support images, and
    every query is scored against every class by the negative Euclidean
    distance between the query embedding and the class prototype.
    """

    def __init__(self, backbone: nn.Module):
        """
        Args:
            backbone: module used to embed images. `forward` passes its output
                straight to `torch.cdist`, so it is expected to return one flat
                feature vector per image, i.e. a (n_images, feature_dim) tensor.
        """
        super().__init__()
        self.backbone = backbone

    def forward(
        self,
        support_images: torch.Tensor,
        support_labels: torch.Tensor,
        query_images: torch.Tensor,
    ) -> torch.Tensor:
        """
        Predict query labels using labeled support images.

        Args:
            support_images: batch of support images, fed to the backbone.
            support_labels: one label per support image. Labels must be the
                integers 0 .. n_way - 1, where n_way is the number of distinct
                labels; a label in that range with no support image yields a
                NaN prototype.
            query_images: batch of query images, fed to the backbone.

        Returns:
            Tensor of shape (n_query, n_way). Entry (i, j) is minus the
            Euclidean distance between query i and the prototype of class j,
            so the highest score marks the closest prototype.
        """
        # Embed both sets. The module is called (rather than invoking
        # `.forward` directly) so that any hooks registered on the backbone run.
        z_support = self.backbone(support_images)
        z_query = self.backbone(query_images)

        # Infer the number of classes (the "n-way" of the task) from the
        # support labels.
        n_way = len(torch.unique(support_labels))

        # Prototype i is the mean, over dim 0 (the sample dimension), of the
        # support embeddings whose label equals i. Stacking the prototypes
        # along dim 0 gives one row per class.
        z_proto = torch.stack(
            [z_support[support_labels == label].mean(dim=0) for label in range(n_way)],
            dim=0,
        )

        # Euclidean distance between every query embedding and every prototype.
        dists = torch.cdist(z_query, z_proto)

        # Turn distances into classification scores: the farther a query is
        # from a prototype, the more negative its score for that class.
        scores = -dists

        return scores
    
        
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# cell still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ImageNet-pretrained ResNet-18; out of the box it outputs 1000 values per image.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour of
# `weights=`; switch once the minimum supported torchvision version is confirmed.
convolutional_network = resnet18(pretrained=True)
# Replace the final fully connected layer with a Flatten layer so the network
# returns the features that used to feed `fc` (512 values per image) instead of
# the 1000 ImageNet outputs.
convolutional_network.fc = nn.Flatten()
print(convolutional_network)

model = PrototypicalNetworks(convolutional_network).to(device)
        
        
        
        
        
        

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [57]:
# Shape check on the stock pretrained ResNet-18. The original `fc` head is
# deliberately kept here (it is NOT swapped for nn.Flatten(), which would give
# 512 features), so the network returns 1000 values per image.
# NOTE(review): this rebinds `convolutional_network`, which an earlier cell
# uses as the backbone passed to the model; a distinct name would avoid
# confusion on re-runs.
convolutional_network = resnet18(pretrained=True).cuda()
print(convolutional_network)
# One random 3-channel 400x400 image with a leading batch dimension of size 1.
image_sample = torch.rand((3, 400, 400)).unsqueeze(dim=0).cuda()
convolutional_network(image_sample).shape


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

torch.Size([1, 1000])