In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import math
import numpy as np
import pandas as pd
import random
import re

from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm, notebook

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class dataset(Dataset):
    def __init__(self, data, label):
        self.data = data
        self.label = label

    def __getitem__(self, index):
        
        data = self.data[index]
        label = self.label[index]

        return (data, label)
    
    def __len__(self):
        return self.data.shape[0]

In [10]:
class ScaledDotProductAttention(nn.Module):
    def __init__(self):
        super(ScaledDotProductAttention, self).__init__()
        self.softmax = nn.Softmax(dim=2)
    
    def forward(self, q, k, v, mask=None, e=1e-6):
        batch_size, head, length, d_tensor = k.size()       # ex) (2, 4, 100, 32)
        k_t = k.view(batch_size, head, d_tensor, length)    # (2, 4, 32, 100)
        score = (q @ k_t) / math.sqrt(d_tensor)

        if mask is not None:
            score = score.masked_fill(mask == 0, -e)
        score = self.softmax(score)
        v = score @ v

        return v, score

class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, n_head):
        super(MultiHeadAttention, self).__init__()
        self.n_head = n_head
        self.attention = ScaledDotProductAttention()
        self.w_q = nn.Linear(d_model, d_model)
        self.w_k = nn.Linear(d_model, d_model)
        self.w_v = nn.Linear(d_model, d_model)
        self.w_concat = nn.Linear(d_model, d_model)

    def forward(self, q, k, v, mask=None):
        q, k, v = self.w_q(q), self.w_k(k), self.w_v(v)
        q, k, v = self.split(q), self.split(k), self.split(v)

        out, attention = self.attention(q, k, v, mask=mask)
        out = self.concat(out)
        out = self.w_concat(out)

        return out

    def split(self, t):
        batch_size, length, d_model = t.size()
        d_tensor = d_model // self.n_head
        t = t.view(batch_size, self.n_head, length, d_tensor)
        return t

    def concat(self, t):
        batch_size, head, length, d_tensor = t.size()
        d_model = d_tensor * head
        t = t.view(batch_size, length, d_model)
        return t

class PositionwiseFeedForward(nn.Module):
    def __init__(self, d_model, hidden, drop_prob=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.linear1 = nn.Linear(d_model, hidden)
        self.linear2 = nn.Linear(hidden, d_model)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=drop_prob)

    def forward(self, x):
        x = self.linear1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.linear2(x)

        return x

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        
        pe = pe.unsqueeze(0).transpose(0, 1)
        
        self.register_buffer('pe', pe)

    def forward(self, x):
        # print(x.shape, self.pe[:x.size(1), :].squeeze(1).shape)
        x = x + self.pe[:x.size(1), :].squeeze(1)
        return x         
        
# pe = PositionalEncoding(128, 5000, 'cpu')
# token_embedding = torch.rand(size=(1, 100, 128))
# print("pe encoding shape : " , pe.encoding.shape)
# result = pe(token_embedding)
# print("result shape : " , result.shape)

# pe = PositionalEncoding(128, 0.1, 5000)
# token_embedding = torch.rand(size=(1, 100, 128))
# result = pe(token_embedding)
# result.shape

TypeError: zeros() received an invalid combination of arguments - got (str, int), but expected one of:
 * (tuple of ints size, *, tuple of names names, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (tuple of ints size, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)


In [4]:
class EncoderLayer(nn.Module):
    def __init__(self, d_model, ffn_hidden, n_head, drop_prob):
        super(EncoderLayer, self).__init__()
        self.attention = MultiHeadAttention(d_model=d_model, n_head=n_head)
        self.norm1 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(p=drop_prob)

        self.ffn = PositionwiseFeedForward(d_model=d_model, hidden=ffn_hidden, drop_prob=drop_prob)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout2 = nn.Dropout(p=drop_prob)

    def forward(self, x, src_mask):
        _x = x
        x = self.attention(q=x, k=x, v=x, mask=src_mask)

        x = self.norm1(x + _x)
        x = self.dropout1(x)

        _x = x
        x = self.ffn(x)

        x = self.norm2(_x + x)
        x = self.dropout2(x)
        return x

class Encoder(nn.Module):
    def __init__(self, d_model, ffn_hidden, n_head, n_layers, drop_prob):
        super(Encoder, self).__init__()
        self.pe = PositionalEncoding(d_model)
        self.layers = nn.ModuleList([EncoderLayer(d_model=d_model, ffn_hidden=ffn_hidden, n_head=n_head, 
                        drop_prob=drop_prob) for _ in range(n_layers)])
        
    def forward(self, x, src_mask):
        x = x + self.pe(x)

        for layer in self.layers:
            x = layer(x, src_mask)
        
        return x

# inputs = torch.rand(size=(1, 100, 128))
# encoder = Encoder(128, 256, 4, 1, 0.1)
# result = encoder(inputs, None)
# result.shape

In [5]:
def create_look_ahead_mask(size):
    mask = torch.ones(size, size).triu(diagonal=1)
    return mask  # (seq_len, seq_len)

class DecoderLayer(nn.Module):
    def __init__(self, d_model, ffn_hidden, n_head, drop_prob):
        super(DecoderLayer, self).__init__()
        self.self_attention = MultiHeadAttention(d_model=d_model, n_head=n_head)
        self.norm1 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(p=drop_prob)

        self.enc_dec_attention = MultiHeadAttention(d_model=d_model, n_head=n_head)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout2 = nn.Dropout(p=drop_prob)

        self.ffn = PositionwiseFeedForward(d_model=d_model, hidden=ffn_hidden, drop_prob=drop_prob)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout3 = nn.Dropout(p=drop_prob)

    def forward(self, dec, enc, trg_mask, src_mask):
        _x = dec
        x = self.self_attention(q=dec, k=dec, v=dec, mask=trg_mask)

        x = self.norm1(x+_x)
        x = self.dropout1(x)

        if enc is not None:
            _x = x
            x = self.enc_dec_attention(q=x, k=enc, v=enc, mask=src_mask)
        
        _x = x
        x = self.ffn(x)

        x = self.norm3(x+_x)
        x = self.dropout3(x)
        return x

class Decoder(nn.Module):
    def __init__(self, d_model, ffn_hidden, n_head, n_layers, drop_prob):
        super().__init__()
        self.layers = nn.ModuleList([DecoderLayer(d_model=d_model, ffn_hidden=ffn_hidden, n_head=n_head, drop_prob=drop_prob) for _ in range(n_layers)])
        self.linear = nn.Linear(d_model, d_model)

    def forward(self, trg, src, trg_mask, src_mask=None):
        for layer in self.layers:
            trg = layer(trg, src, trg_mask, src_mask)
        
        output = self.linear(trg)
        return output

In [6]:
class Transformer(nn.Module):
    def __init__(self, input_size, d_model, ffn_hidden, n_head, n_layers, drop_prob):
        super(Transformer, self).__init__()
        self.encoder = Encoder(d_model, ffn_hidden, n_head, n_layers, drop_prob)
        self.decoder = Decoder(d_model, ffn_hidden, n_head, n_layers, drop_prob)
        self.conv1x1 = nn.Conv1d(in_channels=input_size, out_channels=d_model, kernel_size=1, stride=1)

    def make_dataset(self, x):
        enc_seq_len = int(x.shape[1] * 0.8)
        dec_seq_len = int(x.shape[1] * 0.2)
        output_sequence_length = int(x.shape[1] * 0.2)

        enc_input = x[:, :enc_seq_len, :]
        dec_input = enc_input[:, -dec_seq_len:, :]
        dec_output = x[:,-output_sequence_length:, :]

        return enc_input, dec_input, dec_output    

    def forward(self, inputs):

        print(type(inputs))
        inputs = torch.permute(inputs, dims=(0, 2, 1))
        print(type(inputs))
        x = self.conv1x1(inputs)
        x = torch.permute(x, dims=(0, 2, 1))
        print(type(x))
        enc_input, dec_input, dec_output = self.make_dataset(x)
        print(type(enc_input))

        enc = self.encoder(enc_input, None)
        dec = self.decoder(dec_input, enc, create_look_ahead_mask(dec_input.size(1)))

        return dec, dec_output

In [7]:
def compute_loss(input, prediction):
    mse = nn.MSELoss()
    loss_mse = mse(prediction, input)
    return loss_mse, loss_mse.item()

In [8]:
# def train(model, dataloader):

#     loss_epoch = []
#     model.train()

#     for index_batch, (inputs, labels) in tqdm(enumerate(dataloader)):
        
#         inputs = inputs.to(device)
#         labels = labels.to(device)
#         predicted, true = model(inputs)
#         loss, loss_value = compute_loss(true, predicted)

#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#         loss_epoch.append(loss_value)

#     loss_mean_epoch = np.mean(loss_epoch)
#     loss_std_epoch = np.mean(loss_epoch)
#     loss = {'mean' : loss_mean_epoch, 'std' : loss_std_epoch}
#     return loss

In [9]:
epochs = 20
batch_size = 64
learning_rate = 0.0005
device        = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

losses_mean = np.zeros(epochs)
losses_std = np.zeros(epochs)

In [10]:
from wisdm import wisdm
x_train, y_train = wisdm.create_wisdm()
x_train = torch.Tensor(x_train)
y_train = torch.Tensor(y_train)
print("x trian shape : ", x_train.shape)
print("y train shape : ", y_train.shape)

dataset_train = dataset(x_train, y_train)
dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, drop_last=True)

transformer = Transformer(input_size=x_train.shape[2], d_model=128, ffn_hidden=256, n_head=4, n_layers=2, drop_prob=0.1)
optimizer = torch.optim.Adam(transformer.parameters(), lr=learning_rate)

x trian shape :  torch.Size([27815, 200, 3])
y train shape :  torch.Size([27815, 6])


### Train and Test

In [None]:
for i in range(epochs):

    loss_epoch = []
    transformer.train()

    for index_batch, (inputs, labels) in tqdm(enumerate(dataloader_train)):
        print(index_batch)
        
        inputs = inputs.to(device)
        labels = labels.to(device)
        predicted, true = transformer(inputs)
        loss, loss_value = compute_loss(true, predicted)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_epoch.append(loss_value)

    loss_mean_epoch = np.mean(loss_epoch)
    loss_std_epoch = np.mean(loss_epoch)
    loss = {'mean' : loss_mean_epoch, 'std' : loss_std_epoch}

    loss_train = train(transformer, dataloader_train)
    losses_mean[i] = loss_train['mean']
    losses_std[i] = loss_train['std']

    print('epoch : ', i, 'loss_train : ', losses_mean[i])


0it [00:00, ?it/s]

0


NVIDIA RTX A6000 with CUDA capability sm_86 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 sm_75 compute_37.
If you want to use the NVIDIA RTX A6000 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/

NVIDIA A10 with CUDA capability sm_86 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 sm_75 compute_37.
If you want to use the NVIDIA A10 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/

NVIDIA RTX A5000 with CUDA capability sm_86 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 sm_75 compute_37.
If you want to use the NVIDIA RTX A5000 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



In [None]:
import torch
print(torch.__version__)
print(torch.cuda.get_arch_list())

In [1]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Tue_Mar__8_18:18:20_PST_2022
Cuda compilation tools, release 11.6, V11.6.124
Build cuda_11.6.r11.6/compiler.31057947_0


In [3]:
import numpy as np
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize

  from .autonotebook import tqdm as notebook_tqdm
