In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important;}</style>"))
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

from torch import nn
from torch import Tensor
from PIL import Image
#from torchvision.transforms import Compose, Resize, ToTensor
from einops import rearrange, reduce, repeat
from einops.layers.torch import Rearrange, Reduce
from torchsummary import summary
import numpy as np

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa

In [2]:
x = torch.linspace(0,1000,10000)
x = torch.reshape(x,(10,1,1000,1))
y = torch.linspace(0,100,100)
print(x.shape,y.shape)

torch.Size([10, 1, 1000, 1]) torch.Size([100])


In [9]:
x = torch.linspace(0,1000,10000)
x = torch.reshape(x,(100,1,100,1))
y = torch.linspace(0,100,100)
print(x.shape)

class PatchEmbedding(nn.Module):
    def __init__(self, in_channels:int=1, patch_size=10, emb_size: int=10, img_size: int = 1):
        self.patch_size = patch_size
        super().__init__()
        self.projection = nn.Sequential(
        # break-down the image in s1Xs2 patches and flat them
            Rearrange('b c (h s1) (w s2) -> b (h w) (s1 s2 c)', s1=patch_size,s2=1),
            nn.Linear(patch_size * 1 * in_channels, emb_size)
        )
        self.cls_token = nn.Parameter(torch.randn(1,1, emb_size))
        self.positions = nn.Parameter(torch.randn((img_size // patch_size) **2 + 1, emb_size))
        
    def forward(self, x: Tensor) -> Tensor:
        b, _, _, _ = x.shape
        x = self.projection(x)
        cls_tokens = repeat(self.cls_token, '() n e -> b n e', b=b)
        # prepend the cls token to the input
        x = torch.cat([cls_tokens, x], dim=1)
        # add position embedding
        x += self.positions
        return x

# MultiHeadAttention

class MultiHeadAttention(nn.Module):
    def __init__(self, emb_size: int = 10, num_heads: int = 2, dropout: float = 0):
        super().__init__()
        self.emb_size = emb_size
        self.num_heads = num_heads
        self.keys = nn.Linear(emb_size, emb_size)
        self.queries = nn.Linear(emb_size, emb_size)
        self.values = nn.Linear(emb_size, emb_size)
        self.att_drop = nn.Dropout(dropout)
        self.projection = nn.Linear(emb_size, emb_size)
        self.scaling = (self.emb_size // num_heads) ** -0.5

    def forward(self, x : Tensor, mask: Tensor = None) -> Tensor:
        # split keys, queries and values in num_heads
        queries = rearrange(self.queries(x), "b n (h d) -> b h n d", h=self.num_heads)
        #print('query : ',queries.shape)
        #print(n)
        keys = rearrange(self.keys(x), "b n (h d) -> b h n d", h=self.num_heads)
        #print('key : ',keys.shape)
        values  = rearrange(self.values(x), "b n (h d) -> b h n d", h=self.num_heads)
        #print('values : ',values.shape)
        # sum up over the last axis
        energy = torch.einsum('bhqd, bhkd -> bhqk', queries, keys) # batch, num_heads, query_len, key_len
        #print('energy : ',energy.shape)
        if mask is not None:
            fill_value = torch.finfo(torch.float32).min
            energy.mask_fill(~mask, fill_value)
            
        att = F.softmax(energy, dim=-1) * self.scaling
        att = self.att_drop(att)
        # sum up over the third axis
        out = torch.einsum('bhal, bhlv -> bhav ', att, values)
        out = rearrange(out, "b h n d -> b n (h d)")
        out = self.projection(out)
        return out
'''
Whatever goes inside of class, it should have the form of (b,c,h,w)
'''
patches_embedded = PatchEmbedding()(x)
print('Patch shape : ',patches_embedded.shape)
MultiHeadAttention()(patches_embedded).shape
print('MultiHead Output : ', MultiHeadAttention()(patches_embedded).shape)


# Residual

class ResidualAdd(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn
    def forward(self, x, **kwargs):
        res = x
        x  = self.fn(x, **kwargs)
        x += res
        return x
# FC Block
class FeedForwardBlock(nn.Sequential):
    def __init__(self, emb_size: int, expansion: int = 4, drop_p: float = 0.2):
        super().__init__(
            nn.Linear(emb_size, expansion * emb_size),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(expansion * emb_size, emb_size),
        )
# Transformer encoder block
class TransformerEncoderBlock(nn.Sequential):
    def __init__(self,
                 emb_size: int = 10,
                 drop_p: float = 0.,
                 forward_expansion: int = 50,
                 forward_drop_p: float = 0.,
                 ** kwargs):
        super().__init__(
            ResidualAdd(nn.Sequential(
                nn.LayerNorm(emb_size),
                MultiHeadAttention(emb_size, **kwargs),
                nn.Dropout(drop_p)
            )),
            ResidualAdd(nn.Sequential(
                nn.LayerNorm(emb_size),
                FeedForwardBlock(
                    emb_size, expansion=forward_expansion, drop_p=forward_drop_p),
                nn.Dropout(drop_p)
            )
            ))
patches_embedded = PatchEmbedding()(x)
TransformerEncoderBlock()(patches_embedded).shape

torch.Size([100, 1, 100, 1])
Patch shape :  torch.Size([100, 11, 10])
MultiHead Output :  torch.Size([100, 11, 10])


torch.Size([100, 11, 10])

In [18]:
class TransformerEncoder(nn.Sequential):
    def __init__(self, depth: int = 12, **kwargs):
        super().__init__(*[TransformerEncoderBlock(**kwargs) for _ in range(depth)])
        
class ClassificationHead(nn.Sequential):
    def __init__(self, emb_size: int = 48, n_classes: int = 1):
        super().__init__(
            Reduce('b n e -> b e', reduction='mean'),
            nn.LayerNorm(emb_size), 
            nn.Linear(emb_size, n_classes))

class ViT(nn.Sequential):
    def __init__(self,     
                in_channels: int = 1,
                patch_size: int = 10,
                emb_size: int = 10,
                img_size: int = 1,
                depth: int = 50,
                n_classes: int = 1,
                **kwargs):
        super().__init__(
            PatchEmbedding(in_channels, patch_size, emb_size, img_size),
            TransformerEncoder(depth, emb_size=emb_size, **kwargs),
            ClassificationHead(emb_size, n_classes)
        )
        
model = ViT()
#summary(model, (1, 100, 1), device='cuda')

In [19]:
summary(ViT(), (1, 500, 1), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         Rearrange-1               [-1, 50, 10]               0
            Linear-2               [-1, 50, 10]             110
    PatchEmbedding-3               [-1, 51, 10]               0
         LayerNorm-4               [-1, 51, 10]              20
            Linear-5               [-1, 51, 10]             110
            Linear-6               [-1, 51, 10]             110
            Linear-7               [-1, 51, 10]             110
           Dropout-8            [-1, 2, 51, 51]               0
            Linear-9               [-1, 51, 10]             110
MultiHeadAttention-10               [-1, 51, 10]               0
          Dropout-11               [-1, 51, 10]               0
      ResidualAdd-12               [-1, 51, 10]               0
        LayerNorm-13               [-1, 51, 10]              20
           Linear-14              [-1,

In [12]:
train_X = np.load('train_X.npy').flatten()
train_y_r = np.sum(np.load('train_Y.npy'),axis=1).flatten()

test_X = np.load('test_X.npy').flatten()
test_y_r = np.sum(np.load('test_y.npy'),axis=1).flatten()

val_X = np.load('val_X.npy').flatten()
val_y_r = np.sum(np.load('val_y.npy'),axis=1).flatten()

# training
#train_X = train_X.reshape(1,np.int(train_X.shape[0]/1000),1000, 1)
#train_y_r = train_y_r.reshape(train_y_r.shape[0],1)
#train_y_pp = train_y_pp.reshape(np.int(train_y_pp.shape[0]/100),100,1)

# validation
#val_X = val_X.reshape(np.int(val_X.shape[0]/1000),1000, 1)
#val_y_r = val_y_r.reshape(val_y_r.shape[0],1)
#val_y_pp = val_y_pp.reshape(np.int(val_y_pp.shape[0]/100),100,1)

print("training samples :",train_X.shape[0])
print("validation samples :",val_X.shape[0])
print("testing samples :",test_X.shape[0])

training samples : 120000000
validation samples : 40000000
testing samples : 40000000


In [13]:
from torch.utils.data import Dataset,DataLoader,TensorDataset
x = torch.from_numpy(train_X[:10000000].astype(np.float32))#.double()
x = torch.reshape(x,(10000,1,1000,1))#.double()
y = torch.from_numpy(train_y_r[:10000].astype(np.float32))
c = 1; h=1000; w=1
x_train = torch.Tensor(x).reshape(x.shape[0],c,h,w)

from torch.utils.data import Dataset,DataLoader,TensorDataset
vx = torch.from_numpy(val_X[:8000000].astype(np.float32))#.double()
vx = torch.reshape(vx,(8000,1,1000,1))#.double()
vy = torch.from_numpy(train_y_r[:8000].astype(np.float32))
c = 1; h=1000; w=1
v_train = torch.Tensor(vx).reshape(vx.shape[0],c,h,w)

In [14]:
from torch.utils.data import Dataset,DataLoader,TensorDataset
x = torch.from_numpy(train_X[:].astype(np.float32))#.double()
x = torch.reshape(x,(240000,1,500,1))#.double()
y = torch.from_numpy(train_y_r[:].astype(np.float32))
c = 1; h=500; w=1
x_train = torch.Tensor(x).reshape(x.shape[0],c,h,w)

from torch.utils.data import Dataset,DataLoader,TensorDataset
vx = torch.from_numpy(val_X[:].astype(np.float32))#.double()
vx = torch.reshape(vx,(80000,1,500,1))#.double()
vy = torch.from_numpy(val_y_r[:].astype(np.float32))
c = 1; h=500; w=1
v_train = torch.Tensor(vx).reshape(vx.shape[0],c,h,w)

In [20]:
y_train = torch.Tensor(y).reshape(y.shape[0],)#.type(torch.LongTensor)
vy_train = torch.Tensor(vy).reshape(vy.shape[0],)#.type(torch.LongTensor)
#y_test = torch.Tensor(y_test).reshape(y_test.shape[0],)
print(f'x_train shape: {x_train.shape} - y_train: {y_train.shape}')
print(f'x_train shape: {v_train.shape} - y_train: {vy_train.shape}')

train_set = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_set,batch_size=128)

valid_set = TensorDataset(v_train,vy_train)
validloader = DataLoader(valid_set,batch_size=128)

from torch import optim
#loss_function = nn.CrossEntropyLoss()
loss_function = nn.MSELoss()
#optimizer = optim.SGD(model.parameters(), lr=0.001, weight_decay= 1e-3, momentum = 0.6, nesterov = True)
optimizer = optim.Adam(model.parameters(),lr=0.0001)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
n_epochs = 50 # this is a hyperparameter you'll need to define

for epoch in range(n_epochs):
    ##################
    ### TRAIN LOOP ###
    ##################
    # set the model to train mode
    model.train()
    train_loss = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        # clear the old gradients from optimized variables
        optimizer.zero_grad()
        # forward pass: feed inputs to the model to get outputs
        output = model(data)
        # calculate the training batch loss
        loss = loss_function(output, target)
        # backward: perform gradient descent of the loss w.r. to the model params
        loss.backward()
        # update the model parameters by performing a single optimization step
        optimizer.step()
        # accumulate the training loss
        train_loss += loss.item()

    #######################
    ### VALIDATION LOOP ###
    #######################
    # set the model to eval mode
    model.eval()
    valid_loss = 0
    # turn off gradients for validation
    with torch.no_grad():
        for data, target in validloader:
            data, target = data.to(device), target.to(device)
            # forward pass
            output = model(data)
            # validation batch loss
            loss = loss_function(output, target) 
            # accumulate the valid_loss
            valid_loss += loss.item()
            
    #########################
    ## PRINT EPOCH RESULTS ##
    #########################
    train_loss /= len(train_loader)
    valid_loss /= len(validloader)
    print(f'Epoch: {epoch+1}/{n_epochs}.. Training loss: {train_loss}.. Validation Loss: {valid_loss}')

x_train shape: torch.Size([240000, 1, 500, 1]) - y_train: torch.Size([240000])
x_train shape: torch.Size([80000, 1, 500, 1]) - y_train: torch.Size([80000])


KeyboardInterrupt: 

In [8]:
y_train = torch.Tensor(y).reshape(y.shape[0],)#.type(torch.LongTensor)
vy_train = torch.Tensor(vy).reshape(vy.shape[0],)#.type(torch.LongTensor)
#y_test = torch.Tensor(y_test).reshape(y_test.shape[0],)
print(f'x_train shape: {x_train.shape} - y_train: {y_train.shape}')
print(f'x_train shape: {v_train.shape} - y_train: {vy_train.shape}')

train_set = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_set,batch_size=1000)

valid_set = TensorDataset(v_train,vy_train)
validloader = DataLoader(valid_set,batch_size=1000)

from torch import optim
#loss_function = nn.CrossEntropyLoss()
loss_function = nn.MSELoss()
#optimizer = optim.SGD(model.parameters(), lr=0.001, weight_decay= 1e-3, momentum = 0.6, nesterov = True)
optimizer = optim.Adam(model.parameters(),lr=0.001)


n_epochs = 50 # this is a hyperparameter you'll need to define

for epoch in range(n_epochs):
    ##################
    ### TRAIN LOOP ###
    ##################
    # set the model to train mode
    model.train()
    train_loss = 0
    for data, target in train_loader:
        # clear the old gradients from optimized variables
        optimizer.zero_grad()
        # forward pass: feed inputs to the model to get outputs
        output = model(data)
        # calculate the training batch loss
        loss = loss_function(output, target)
        # backward: perform gradient descent of the loss w.r. to the model params
        loss.backward()
        # update the model parameters by performing a single optimization step
        optimizer.step()
        # accumulate the training loss
        train_loss += loss.item()

    #######################
    ### VALIDATION LOOP ###
    #######################
    # set the model to eval mode
    model.eval()
    valid_loss = 0
    # turn off gradients for validation
    with torch.no_grad():
        for data, target in validloader:
            # forward pass
            output = model(data)
            # validation batch loss
            loss = loss_function(output, target) 
            # accumulate the valid_loss
            valid_loss += loss.item()
            
    #########################
    ## PRINT EPOCH RESULTS ##
    #########################
    train_loss /= len(train_loader)
    valid_loss /= len(validloader)
    print(f'Epoch: {epoch+1}/{n_epochs}.. Training loss: {train_loss}.. Validation Loss: {valid_loss}')

x_train shape: torch.Size([161000, 1, 1000, 1]) - y_train: torch.Size([161000])
x_train shape: torch.Size([8000, 1, 1000, 1]) - y_train: torch.Size([8000])


  return F.mse_loss(input, target, reduction=self.reduction)


KeyboardInterrupt: 

In [10]:
import torch

torch.cuda.is_available()

torch.cuda.device_count()

torch.cuda.current_device()

torch.cuda.device(0)

torch.cuda.get_device_name(0)


'NVIDIA GeForce RTX 3050 Ti Laptop GPU'

In [11]:
import torch
torch.cuda.is_available()

True

In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [13]:
y_train = torch.Tensor(y).reshape(y.shape[0],)#.type(torch.LongTensor)
vy_train = torch.Tensor(vy).reshape(vy.shape[0],)#.type(torch.LongTensor)
#y_test = torch.Tensor(y_test).reshape(y_test.shape[0],)
print(f'x_train shape: {x_train.shape} - y_train: {y_train.shape}')
print(f'x_train shape: {v_train.shape} - y_train: {vy_train.shape}')

train_set = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_set,batch_size=1000)

valid_set = TensorDataset(v_train,vy_train)
validloader = DataLoader(valid_set,batch_size=1000)

from torch import optim
#loss_function = nn.CrossEntropyLoss()
loss_function = nn.MSELoss()
#optimizer = optim.SGD(model.parameters(), lr=0.001, weight_decay= 1e-3, momentum = 0.6, nesterov = True)
optimizer = optim.Adam(model.parameters(),lr=0.001)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
n_epochs = 50 # this is a hyperparameter you'll need to define

for epoch in range(n_epochs):
    ##################
    ### TRAIN LOOP ###
    ##################
    # set the model to train mode
    model.train()
    train_loss = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        # clear the old gradients from optimized variables
        optimizer.zero_grad()
        # forward pass: feed inputs to the model to get outputs
        output = model(data)
        # calculate the training batch loss
        loss = loss_function(output, target)
        # backward: perform gradient descent of the loss w.r. to the model params
        loss.backward()
        # update the model parameters by performing a single optimization step
        optimizer.step()
        # accumulate the training loss
        train_loss += loss.item()

    #######################
    ### VALIDATION LOOP ###
    #######################
    # set the model to eval mode
    model.eval()
    valid_loss = 0
    # turn off gradients for validation
    with torch.no_grad():
        for data, target in validloader:
            data, target = data.to(device), target.to(device)
            # forward pass
            output = model(data)
            # validation batch loss
            loss = loss_function(output, target) 
            # accumulate the valid_loss
            valid_loss += loss.item()
            
    #########################
    ## PRINT EPOCH RESULTS ##
    #########################
    train_loss /= len(train_loader)
    valid_loss /= len(validloader)
    print(f'Epoch: {epoch+1}/{n_epochs}.. Training loss: {train_loss}.. Validation Loss: {valid_loss}')

x_train shape: torch.Size([161000, 1, 1000, 1]) - y_train: torch.Size([161000])
x_train shape: torch.Size([8000, 1, 1000, 1]) - y_train: torch.Size([8000])
Epoch: 1/50.. Training loss: 6461.130845883248.. Validation Loss: 362.01186180114746
Epoch: 2/50.. Training loss: 4439.795080212332.. Validation Loss: 1370.8190307617188


KeyboardInterrupt: 