In [None]:
#

```
function ClickConnect(){
    console.log("Working!"); 
    document.querySelector("colab-run-button").click()
}
setInterval(ClickConnect, 60000)
```

In [23]:
!./script/init.sh local

Content directory exists.


In [1]:
ENV = "local"
if ENV == "local":
    dir_path = "./content/content/dataset_image"
    NUM_WORKERS = 12
elif ENV == "colab":
    dir_path = "/content/content/dataset_image"
    from google.colab import drive
    drive.mount('/content/drive')
    NUM_WORKERS = 2

In [2]:
import numpy as np
from PIL import Image
from pathlib import Path
import urllib.request
import zipfile
import random
import gc

import torch
import torch.utils.data as data
from torchvision import transforms
import os
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers.neptune import NeptuneLogger
from typing import Type, Any, Callable, Union, List, Optional
from torch import Tensor
from torchinfo import summary
print(torch.cuda.is_available())

True


In [3]:
class InterjectionDataset(data.Dataset):
  def __init__(self, dir_path, input_size, meta_df):
    super().__init__()
    self.dir_path = dir_path
    self.input_size = input_size
    self.meta_df = meta_df
    self.len = len(self.meta_df)
  
  def __len__(self):
    return self.len
  
  def __getitem__(self, index):
    row = self.meta_df.iloc[index]
    image_name = row.image
    p = os.path.join(self.dir_path, image_name)
    image = Image.open(p)
    image = image.resize(self.input_size)
    image = np.array(image)
    image = image.reshape(512, 512, 1)
    image = np.transpose(image, (2, 0, 1))
    image = torch.from_numpy(image)
    image = image.type(torch.FloatTensor)

    start = row.start
    end = row.end
    label = np.zeros(512)
    label[start:end] = 1
    label = torch.from_numpy(label)
    label = label.type(torch.FloatTensor)

    return image, label

In [4]:
image_path = os.path.join(dir_path,"image")
meta_path = os.path.join(dir_path,"interjection_data.csv")

with open(meta_path, 'r', encoding='UTF-8') as csv:
  meta_df = pd.read_csv(csv)
  dataset = InterjectionDataset(image_path, (512,512), meta_df)

test_size = int(len(dataset) * 0.2)
valid_size = test_size
train_size = len(dataset) - (test_size + valid_size)
splited_dataset = data.random_split(dataset, [train_size, test_size, valid_size], generator=torch.Generator().manual_seed(0))
train_dataset = splited_dataset[0]
test_dataset = splited_dataset[1]
valid_dataset = splited_dataset[2]

In [5]:
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
  """1x1 convolution"""
  return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)

def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
  """3x3 convolution with padding"""
  return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                    padding=dilation, groups=groups, bias=False, dilation=dilation)

class BasicBlock(nn.Module):
  expansion: int = 1

  def __init__(
    self,
    inplanes: int,
    planes: int,
    stride: int = 1,
    downsample: Optional[nn.Module] = None,
    groups: int = 1,
    base_width: int = 64,
    dilation: int = 1,
    norm_layer: Optional[Callable[..., nn.Module]] = None
  ) -> None:
    super(BasicBlock, self).__init__()
    if norm_layer is None:
        norm_layer = nn.BatchNorm2d
    if groups != 1 or base_width != 64:
        raise ValueError('BasicBlock only supports groups=1 and base_width=64')
    if dilation > 1:
        raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
    # Both self.conv1 and self.downsample layers downsample the input when stride != 1
    self.conv1 = conv3x3(inplanes, planes, stride)
    self.bn1 = norm_layer(planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(planes, planes)
    self.bn2 = norm_layer(planes)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x: Tensor) -> Tensor:
    identity = x

    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)

    out = self.conv2(out)
    out = self.bn2(out)

    if self.downsample is not None:
        identity = self.downsample(x)

    out += identity
    out = self.relu(out)

    return out

class SelfAttention(nn.Module):
  def __init__(self, lstm_dim, da, r):
    super(SelfAttention, self).__init__()
    self.lstm_dim = lstm_dim
    self.da = da
    self.r = r
    self.main = nn.Sequential(
        # Bidirectionalなので各隠れ層のベクトルの次元は２倍のサイズになってます。
        nn.Linear(lstm_dim * 2, da), 
        nn.Tanh(),
        nn.Linear(da, r)
    )
  def forward(self, out):
    return F.softmax(self.main(out), dim=1)

class SelfAttentionClassifier(nn.Module):
  def __init__(self, lstm_dim, da, r, tagset_size):
    super(SelfAttentionClassifier, self).__init__()
    self.lstm_dim = lstm_dim
    self.r = r
    self.attn = SelfAttention(lstm_dim, da, r)
    self.main = nn.Linear(lstm_dim * 6, tagset_size)

  def forward(self, out):
    attention_weight = self.attn(out)
    m1 = (out * attention_weight[:,:,0].unsqueeze(2)).sum(dim=1)
    m2 = (out * attention_weight[:,:,1].unsqueeze(2)).sum(dim=1)
    m3 = (out * attention_weight[:,:,2].unsqueeze(2)).sum(dim=1)
    feats = torch.cat([m1, m2, m3], dim=1)
    return self.main(feats)

In [6]:
class InterjectionModel(pl.LightningModule):
  def __init__(self, frequency_dim , hidden_dim, batch_size, num_workers):
    super(InterjectionModel, self).__init__()
    self.frequency_dim = frequency_dim
    self.hidden_dim = hidden_dim
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.num_layers = 2
    self.lstm = nn.LSTM(input_size=frequency_dim, hidden_size=hidden_dim, num_layers=self.num_layers, batch_first=True, bidirectional=True)
    self.attention = SelfAttentionClassifier(hidden_dim, 64, 3, hidden_dim)
    self.layer1 = BasicBlock(1,16)
    self.layer2 = BasicBlock(16,16)
    # self.layer3 = conv1x1(16,32)
    # self.layer4 = BasicBlock(32,32)
    self.layer5 = conv1x1(16, 1)

    self.softmax = nn.LogSoftmax()
    self.criterion = nn.BCEWithLogitsLoss()

  def forward(self, images):
    # images.size() = (batch_size × 1 × frequency_dim × hidden_dim)

#     x = self.layer1(images)
#     x = self.layer2(x)
#     x = self.layer2(x)
#     x = self.layer2(x)
#     x = self.layer2(x)
#     x = self.layer2(x)
#     x = self.layer2(x)
#     x = self.layer2(x)

    # x = self.layer3(x)
    # x = self.layer4(x)
    # x = self.layer4(x)
    # x = self.layer4(x)
#     x = self.layer5(x)

    x = images
    # x.size() = (batch_size × hidden_dim × hidden_dim)
    x = x.view(self.batch_size, self.hidden_dim, self.hidden_dim)

    lstm_result, lstm_out = self.lstm(x)
    # lstm_out[0].size() = ((num_layers × direction)× batch_size × hidden_dim)
#     last_layer = (self.num_layers - 1) * 2
#     lstm_out_mean = (lstm_out[0][last_layer] + lstm_out[0][last_layer + 1]) / 2
#     result = lstm_out_mean.squeeze()
    # result.size() = (batch_size × hidden_dim)
    
#     lstm_result = torch.cat(lstm_out[0][last_layer], lstm_out[0][last_layer + 1])
    result = self.attention(lstm_result)
    return result

  def training_step(self, batch, batch_nb):
    x, y = batch
    y_hat = self(x)
    loss = self.criterion(y_hat, y)
    self.log('train_loss', loss)
    return {'loss': loss}
  
  def validation_step(self, batch, batch_nb):
    with torch.no_grad():
        x, y = batch
        y_hat = self(x)
    return {'val_loss': self.criterion(y_hat, y)}

  def validation_epoch_end(self, outputs):
    avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
    self.log('val_loss', avg_loss)
    gc.collect()

  def test_step(self, batch, batch_nb):
      x, y = batch
      y_hat = self(x)
      return {'test_loss': self.criterion(y_hat, y)}

  def test_epoch_end(self, outputs):
    avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
    self.log('val_loss', avg_loss, prog_bar=True,)

  def configure_optimizers(self):
    # REQUIRED
    # can return multiple optimizers and learning_rate schedulers
    # (LBFGS it is automatically supported, no need for closure function)
    return optim.Adam(self.parameters(), lr=0.02)

  def train_dataloader(self):
    return data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers, drop_last=True, prefetch_factor=1)

  def val_dataloader(self):
    return data.DataLoader(valid_dataset, batch_size=self.batch_size, num_workers=self.num_workers, drop_last=True)

  def test_dataloader(self):
    return data.DataLoader(test_dataset, batch_size=self.batch_size, num_workers=self.num_workers, drop_last=True)

In [7]:
from getpass import getpass

api_key = getpass('Enter your private Neptune API token: ')

Enter your private Neptune API token:  ········································································································································································


In [12]:
# Project
PROJECT_NAME = 'hourglasshoro/test'
EXPERIMENT_NAME = ''
TAGS = []

# Param
BATCH_SIZE = 128

LightningModule_Params = {'image_size': 512,
                          'n_lstm_layer': 2,
                          'lstm_bidirection': True,
                          'learning_rate': 0.02,
                          'res_block_type': 'Basic',
                          'res_block_layer_1': 64,
                          'res_block_layer_1_num': 8}

LightningDataModule_Params = {'batch_size': BATCH_SIZE,
                              'num_workers': 2}

LearningRateMonitor_Params = {'logging_interval': 'epoch'}

ModelCheckpoint_Params = {'filename': '{epoch:02d}-{val_loss:.2f}',
                          'save_weights_only': True,
                          'monitor': 'val_loss',
                          'period': 1}

EarlyStopping_Params = {'monitor': 'val_loss'}

Trainer_Params = {'max_epochs': 30}

ALL_PARAMS = {**LightningModule_Params,
              **LightningDataModule_Params,
              **LearningRateMonitor_Params,
              **EarlyStopping_Params,
              **ModelCheckpoint_Params,
              **Trainer_Params}

neptune_logger = NeptuneLogger(
    api_key=api_key,
    project_name=PROJECT_NAME,
    close_after_fit=False,
    experiment_name=EXPERIMENT_NAME,
    params=ALL_PARAMS,
    tags=TAGS,
    )

torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True

model = InterjectionModel(512, 512, BATCH_SIZE, NUM_WORKERS)
early_stop = EarlyStopping(monitor='val_loss')
checkpoint = ModelCheckpoint(monitor='val_loss', filename='{epoch:02d}-{val_loss:.2f}', save_weights_only=True)

if ENV == "local":
    root_dir = "./tmp/" + PROJECT_NAME + '/' + EXPERIMENT_NAME
elif ENV == "colab":
    root_dir = '/content/drive/MyDrive/' + PROJECT_NAME + '/' + EXPERIMENT_NAME
    
# trainer = pl.Trainer(max_epochs=30, gpus=1, default_root_dir=root_dir, callbacks=[early_stop, checkpoint])
trainer = pl.Trainer(max_epochs=100, gpus=1, default_root_dir=root_dir, callbacks=[early_stop, checkpoint], logger=neptune_logger)
gc.collect()
summary(model, (BATCH_SIZE,1,512,512))

NeptuneLogger will work in online mode
GPU available: True, used: True
TPU available: False, using: 0 TPU cores


Layer (type:depth-idx)                   Output Shape              Param #
├─LSTM: 1-1                              [128, 512, 1024]          10,502,144
├─SelfAttentionClassifier: 1-2           [128, 512]                --
|    └─SelfAttention: 2-1                [128, 512, 3]             --
|    |    └─Sequential: 3-1              [128, 512, 3]             65,795
|    └─Linear: 2-2                       [128, 512]                1,573,376
Total params: 12,141,315
Trainable params: 12,141,315
Non-trainable params: 0
Total mult-adds (M): 13.89
Input size (MB): 128.00
Forward/backward pass size (MB): 546.00
Params size (MB): 46.32
Estimated Total Size (MB): 720.32


Layer (type:depth-idx)                   Output Shape              Param #
├─LSTM: 1-1                              [128, 512, 1024]          10,502,144
├─SelfAttentionClassifier: 1-2           [128, 512]                --
|    └─SelfAttention: 2-1                [128, 512, 3]             --
|    |    └─Sequential: 3-1              [128, 512, 3]             65,795
|    └─Linear: 2-2                       [128, 512]                1,573,376
Total params: 12,141,315
Trainable params: 12,141,315
Non-trainable params: 0
Total mult-adds (M): 13.89
Input size (MB): 128.00
Forward/backward pass size (MB): 546.00
Params size (MB): 46.32
Estimated Total Size (MB): 720.32

In [13]:
trainer.fit(model)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


https://app.neptune.ai/hourglasshoro/test/e/TES-25



  | Name      | Type                    | Params
------------------------------------------------------
0 | lstm      | LSTM                    | 10.5 M
1 | attention | SelfAttentionClassifier | 1.6 M 
2 | layer1    | BasicBlock              | 2.5 K 
3 | layer2    | BasicBlock              | 4.7 K 
4 | layer5    | Conv2d                  | 16    
5 | softmax   | LogSoftmax              | 0     
6 | criterion | BCEWithLogitsLoss       | 0     
------------------------------------------------------
12.1 M    Trainable params
0         Non-trainable params
12.1 M    Total params
48.594    Total estimated model params size (MB)


Epoch 0:  76%|███████▌  | 74/98 [00:43<00:14,  1.70it/s, loss=0.521, v_num=S-25]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/24 [00:00<?, ?it/s][A
Epoch 0:  78%|███████▊  | 76/98 [00:44<00:12,  1.71it/s, loss=0.521, v_num=S-25]
Validating:   8%|▊         | 2/24 [00:01<00:13,  1.69it/s][A
Epoch 0:  80%|███████▉  | 78/98 [00:45<00:11,  1.73it/s, loss=0.521, v_num=S-25]
Validating:  17%|█▋        | 4/24 [00:01<00:07,  2.83it/s][A
Epoch 0:  82%|████████▏ | 80/98 [00:45<00:10,  1.76it/s, loss=0.521, v_num=S-25]
Validating:  25%|██▌       | 6/24 [00:02<00:04,  3.89it/s][A
Epoch 0:  84%|████████▎ | 82/98 [00:45<00:08,  1.79it/s, loss=0.521, v_num=S-25]
Validating:  33%|███▎      | 8/24 [00:02<00:03,  4.55it/s][A
Epoch 0:  86%|████████▌ | 84/98 [00:46<00:07,  1.82it/s, loss=0.521, v_num=S-25]
Validating:  42%|████▏     | 10/24 [00:02<00:02,  4.93it/s][A
Epoch 0:  88%|████████▊ | 86/98 [00:46<00:06,  1.85it/s, loss=0.521, v_num=S-25]
Validating:  50%|█████     | 12/24

In [10]:
trainer.test()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 100%|██████████| 24/24 [00:05<00:00,  4.32it/s]
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'val_loss': 0.5121606588363647}
--------------------------------------------------------------------------------


[{'val_loss': 0.5121606588363647}]

In [14]:
neptune_logger.experiment.stop()

## Not use Pytorch Lightning

In [None]:
class LSTMClassifier(nn.Module):
  def __init__(self, frequency_dim , hidden_dim, batch_size):
    super(LSTMClassifier, self).__init__()
    self.frequency_dim = frequency_dim
    self.hidden_dim = hidden_dim
    self.batch_size = batch_size
    self.lstm = nn.LSTM(input_size=frequency_dim, hidden_size=hidden_dim, num_layers=1, batch_first=True)
    # self.lstm = nn.LSTM(input_size=frequency_dim, hidden_size=hidden_dim, num_layers=2, batch_first=True, bidirectional=True)
    self.softmax = nn.LogSoftmax()
  
  def forward(self, images):
    # images.size() = (batch_size × 1 × frequency_dim × hidden_dim)
    images = images.view(self.batch_size, self.frequency_dim, self.hidden_dim)
    # images.size() = (batch_size × frequency_dim × hidden_dim)
    _, lstm_out = self.lstm(images)
    # lstm_out[0].size() = (1 × batch_size × hidden_dim)
    # result = self.softmax(lstm_out[0].squeeze())
    # result.size() = (batch_size × hidden_dim)

    result = lstm_out[0].squeeze()

    return result

In [None]:
model = LSTMClassifier(512, 512, BATCH_SIZE).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
for epoch in range(50):
  all_loss = 0
  train_dataloader = data.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True,
    num_workers=2, drop_last=True
  )
  for i , (images, labels) in enumerate(train_dataloader):

    # 順伝搬させるtensorはGPUで処理させるためdevice=にGPUをセット
    image_tensor = torch.tensor(images, device=device)
    # category_tensor.size() = (batch_size × 1)なので、squeeze()
    label_tensor = torch.tensor(labels, device=device).squeeze()

    out = model(image_tensor)
    batch_loss = criterion(out, label_tensor)

    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

    all_loss += batch_loss.item()
  print(epoch+1, all_loss)

  # Remove the CWD from sys.path while we load stuff.
  if sys.path[0] == '':


1 111.41851443052292
2 111.2116351723671
3 111.16537064313889
4 111.16139322519302
5 111.16120076179504
6 111.16709697246552
7 111.16564673185349
8 111.17030203342438
9 111.17257761955261
10 111.171446621418
11 111.17331147193909
12 111.17211610078812
13 111.17215651273727
14 111.17157238721848
15 111.17320388555527
16 111.1725537776947
17 111.17196184396744
18 111.17305034399033
19 111.17205286026001
20 111.1707011461258
21 111.14725542068481
22 111.14445626735687
23 111.1438837647438
24 111.14277589321136
25 111.14232015609741
26 111.14104413986206
27 111.14273911714554
28 111.14228075742722
29 111.14271742105484
30 111.14269280433655
31 111.14145314693451
32 111.14332270622253
33 111.1345984339714
34 111.11222237348557
35 111.1142349243164
36 111.11433750391006
37 111.11308234930038
38 111.11320859193802
39 111.11185383796692
40 111.11280363798141
41 111.11339288949966
42 111.11564862728119
43 111.11343276500702
44 111.11402040719986
45 111.11277449131012
46 111.11353552341461
47 11