In [None]:
#

```
/**
 * Log a heartbeat message and click the first element matching the
 * "colab-run-button" selector on the current page.
 * NOTE(review): presumably meant to be pasted into the browser console of a
 * Colab tab to keep the session active — confirm. click() throws if no such
 * element exists, because querySelector then returns null.
 */
function ClickConnect(){
    console.log("Working!"); 
    document.querySelector("colab-run-button").click()
}
// Re-run ClickConnect every 60000 ms (once per minute).
setInterval(ClickConnect, 60000)
```

In [23]:
# Run the repository's init script through the shell, passing "local" as its argument.
# NOTE(review): the argument is hard-coded here and is not tied to the ENV variable
# set in the next cell — keep the two in sync when switching environments.
!./script/init.sh local

Content directory exists.


In [1]:
# Execution environment selector. Supported values:
#   "local" - dataset under ./content, 12 DataLoader workers
#   "colab" - dataset under /content, Google Drive mounted, 2 DataLoader workers
ENV = "local"
if ENV == "local":
    dir_path = "./content/content/dataset_image"
    NUM_WORKERS = 12
elif ENV == "colab":
    dir_path = "/content/content/dataset_image"
    # Imported lazily because the google.colab package only exists on Colab.
    from google.colab import drive
    drive.mount('/content/drive')
    NUM_WORKERS = 2
else:
    # Fail here rather than with a NameError on dir_path / NUM_WORKERS in a later cell.
    raise ValueError("Unsupported ENV " + repr(ENV) + "; expected 'local' or 'colab'")

In [33]:
import numpy as np
from PIL import Image
from pathlib import Path
import urllib.request
import zipfile
import random
import gc
import time
import datetime

import torch
import torch.utils.data as data
from torchvision import transforms
import os
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers.neptune import NeptuneLogger
from typing import Type, Any, Callable, Union, List, Optional
from torch import Tensor
from torchinfo import summary
# Report whether PyTorch can see a CUDA device.
print(torch.cuda.is_available())

True


In [3]:
class InterjectionDataset(data.Dataset):
  """Dataset of single-channel images paired with a binary span label.

  Each row of ``meta_df`` must provide:
    * ``image`` - file name relative to ``dir_path``
    * ``start`` / ``end`` - slice bounds of the positive region in the label

  Args:
    dir_path: directory that contains the image files.
    input_size: ``(width, height)`` passed to ``PIL.Image.resize``.
    meta_df: pandas DataFrame with the columns listed above.

  ``__getitem__`` returns ``(image, label)`` where ``image`` is a float32
  tensor of shape ``(1, height, width)`` and ``label`` is a float32 tensor of
  length ``label_length`` that is 1 inside ``[start, end)`` and 0 elsewhere.
  """

  # Length of the label vector. Kept at the value the original code
  # hard-coded; start/end in the metadata index into this vector.
  label_length = 512

  def __init__(self, dir_path, input_size, meta_df):
    super().__init__()
    self.dir_path = dir_path
    self.input_size = input_size
    self.meta_df = meta_df
    self.len = len(self.meta_df)

  def __len__(self):
    """Return the number of metadata rows (one sample per row)."""
    return self.len

  def __getitem__(self, index):
    """Load, resize and tensorise the image at ``index`` and build its label.

    Raises:
      ValueError: from ``reshape`` when the resized image is not
        single-channel (its pixel count differs from width * height).
    """
    row = self.meta_df.iloc[index]
    image_file = os.path.join(self.dir_path, row.image)

    # The context manager releases the underlying file as soon as the
    # resized copy has been converted to an array.
    with Image.open(image_file) as source:
      pixels = np.array(source.resize(self.input_size))

    # PIL sizes are (width, height) while the array is (height, width).
    # Deriving the shape from input_size replaces the former hard-coded
    # 512x512 reshape, which failed for any other input_size.
    width, height = self.input_size
    image = torch.from_numpy(pixels.reshape(1, height, width)).float()

    label = np.zeros(self.label_length, dtype=np.float32)
    label[row.start:row.end] = 1
    label = torch.from_numpy(label)

    return image, label

In [4]:
# Image folder and metadata CSV both live under the environment-specific dir_path.
image_path = os.path.join(dir_path, "image")
meta_path = os.path.join(dir_path, "interjection_data.csv")

with open(meta_path, 'r', encoding='UTF-8') as csv:
  meta_df = pd.read_csv(csv)
dataset = InterjectionDataset(image_path, (512, 512), meta_df)

# 60/20/20 split: test and validation each take 20% (rounded down),
# the training set receives whatever remains.
test_size = int(len(dataset) * 0.2)
valid_size = test_size
train_size = len(dataset) - (test_size + valid_size)

# A fixed generator seed keeps the split identical across runs.
split_rng = torch.Generator().manual_seed(0)
splited_dataset = data.random_split(
    dataset,
    [train_size, test_size, valid_size],
    generator=split_rng,
)
train_dataset, test_dataset, valid_dataset = splited_dataset

In [14]:
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
  """Create a bias-free 1x1 (pointwise) 2-D convolution.

  Args:
    in_planes: number of input channels.
    out_planes: number of output channels.
    stride: convolution stride.

  Returns:
    The configured ``nn.Conv2d`` layer.
  """
  return nn.Conv2d(
      in_channels=in_planes,
      out_channels=out_planes,
      kernel_size=1,
      stride=stride,
      bias=False,
  )

def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
  """Create a bias-free 3x3 2-D convolution whose padding equals its dilation.

  Args:
    in_planes: number of input channels.
    out_planes: number of output channels.
    stride: convolution stride.
    groups: number of channel groups.
    dilation: kernel dilation; also used as the padding on each side.

  Returns:
    The configured ``nn.Conv2d`` layer.
  """
  return nn.Conv2d(
      in_channels=in_planes,
      out_channels=out_planes,
      kernel_size=3,
      stride=stride,
      padding=dilation,
      dilation=dilation,
      groups=groups,
      bias=False,
  )

class BasicBlock(nn.Module):
  """Residual block made of two 3x3 convolutions, each followed by a norm layer.

  The block output is ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut)``
  where ``shortcut`` is ``downsample(x)`` when a ``downsample`` module was
  given and ``x`` itself otherwise.

  Args:
    inplanes: channels of the incoming tensor.
    planes: channels produced by both convolutions.
    stride: stride of the first convolution.
    downsample: optional module applied to the input to build the shortcut.
    groups: must be 1.
    base_width: must be 64.
    dilation: must not exceed 1.
    norm_layer: factory for the normalisation layers; defaults to
      ``nn.BatchNorm2d``.

  Raises:
    ValueError: if ``groups != 1`` or ``base_width != 64``.
    NotImplementedError: if ``dilation > 1``.
  """
  expansion: int = 1

  def __init__(
    self,
    inplanes: int,
    planes: int,
    stride: int = 1,
    downsample: Optional[nn.Module] = None,
    groups: int = 1,
    base_width: int = 64,
    dilation: int = 1,
    norm_layer: Optional[Callable[..., nn.Module]] = None
  ) -> None:
    super().__init__()
    # Reject unsupported configurations before any layer is created.
    if not (groups == 1 and base_width == 64):
        raise ValueError('BasicBlock only supports groups=1 and base_width=64')
    if dilation > 1:
        raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
    make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer

    # When stride != 1, conv1 (and the downsample module, if any) reduce the
    # spatial resolution of the input.
    self.conv1 = conv3x3(inplanes, planes, stride)
    self.bn1 = make_norm(planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(planes, planes)
    self.bn2 = make_norm(planes)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x: Tensor) -> Tensor:
    """Apply the two conv/norm stages and add the shortcut branch."""
    out = self.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))

    shortcut = x if self.downsample is None else self.downsample(x)

    # In-place add: the shortcut must be broadcastable to the shape of `out`.
    out += shortcut
    return self.relu(out)

class SelfAttention(nn.Module):
  """Scores every position of a sequence and normalises the scores per head.

  Args:
    lstm_dim: hidden size of the LSTM that produced the input.
    weight_size: width of the intermediate scoring layer.
    n_multi_heads: number of attention heads (scores produced per position).
    is_lstm_bidirection: whether the LSTM was bidirectional.
  """
  def __init__(self, lstm_dim, weight_size, n_multi_heads, is_lstm_bidirection):
    super().__init__()
    self.lstm_dim = lstm_dim
    self.n_multi_heads = n_multi_heads
    # A bidirectional LSTM yields hidden vectors of twice the size
    # (2 ** True == 2, 2 ** False == 1).
    in_features = lstm_dim * (2 ** is_lstm_bidirection)
    scoring_layers = [
        nn.Linear(in_features, weight_size),
        nn.Tanh(),
        nn.Linear(weight_size, self.n_multi_heads),
    ]
    self.main = nn.Sequential(*scoring_layers)

  def forward(self, out):
    """Return the per-head scores of ``out`` soft-maxed over dimension 1."""
    scores = self.main(out)
    return F.softmax(scores, dim=1)

class SelfAttentionClassifier(nn.Module):
  """Multi-head attention pooling over a sequence followed by a linear layer.

  Args:
    lstm_dim: hidden size of the LSTM that produced the input.
    weight_size: width of the attention scoring layer.
    n_multi_heads: number of attention heads.
    tagset_size: number of outputs of the final linear layer.
    is_lstm_bidirection: whether the LSTM was bidirectional.
  """
  def __init__(self, lstm_dim, weight_size, n_multi_heads, tagset_size, is_lstm_bidirection):
    super().__init__()
    self.lstm_dim = lstm_dim
    self.n_multi_heads = n_multi_heads
    self.attn = SelfAttention(lstm_dim, weight_size, n_multi_heads, is_lstm_bidirection)
    # Every head contributes one pooled feature vector; they are concatenated.
    pooled_width = lstm_dim * (2 ** is_lstm_bidirection) * n_multi_heads
    self.main = nn.Linear(pooled_width, tagset_size)

  def forward(self, out):
    """Pool ``out`` over dimension 1 once per head and classify the result."""
    weights = self.attn(out)
    # Weighted sum over dimension 1, computed separately for each head.
    pooled = [
        (out * weights[:, :, head].unsqueeze(2)).sum(dim=1)
        for head in range(self.n_multi_heads)
    ]
    return self.main(torch.cat(pooled, dim=1))

def accuracy_thresh(y_pred:Tensor, y_true:Tensor, thresh:float=0.5, sigmoid:bool=True):
   """Element-wise accuracy of thresholded predictions against binary targets.

   Args:
     y_pred: predictions, same size as ``y_true``.
     y_true: targets; converted with ``.byte()`` before the comparison.
     thresh: a prediction counts as positive when strictly greater than this.
     sigmoid: apply a sigmoid to ``y_pred`` before thresholding.

   Returns:
     A scalar tensor with the fraction of matching elements.
   """
   scores = y_pred.sigmoid() if sigmoid else y_pred
   hits = (scores > thresh) == y_true.byte()
   return hits.float().mean()

class InterjectionModel(pl.LightningModule):
  """Residual-CNN + LSTM + multi-head self-attention model.

  The input image goes through optional stacks of ``BasicBlock`` layers and an
  optional 1x1 convolution, is viewed as a sequence, encoded by an LSTM,
  pooled by ``SelfAttentionClassifier`` and mapped to ``hidden_dim`` logits.
  Training uses ``BCEWithLogitsLoss``; ``accuracy_thresh`` is the metric.

  Args:
    learning_rate: learning rate of the Adam optimiser.
    frequency_dim: LSTM input size (features per sequence step).
    hidden_dim: LSTM hidden size; also used as the sequence length, the
      per-step feature count in the view before the LSTM, and the number of
      output logits.
    batch_size: batch size of the DataLoaders built by this module.
    num_workers: worker processes of the DataLoaders.
    n_lstm_layers: number of stacked LSTM layers.
    is_lstm_bidirection: whether the LSTM is bidirectional.
    n_multi_heads: number of attention heads.
    attention_weight_size: width of the attention scoring layer.
    res_block_layer_1_size: channels of the first residual stage.
    n_res_block_layer_1: how many times ``layer1`` is applied (0 skips the stage).
    res_block_layer_2_size: channels of the second residual stage.
    n_res_block_layer_2: how many times ``layer2`` is applied (0 skips the stage).
    res_block_last_layer_size: input channels of the final 1x1 convolution;
      the convolution is applied only when this is greater than 1.

  The DataLoader methods read the module-level ``train_dataset``,
  ``valid_dataset`` and ``test_dataset`` objects.
  """

  def __init__(
      self, 
      learning_rate,
      frequency_dim, 
      hidden_dim,
      batch_size,
      num_workers,
      n_lstm_layers,
      is_lstm_bidirection,
      n_multi_heads,
      attention_weight_size,
      res_block_layer_1_size,
      n_res_block_layer_1,
      res_block_layer_2_size,
      n_res_block_layer_2,
      res_block_last_layer_size,
  ):
    super(InterjectionModel, self).__init__()
    self.learning_rate = learning_rate
    self.frequency_dim = frequency_dim
    self.hidden_dim = hidden_dim
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.lstm = nn.LSTM(input_size=frequency_dim, hidden_size=hidden_dim, num_layers=n_lstm_layers, batch_first=True, bidirectional=is_lstm_bidirection)
    self.attention = SelfAttentionClassifier(hidden_dim, attention_weight_size, n_multi_heads, hidden_dim,is_lstm_bidirection)
    self.layer1_first = BasicBlock(1,res_block_layer_1_size)
    self.layer1 = BasicBlock(res_block_layer_1_size,res_block_layer_1_size)
    self.n_res_block_layer_1 = n_res_block_layer_1

    # The residual add inside BasicBlock is in-place, so the shortcut must be
    # broadcastable to the block output. Going from layer-1 width to a
    # different layer-2 width therefore needs a projection shortcut, unless
    # the input has a single channel (which broadcasts). The projection is
    # only built when the second stage is actually used, so the parameter set
    # of configurations that already worked is unchanged.
    needs_projection = (
        n_res_block_layer_2 > 0
        and res_block_layer_1_size not in (1, res_block_layer_2_size)
    )
    layer2_shortcut = None
    if needs_projection:
        layer2_shortcut = nn.Sequential(
            conv1x1(res_block_layer_1_size, res_block_layer_2_size),
            nn.BatchNorm2d(res_block_layer_2_size),
        )
    self.layer2_trans = BasicBlock(res_block_layer_1_size, res_block_layer_2_size, downsample=layer2_shortcut)
    self.layer2 = BasicBlock(res_block_layer_2_size,res_block_layer_2_size)
    self.n_res_block_layer_2 = n_res_block_layer_2
    self.res_block_last_layer_size = res_block_last_layer_size
    self.res_block_last_layer = conv1x1(res_block_last_layer_size, 1)

    # NOTE(review): self.softmax is never used in forward(); kept so the
    # module listing and attribute set stay as before.
    self.softmax = nn.LogSoftmax()
    self.criterion = nn.BCEWithLogitsLoss()
    self.accuracy = accuracy_thresh

  def forward(self, images):
    """Return logits of shape (batch, hidden_dim) for a batch of images.

    ``images`` is expected as (batch, 1, hidden_dim, hidden_dim); the batch
    size is taken from the tensor itself, so it may differ from
    ``self.batch_size`` (e.g. a last partial batch or single-sample inference).
    """
    x = images

    # Convolution
    if self.n_res_block_layer_1 > 0:
        x = self.layer1_first(x)
        # NOTE(review): the same block instance is applied repeatedly, so all
        # repetitions share one set of weights — confirm this is intended.
        for _ in range(self.n_res_block_layer_1):
            x = self.layer1(x)

    if self.n_res_block_layer_2 > 0:
        x = self.layer2_trans(x)
        for _ in range(self.n_res_block_layer_2):
            x = self.layer2(x)

    if self.res_block_last_layer_size > 1:
        x = self.res_block_last_layer(x)

    # x.size() = (batch × hidden_dim × hidden_dim). Previously the first
    # dimension was self.batch_size, which raised for any other batch size.
    x = x.view(x.size(0), self.hidden_dim, self.hidden_dim)

    # LSTM: only the per-step outputs are used; the final (h, c) state is dropped.
    lstm_result, _ = self.lstm(x)

    # result.size() = (batch × hidden_dim)
    result = self.attention(lstm_result)

    return result

  def training_step(self, batch, batch_nb):
    """Compute the loss for one batch and log train_loss / train_accuracy."""
    x, y = batch
    y_hat = self(x)
    loss = self.criterion(y_hat, y)
    accuracy = self.accuracy(y_hat, y)
    self.log('train_loss', loss)
    self.log('train_accuracy', accuracy, prog_bar=True)
    return {'loss': loss}

  def validation_step(self, batch, batch_nb):
    """Return loss and accuracy of one validation batch."""
    with torch.no_grad():
        x, y = batch
        y_hat = self(x)
    return {'val_loss': self.criterion(y_hat, y), 'val_accuracy': self.accuracy(y_hat, y)}

  def validation_epoch_end(self, outputs):
    """Average the per-batch validation metrics and log them."""
    avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
    avg_accuracy = torch.stack([x['val_accuracy'] for x in outputs]).mean()
    self.log('val_loss', avg_loss)
    self.log('val_accuracy', avg_accuracy)
    gc.collect()

  def test_step(self, batch, batch_nb):
    """Return loss and accuracy of one test batch."""
    x, y = batch
    y_hat = self(x)
    return {'test_loss': self.criterion(y_hat, y), 'test_accuracy': self.accuracy(y_hat, y)}

  def test_epoch_end(self, outputs):
    """Average the per-batch test metrics and log them."""
    avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
    avg_accuracy = torch.stack([x['test_accuracy'] for x in outputs]).mean()
    self.log('test_loss', avg_loss, prog_bar=True)
    self.log('test_accuracy', avg_accuracy, prog_bar=True)

  def configure_optimizers(self):
    """Return an Adam optimiser over all parameters."""
    return optim.Adam(self.parameters(), lr=self.learning_rate)

  def train_dataloader(self):
    """Shuffled loader over the module-level ``train_dataset``."""
    # prefetch_factor is only accepted by DataLoader when worker processes
    # are used; passing it with num_workers == 0 raises ValueError.
    worker_options = {'prefetch_factor': 1} if self.num_workers > 0 else {}
    return data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers, drop_last=True, **worker_options)

  def val_dataloader(self):
    """Loader over the module-level ``valid_dataset``."""
    # drop_last is kept so the averaged metrics stay comparable with earlier runs.
    return data.DataLoader(valid_dataset, batch_size=self.batch_size, num_workers=self.num_workers, drop_last=True)

  def test_dataloader(self):
    """Loader over the module-level ``test_dataset``."""
    return data.DataLoader(test_dataset, batch_size=self.batch_size, num_workers=self.num_workers, drop_last=True)

In [61]:
# ---------------------------------------------------------------------------
# Experiment identity
# ---------------------------------------------------------------------------
PROJECT_NAME = 'hourglasshoro/research'
EXPERIMENT_NAME = ''
TAGS = ['IEEE SMC Hiroshima']

# ---------------------------------------------------------------------------
# Hyper-parameters, grouped by the component that consumes them
# ---------------------------------------------------------------------------
BATCH_SIZE = 16

LightningModule_Params = {
    'image_size': 512,
    'n_lstm_layers': 4,
    'is_lstm_bidirection': True,
    'n_multi_heads': 2,
    'attention_weight_size': 64,
    'learning_rate': 0.001,
    'res_block_type': 'Basic',
    'res_block_layer_1_size': 8,
    'n_res_block_layer_1': 8,
    'res_block_layer_2_size': 16,
    'n_res_block_layer_2': 0,
    'res_block_last_layer_size': 8,
}

LightningDataModule_Params = {
    'batch_size': BATCH_SIZE,
    'num_workers': NUM_WORKERS,
}

LearningRateMonitor_Params = {
    'logging_interval': 'epoch',
}

ModelCheckpoint_Params = {
    'filename': '{epoch:02d}-{val_loss:.2f}',
    'save_weights_only': False,
    'monitor': 'val_loss',
    'period': 1,
}

EarlyStopping_Params = {
    'monitor': 'val_loss',
    'patience': 3,
}

Trainer_Params = {
    'max_epochs': 100,
    'gpus': 1,
}

# Flat view of every group. Groups are merged in this order, so a key that
# appears in several groups (e.g. 'monitor') ends up with the value of the
# last group that defines it.
ALL_PARAMS = {}
for _group in (
    LightningModule_Params,
    LightningDataModule_Params,
    LearningRateMonitor_Params,
    EarlyStopping_Params,
    ModelCheckpoint_Params,
    Trainer_Params,
):
    ALL_PARAMS.update(_group)


# Enable cuDNN and its benchmark mode.
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True

# Parameters that InterjectionModel takes under the same name as in ALL_PARAMS.
_model_param_names = (
    'learning_rate',
    'batch_size',
    'num_workers',
    'n_lstm_layers',
    'is_lstm_bidirection',
    'n_multi_heads',
    'attention_weight_size',
    'res_block_layer_1_size',
    'n_res_block_layer_1',
    'res_block_layer_2_size',
    'n_res_block_layer_2',
    'res_block_last_layer_size',
)
# Both frequency_dim and hidden_dim are set to the (square) image size.
model = InterjectionModel(
    frequency_dim=ALL_PARAMS['image_size'],
    hidden_dim=ALL_PARAMS['image_size'],
    **{name: ALL_PARAMS[name] for name in _model_param_names},
)

# The parameter groups hold exactly the keyword arguments of the callbacks.
early_stop = EarlyStopping(**EarlyStopping_Params)
checkpoint = ModelCheckpoint(**ModelCheckpoint_Params)

# Directory used as the Trainer's default_root_dir.
if ENV == "local":
    root_dir = f"./tmp/{PROJECT_NAME}/{EXPERIMENT_NAME}"
elif ENV == "colab":
    root_dir = f"/content/drive/MyDrive/{PROJECT_NAME}/{EXPERIMENT_NAME}"

gc.collect()
# Layer-by-layer summary for a batch of single-channel square images.
summary(model, (BATCH_SIZE, 1, ALL_PARAMS['image_size'], ALL_PARAMS['image_size']))

Layer (type:depth-idx)                   Output Shape              Param #
├─BasicBlock: 1-1                        [16, 8, 512, 512]         --
|    └─Conv2d: 2-1                       [16, 8, 512, 512]         72
|    └─BatchNorm2d: 2-2                  [16, 8, 512, 512]         16
|    └─ReLU: 2-3                         [16, 8, 512, 512]         --
|    └─Conv2d: 2-4                       [16, 8, 512, 512]         576
|    └─BatchNorm2d: 2-5                  [16, 8, 512, 512]         16
|    └─ReLU: 2-6                         [16, 8, 512, 512]         --
├─BasicBlock: 1-2                        [16, 8, 512, 512]         --
|    └─Conv2d: 2-7                       [16, 8, 512, 512]         576
|    └─BatchNorm2d: 2-8                  [16, 8, 512, 512]         16
|    └─ReLU: 2-9                         [16, 8, 512, 512]         --
|    └─Conv2d: 2-10                      [16, 8, 512, 512]         576
|    └─BatchNorm2d: 2-11                 [16, 8, 512, 512]         16
|    └─ReLU:

Layer (type:depth-idx)                   Output Shape              Param #
├─BasicBlock: 1-1                        [16, 8, 512, 512]         --
|    └─Conv2d: 2-1                       [16, 8, 512, 512]         72
|    └─BatchNorm2d: 2-2                  [16, 8, 512, 512]         16
|    └─ReLU: 2-3                         [16, 8, 512, 512]         --
|    └─Conv2d: 2-4                       [16, 8, 512, 512]         576
|    └─BatchNorm2d: 2-5                  [16, 8, 512, 512]         16
|    └─ReLU: 2-6                         [16, 8, 512, 512]         --
├─BasicBlock: 1-2                        [16, 8, 512, 512]         --
|    └─Conv2d: 2-7                       [16, 8, 512, 512]         576
|    └─BatchNorm2d: 2-8                  [16, 8, 512, 512]         16
|    └─ReLU: 2-9                         [16, 8, 512, 512]         --
|    └─Conv2d: 2-10                      [16, 8, 512, 512]         576
|    └─BatchNorm2d: 2-11                 [16, 8, 512, 512]         16
|    └─ReLU:

In [None]:
# Plain training run (no external logger): Trainer_Params holds exactly the
# max_epochs / gpus keyword arguments.
trainer = pl.Trainer(
    default_root_dir=root_dir,
    callbacks=[early_stop, checkpoint],
    **Trainer_Params,
)
trainer.fit(model)

In [32]:
from getpass import getpass

# Prompt for the Neptune API token without echoing it, so the secret is not
# stored in the notebook source or its output.
api_key = getpass('Enter your private Neptune API token: ')

Enter your private Neptune API token:  ········································································································································································


In [34]:
import requests
import json

# Prompt for the Slack incoming-webhook URL without echoing it.
# Relies on `getpass` having been imported by the Neptune-token cell.
slack = getpass('Enter Slack Web Hook URL: ')

Enter Slack Web Hook URL:  ·················································································


In [None]:
# Train and test with Neptune logging, then post a summary to Slack.
# close_after_fit=False keeps the experiment open after fit() so that the
# test metrics are logged to the same experiment; it is stopped explicitly below.
neptune_logger = NeptuneLogger(
    api_key=api_key,
    project_name=PROJECT_NAME,
    close_after_fit=False,
    experiment_name=EXPERIMENT_NAME,
    params=ALL_PARAMS,
    tags=TAGS,
    )
# NOTE(review): `model`, `early_stop` and `checkpoint` are the same objects
# used by the plain trainer cell; if that cell was run first, they carry its
# state into this run.
trainer = pl.Trainer(max_epochs=Trainer_Params['max_epochs'], gpus=Trainer_Params['gpus'], default_root_dir=root_dir, callbacks=[early_stop, checkpoint], logger=neptune_logger)
gc.collect()

start = time.time()

try:
    trainer.fit(model)
    results = trainer.test()
    print(results)
finally:
    # Always stop the experiment, even when fit/test raises; otherwise it
    # would stay open because close_after_fit is disabled.
    neptune_logger.experiment.stop()

elapsed_time = time.time() - start
td = datetime.timedelta(seconds=elapsed_time)

loss = results[0]['test_loss']
accuracy = results[0]['test_accuracy']
epoch = trainer.current_epoch
message = "The experiment "+ neptune_logger.experiment_id + " is complete.\n" + "loss: " + str(loss) + "\n" + "accuracy: " + str(accuracy) + "\n" + "epoch: " + str(epoch) + "\n" + "total time: " + str(td)
# The timeout keeps the cell from hanging indefinitely if Slack is unreachable.
requests.post(slack, data=json.dumps({
    "text" : message,
    "icon_url" : "https://yt3.ggpht.com/ytc/AAUvwnicT3QerPJcxdXrAGxCDq4aEZd-SdkAr5tJ4RyL=s900-c-k-c0x00ffffff-no-rj",
    "username" : "Experiment BOT"
}), timeout=30)

NeptuneLogger will work in online mode
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


https://app.neptune.ai/hourglasshoro/research/e/RES-13



  | Name                 | Type                    | Params
-----------------------------------------------------------------
0 | lstm                 | LSTM                    | 23.1 M
1 | attention            | SelfAttentionClassifier | 1.1 M 
2 | layer1_first         | BasicBlock              | 680   
3 | layer1               | BasicBlock              | 1.2 K 
4 | layer2_trans         | BasicBlock              | 3.5 K 
5 | layer2               | BasicBlock              | 4.7 K 
6 | res_block_last_layer | Conv2d                  | 8     
7 | softmax              | LogSoftmax              | 0     
8 | criterion            | BCEWithLogitsLoss       | 0     
-----------------------------------------------------------------
24.2 M    Trainable params
0         Non-trainable params
24.2 M    Total params
96.905    Total estimated model params size (MB)


Epoch 0:  13%|█▎        | 101/794 [01:12<08:17,  1.39it/s, loss=0.5, v_num=S-13, train_accuracy=0.789]  