In [1]:
import torch
import torch.nn.functional as F
from typing import Optional
import random
import torch
from torch import nn
from einops import rearrange, repeat
from einops.layers.torch import Rearrange
import os
import cv2
import numpy as np
import math
import mediapipe as mp
from matplotlib import pyplot as plt
import glob
from util.img2bone import HandDetector
import torch
import torch.nn as nn

from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import glob
from tqdm.auto import tqdm
from einops import rearrange, repeat
from einops.layers.torch import Rearrange
from loader.dataloader import MultiModalData1
from sklearn.metrics import precision_score, recall_score, f1_score
from util.log import Log
from trainer import train,validate,get_accuracy


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = "cuda:1"

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True


seed_everything(20)



train_set = MultiModalData1(glob.glob("data/new_data/Scalogram/train2/*"))
test_set = MultiModalData1(glob.glob("data/new_data/Scalogram/test2/*"))
val_set = MultiModalData1(glob.glob("data/new_data/Scalogram/val2/*"))



train_loader = DataLoader(train_set, batch_size=64,
                          drop_last=True, num_workers=3,shuffle=True)
valid_loader = DataLoader(val_set, batch_size=64,
                          drop_last=True, num_workers=3)
test_loader = DataLoader(test_set, batch_size=64,
                         drop_last=True, num_workers=3)

14228
1375
2419


In [4]:
# BSD 2-Clause License

# Copyright (c) 2019 wangvation. All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:

# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.

# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# ============================================================================

from torch import nn
from torch.nn import Conv2d
from torch.nn import BatchNorm2d
from torch.nn import AvgPool2d
from torch.nn import Softmax2d
from torch.nn import ReLU6
from torch.nn.functional import relu6
from torch.nn.functional import relu


def _make_divisible(v, divisor, min_value=None):
  """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
  """
  if min_value is None:
    min_value = divisor
  new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
  # Make sure that round down does not go down by more than 10%.
  if new_v < 0.9 * v:
    new_v += divisor
  return new_v


class DepthSepConv(nn.Module):
  """docstring for Depthwise Separable Convolution"""

  def __init__(self,
               in_channels,
               out_channels,
               ksize=3,
               stride=1,
               padding=1,
               multiplier=1):
    super(DepthSepConv, self).__init__()
    in_channels = _make_divisible(in_channels * multiplier, 8)
    out_channels = _make_divisible(out_channels * multiplier, 8)
    self.depthwise_conv = Conv2d(in_channels=in_channels,
                                 out_channels=in_channels,
                                 kernel_size=ksize,
                                 stride=stride,
                                 padding=padding,
                                 groups=in_channels)

    self.bn1 = BatchNorm2d(in_channels)

    self.pointwise_conv = Conv2d(in_channels=in_channels,
                                 out_channels=out_channels,
                                 kernel_size=1,
                                 stride=1,
                                 groups=1)
    self.bn2 = BatchNorm2d(out_channels)

  def forward(self, x):
    x = self.depthwise_conv(x)
    x = self.bn1(x)
    x = relu(x)
    x = self.pointwise_conv(x)
    x = self.bn2(x)
    x = relu(x)
    return x


class MobileNetV1(nn.Module):
  """
  docstring for MobileNetV1
  MobileNetV1 Body Architecture
  | Type / Stride | Filter Shape        | Input Size     | Output Size      |
  | :------------ | :------------------ | :------------- | :-------------   |
  | Conv / s2     | 3 × 3 × 3 × 32      | 224 x 224 x 3  | 112 x 112 x 32   |
  | Conv dw / s1  | 3 × 3 × 32 dw       | 112 x 112 x 32 | 112 x 112 x 32   |
  | Conv / s1     | 1 × 1 × 32 x 64     | 112 x 112 x 32 | 112 x 112 x 64   |
  | Conv dw / s2  | 3 × 3 × 64 dw       | 112 x 112 x 64 | 56 x 56 x 64     |
  | Conv / s1     | 1 × 1 × 64 × 128    | 56 x 56 x 64   | 56 x 56 x 128    |
  | Conv dw / s1  | 3 × 3 × 128 dw      | 56 x 56 x 128  | 56 x 56 x 128    |
  | Conv / s1     | 1 × 1 × 128 × 128   | 56 x 56 x 128  | 56 x 56 x 128    |
  | Conv dw / s2  | 3 × 3 × 128 dw      | 56 x 56 x 128  | 28 x 28 x 128    |
  | Conv / s1     | 1 × 1 × 128 × 256   | 28 x 28 x 128  | 28 x 28 x 256    |
  | Conv dw / s1  | 3 × 3 × 256 dw      | 28 x 28 x 256  | 28 x 28 x 256    |
  | Conv / s1     | 1 × 1 × 256 × 256   | 28 x 28 x 256  | 28 x 28 x 256    |
  | Conv dw / s2  | 3 × 3 × 256 dw      | 28 x 28 x 256  | 14 x 14 x 256    |
  | Conv / s1     | 1 × 1 × 256 × 512   | 14 x 14 x 256  | 14 x 14 x 512    |
  | Conv dw / s1  | 3 × 3 × 512 dw      | 14 x 14 x 512  | 14 x 14 x 512    |
  | Conv / s1     | 1 × 1 × 512 × 512   | 14 x 14 x 512  | 14 x 14 x 512    |
  | Conv dw / s1  | 3 × 3 × 512 dw      | 14 x 14 x 512  | 14 x 14 x 512    |
  | Conv / s1     | 1 × 1 × 512 × 512   | 14 x 14 x 512  | 14 x 14 x 512    |
  | Conv dw / s1  | 3 × 3 × 512 dw      | 14 x 14 x 512  | 14 x 14 x 512    |
  | Conv / s1     | 1 × 1 × 512 × 512   | 14 x 14 x 512  | 14 x 14 x 512    |
  | Conv dw / s1  | 3 × 3 × 512 dw      | 14 x 14 x 512  | 14 x 14 x 512    |
  | Conv / s1     | 1 × 1 × 512 × 512   | 14 x 14 x 512  | 14 x 14 x 512    |
  | Conv dw / s1  | 3 × 3 × 512 dw      | 14 x 14 x 512  | 14 x 14 x 512    |
  | Conv / s1     | 1 × 1 × 512 × 512   | 14 x 14 x 512  | 14 x 14 x 512    |
  | Conv dw / s2  | 3 × 3 × 512 dw      | 14 x 14 x 512  | 7 x 7 x 512      |
  | Conv / s1     | 1 × 1 × 512 × 1024  | 7 x 7 x 512    | 7 x 7 x 1024     |
  | Conv dw / s1  | 3 × 3 × 1024 dw     | 7 x 7 x 1024   | 7 x 7 x 1024     |
  | Conv / s1     | 1 × 1 × 1024 × 1024 | 7 x 7 x 1024   | 7 x 7 x 1024     |
  | AvgPool / s1  | Pool 7 × 7          | 7 x 7 x 1024   | 1 x 1 x 1024     |
  | FC / s1       | 1024 x 1000         | 1 x 1 x 1024   | 1 x 1 x 1000     |
  | Softmax / s1  | Classifier          | 1 x 1 x 1000   | 1 x 1 x 1000     |

  """

  def __init__(self, resolution=224, num_classes=41, multiplier=1):

    super(MobileNetV1, self).__init__()
    self.name = "MobileNetV1_%d_%03d" % (resolution, int(multiplier * 100))
    assert(resolution % 32 == 0)
    self.first_in_channel = _make_divisible(32 * multiplier, 8)
    self.last_out_channel = _make_divisible(1024 * multiplier, 8)
    self.features = nn.Sequential(
        Conv2d(8, self.first_in_channel, kernel_size=3, stride=2, padding=1),
        DepthSepConv(32, 64, stride=1, multiplier=multiplier),
        DepthSepConv(64, 128, stride=2, multiplier=multiplier),
        DepthSepConv(128, 128, stride=1, multiplier=multiplier),
        DepthSepConv(128, 256, stride=2, multiplier=multiplier),
        DepthSepConv(256, 256, stride=1, multiplier=multiplier),
        DepthSepConv(256, 512, stride=2, multiplier=multiplier),
        DepthSepConv(512, 512, stride=1, multiplier=multiplier),
        DepthSepConv(512, 512, stride=1, multiplier=multiplier),
        DepthSepConv(512, 512, stride=1, multiplier=multiplier),
        DepthSepConv(512, 512, stride=1, multiplier=multiplier),
        DepthSepConv(512, 512, stride=1, multiplier=multiplier),
        DepthSepConv(512, 1024, stride=2, multiplier=multiplier),
        DepthSepConv(1024, 1024, stride=1, multiplier=multiplier))

    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Dropout(0.5),
        nn.Linear(50176,num_classes)
    )

  def forward(self, x):

    x = self.features(x)
    x = self.classifier(x)
   
    return x

In [5]:
log = Log("log/VIT_Spec", "vit_emg")
model = MobileNetV1().to(device).double()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
epochs = 500
train_losses = []
valid_losses = []
train_accuracy = []
val_accuracy = []

train_f1score_weighted = []
val_f1scroe_weighted = []

train_f1score_micro = []
val_f1scroe_micro = []

test_log = []

best_f1 = -1000

for epoch in range(epochs):
    # training
    model, train_loss, optimizer = train(
        train_loader, model, criterion, optimizer, device)

    # validation
    with torch.no_grad():
        model, valid_loss = validate(valid_loader, model, criterion, device)
    train_acc, f1_score_weighted, f1_score_micro = get_accuracy(
        model, train_loader, device)
    # save f1 score
    train_f1score_weighted.append(f1_score_weighted)
    train_f1score_micro.append(f1_score_micro)

    val_acc, f1_score_weighted, f1_score_micro = get_accuracy(
        model, valid_loader, device)
    # save f1 score
    if best_f1 < f1_score_micro:
        torch.save(model.state_dict(),
                   f"log/VIT_Spec/best_model{epoch}.pth")
        log.save_model(model)
        best_f1 = f1_score_micro
    val_f1scroe_weighted.append(f1_score_weighted)
    val_f1scroe_micro.append(f1_score_micro)
    print("Epoch {} --- Train loss = {} --- Valid loss = {} -- Train set accuracy = {} % Valid set Accuracy = {} %".format
          (epoch+1, train_loss, valid_loss, train_acc, val_acc))
    # save loss value
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    # save accuracy
    train_accuracy.append(train_acc)
    val_accuracy.append(val_acc)

    test_log.append(get_accuracy(model, test_loader, device))

    log.save_training_log(train_losses, train_accuracy,
                          train_f1score_weighted, train_f1score_micro)
    log.save_val_log(valid_losses, val_accuracy,
                     val_f1scroe_weighted, val_f1scroe_micro)
    log.save_test_log(test_log)


100%|██████████| 222/222 [03:46<00:00,  1.02s/it]
100%|██████████| 37/37 [00:16<00:00,  2.24it/s]


Epoch 1 --- Train loss = 4.759764627461749 --- Valid loss = 4.17502500700142 -- Train set accuracy = 5.42652027027027 % Valid set Accuracy = 5.0675675675675675 %


100%|██████████| 222/222 [03:29<00:00,  1.06it/s]
100%|██████████| 37/37 [00:12<00:00,  2.86it/s]


Epoch 2 --- Train loss = 3.164108678527759 --- Valid loss = 4.43004446457542 -- Train set accuracy = 5.13795045045045 % Valid set Accuracy = 5.827702702702703 %


100%|██████████| 222/222 [03:24<00:00,  1.08it/s]
100%|██████████| 37/37 [00:13<00:00,  2.84it/s]


Epoch 3 --- Train loss = 2.8240911274176366 --- Valid loss = 5.383923226475265 -- Train set accuracy = 16.547015765765767 % Valid set Accuracy = 8.488175675675675 %


100%|██████████| 222/222 [03:25<00:00,  1.08it/s]
100%|██████████| 37/37 [00:12<00:00,  2.90it/s]


Epoch 4 --- Train loss = 2.543158015907429 --- Valid loss = 5.06550333267175 -- Train set accuracy = 6.932713963963964 % Valid set Accuracy = 5.489864864864865 %


100%|██████████| 222/222 [03:24<00:00,  1.09it/s]
100%|██████████| 37/37 [00:12<00:00,  3.00it/s]


Epoch 5 --- Train loss = 2.2845063026398535 --- Valid loss = 5.118074227606404 -- Train set accuracy = 4.898648648648648 % Valid set Accuracy = 3.125 %


100%|██████████| 222/222 [03:25<00:00,  1.08it/s]
100%|██████████| 37/37 [00:12<00:00,  2.85it/s]


Epoch 6 --- Train loss = 2.050760888732552 --- Valid loss = 5.623036384116401 -- Train set accuracy = 6.883445945945946 % Valid set Accuracy = 4.096283783783784 %


100%|██████████| 222/222 [03:28<00:00,  1.07it/s]
100%|██████████| 37/37 [00:13<00:00,  2.69it/s]


KeyboardInterrupt: 