In [1]:
from getdata import getdataset
from torch.utils.data import DataLoader
from torch import nn
import torch
import json
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [25]:
from pandas.core import frame
from torch import nn
import torch


class TimeDistributed(nn.Module):
    def __init__(self, module, batch_first):
        super(TimeDistributed, self).__init__()
        self.module = module
        self.batch_first = batch_first

    def forward(self, input_seq):
        assert len(input_seq.size()) > 2
        reshaped_input = input_seq.contiguous().view(-1, input_seq.size(-1))
        output = self.module(reshaped_input)
        if self.batch_first:
            output = output.contiguous().view(input_seq.size(0), -1, output.size(-1))
        else:
            output = output.contiguous().view(-1, input_seq.size(1), output.size(-1))
        return output


class CNN_BLSTM(nn.Module):
    def __init__(self):
        super(CNN_BLSTM, self).__init__()
        # CNN
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), stride=(1, 1), padding=1), nn.ReLU(),
            nn.Conv2d(16, 16, (3, 3), (1, 1), 1), nn.ReLU(),
            nn.Conv2d(16, 16, (3, 3), (1, 3), 1), nn.ReLU())
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), stride=(1, 1), padding=1), nn.ReLU(),
            nn.Conv2d(32, 32, (3, 3), (1, 1), 1), nn.ReLU(),
            nn.Conv2d(32, 32, (3, 3), (1, 3), 1), nn.ReLU())
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=1), nn.ReLU(),
            nn.Conv2d(64, 64, (3, 3), (1, 1), 1), nn.ReLU(),
            nn.Conv2d(64, 64, (3, 3), (1, 3), 1), nn.ReLU())
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1), nn.ReLU(),
            nn.Conv2d(128, 128, (3, 3), (1, 1), 1), nn.ReLU(),
            nn.Conv2d(128, 128, (3, 3), (1, 3), 1), nn.ReLU())
        # re_shape = layers.Reshape((-1, 4*128), input_shape=(-1, 4, 128))(conv4)
        self.blstm1 = nn.LSTM(512, 128, bidirectional=True, batch_first=True)
        self.droupout = nn.Dropout(0.3)
        # FC
        self.flatten = TimeDistributed(nn.Flatten(), batch_first=True)
        self.dense1 = nn.Sequential(
            TimeDistributed(nn.Sequential(nn.Linear(in_features=256, out_features=128), nn.ReLU()), batch_first=True),
            nn.Dropout(0.3))

        # frame score
        self.frame_layer = TimeDistributed(nn.Linear(128, 1), batch_first=True)
        # avg score
        self.average_layer = nn.AdaptiveAvgPool1d(1)

    def forward(self, forward_input):
        print(forward_input.shape)
        conv1_output = self.conv1(forward_input)
        print(conv1_output.shape)
        conv2_output = self.conv2(conv1_output)
        print(conv1_output.shape)
        conv3_output = self.conv3(conv2_output)
        print(conv2_output.shape)
        conv4_output = self.conv4(conv3_output)
        print(conv3_output.shape)
        # reshape
        conv4_output = conv4_output.permute(0, 2, 1, 3)
        print(conv4_output.shape)
        conv4_output = torch.reshape(conv4_output, (conv4_output.shape[0], conv4_output.shape[1], 4 * 128))
        print(conv4_output.shape)

        # blstm
        blstm_output, (h_n, c_n) = self.blstm1(conv4_output)
        print(blstm_output.shape)
        blstm_output = self.droupout(blstm_output)
        print(blstm_output.shape)

        flatten_output = self.flatten(blstm_output)
        print(flatten_output.shape)
        fc_output = self.dense1(flatten_output)
        print(fc_output.shape)
        frame_score = self.frame_layer(fc_output)
        print(frame_score.shape)

        avg_score = self.average_layer(frame_score.permute(0, 2, 1))
        print(avg_score.shape)
        r = torch.reshape(avg_score, (avg_score.shape[0], -1))
        print(r.shape)
        return r, frame_score



In [73]:
from pandas.core import frame
from torch import nn
import torch


class TimeDistributed(nn.Module):
    def __init__(self, module, batch_first):
        super(TimeDistributed, self).__init__()
        self.module = module
        self.batch_first = batch_first

    def forward(self, input_seq):
        assert len(input_seq.size()) > 2
        reshaped_input = input_seq.contiguous().view(-1, input_seq.size(-1))
        output = self.module(reshaped_input)
        if self.batch_first:
            output = output.contiguous().view(input_seq.size(0), -1, output.size(-1))
        else:
            output = output.contiguous().view(-1, input_seq.size(1), output.size(-1))
        return output


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # CNN
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), stride=(1, 1), padding=1), nn.ReLU(),
            nn.Conv2d(16, 16, (3, 3), (1, 1), 1), nn.ReLU(),
            nn.Conv2d(16, 16, (3, 3), (1, 3), 1), nn.ReLU())
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), stride=(1, 1), padding=1), nn.ReLU(),
            nn.Conv2d(32, 32, (3, 3), (1, 1), 1), nn.ReLU(),
            nn.Conv2d(32, 32, (3, 3), (1, 3), 1), nn.ReLU())
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=1), nn.ReLU(),
            nn.Conv2d(64, 64, (3, 3), (1, 1), 1), nn.ReLU(),
            nn.Conv2d(64, 64, (3, 3), (1, 3), 1), nn.ReLU())
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1), nn.ReLU(),
            nn.Conv2d(128, 128, (3, 3), (1, 1), 1), nn.ReLU(),
            nn.Conv2d(128, 128, (3, 3), (1, 3), 1), nn.ReLU())
        # re_shape = layers.Reshape((-1, 4*128), input_shape=(-1, 4, 128))(conv4)
        self.blstm1 = nn.LSTM(512, 128, bidirectional=True, batch_first=True)
        self.droupout = nn.Dropout(0.3)
        # FC
        self.flatten = TimeDistributed(nn.Flatten(), batch_first=True)
        self.dense1 = nn.Sequential(
            TimeDistributed(nn.Sequential(nn.Linear(in_features=512, out_features=128), nn.ReLU()), batch_first=True),
            nn.Dropout(0.3))

        # frame score
        self.frame_layer = TimeDistributed(nn.Linear(128, 1), batch_first=True)
        # avg score
        self.average_layer = nn.AdaptiveAvgPool1d(1)

    def forward(self, forward_input):
        #print('teste1')
        #print(forward_input.shape)
        conv1_output = self.conv1(forward_input)
        #print('teste2')
        #print(conv1_output.shape)
        conv2_output = self.conv2(conv1_output)
        #print('teste3')
        #print(conv1_output.shape)
        conv3_output = self.conv3(conv2_output)
        #print('teste4')
        #print(conv2_output.shape)
        conv4_output = self.conv4(conv3_output)
        #print('teste5')
        #print(conv3_output.shape)
        # reshape
        conv4_output = conv4_output.permute(0, 2, 1, 3)
        #print('teste6')
        #print(conv4_output.shape)
        conv4_output = torch.reshape(conv4_output, (conv4_output.shape[0], conv4_output.shape[1], 4 * 128))
        #print('teste7')
        #print(conv4_output.shape)

        flatten_output = self.flatten(conv4_output)
        #print('teste8')
        #print(flatten_output.shape)
        fc_output = self.dense1(flatten_output)
        #print('teste9')
        #print(fc_output.shape)
        frame_score = self.frame_layer(fc_output)
        #print('teste10')
        #print(frame_score.shape)

        avg_score = self.average_layer(frame_score.permute(0, 2, 1))
        #print('teste11')
        #print(avg_score.shape)
        r = torch.reshape(avg_score, (avg_score.shape[0], -1))
        #print('teste12')
        #print(r.shape)
        return r, frame_score



In [3]:
with open('config.json') as f:
    data = f.read()
config = json.loads(data)
loaddata_config = config["loaddata_config"]
train_config = config["train_config"]

In [4]:
loaddata_config

{'num_train': 13580,
 'num_test': 4000,
 'num_valid': 3000,
 'bin_root': './data/bin/',
 'data_dir': './data',
 'fft_size': 512,
 'max_timestep': 521}

In [32]:
train_config

{'fp16_run': True,
 'output_directory': './output',
 'epochs': 100,
 'learning_rate': 0.0001,
 'batch_size': 32,
 'seed': 42,
 'checkpoint_path': '',
 'with_tensorboard': True,
 'earlystopping': 5}

In [5]:
trainset = getdataset(loaddata_config, train_config["seed"], "train")

In [6]:
train_loader = DataLoader(trainset, num_workers=0,
                          batch_size=4,
                          pin_memory=False,
                          drop_last=True)

In [26]:
model = CNN_BLSTM()

In [74]:
model = CNN()

In [8]:
torch.cuda.is_available()

True

In [9]:
torch.cuda.current_device()

0

In [10]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 3060 Laptop GPU'

In [14]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [65]:
model.train()

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 3), padding=(1, 1))
    (5): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 3), padding=(1, 1))
    (5): ReLU()
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 3), padding=(1, 1))
    (5): ReLU()
  )
  (conv4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1

In [75]:
use_cuda = torch.cuda.is_available()

device = torch.device("cuda" if use_cuda else "cpu")

In [76]:
model = model.to(device)

In [58]:
for i, batch in enumerate(tqdm(train_loader)):
    model_input, [mos_y, frame_mos_y] = batch
    print(model_input.shape)
    break

  0%|                                                                                                                                                                | 0/3395 [00:00<?, ?it/s]

torch.Size([4, 1, 492, 257])





In [68]:
dataloader_iterator = iter(train_loader)
batch = next(dataloader_iterator)
model_input, [mos_y, frame_mos_y] = batch
model_input.shape

torch.Size([4, 1, 492, 257])

In [77]:
model_input = torch.autograd.Variable(model_input.to(device, dtype=torch.float))
avg_score, frame_score = model(model_input)

In [81]:
n_epochs = 3
for epoch in range(n_epochs):
    print("Epoch: {}".format(epoch))
    for i, batch in enumerate(tqdm(train_loader)):
        model.train()
        model.zero_grad()
        model_input, [mos_y, frame_mos_y] = batch
        model_input = torch.autograd.Variable(model_input.to(device, dtype=torch.float))
        mos_y = mos_y.to(device, dtype=torch.float)
        frame_mos_y = frame_mos_y.to(device, dtype=torch.float)

        avg_score, frame_score = model(model_input)
        fn_mse1 = nn.MSELoss()
        fn_mse2 = nn.MSELoss()
        #loss = fn_mse1(batch[1][0].cuda(), avg_score) + fn_mse2(batch[1][1].cuda(), frame_score)
        loss = fn_mse1(batch[1][0].to(device, dtype=torch.float), avg_score) + fn_mse2(batch[1][1].to(device, dtype=torch.float), frame_score)
        reduced_loss = loss.item()
    print("epoch:{},loss:\t{:.9f}".format(epoch, reduced_loss))

Epoch: 0


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3395/3395 [00:33<00:00, 99.88it/s]


epoch:0,loss:	23.505809784
Epoch: 1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3395/3395 [00:34<00:00, 97.07it/s]


epoch:1,loss:	23.506727219
Epoch: 2


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3395/3395 [00:35<00:00, 96.75it/s]

epoch:2,loss:	23.504512787



