In [8]:
import pandas as pd
import numpy as np
import os
import shutil
from tqdm import tqdm
from glob import glob
import librosa
import warnings

warnings.filterwarnings("ignore")

# 데이터 불러오기

In [9]:
sample_submission = pd.read_csv("./acc/sample_submission.csv")

africa_train_paths = glob("./acc/train/africa/*.wav")
australia_train_paths = glob("./acc/train/australia/*.wav")
canada_train_paths = glob("./acc/train/canada/*.wav")
england_train_paths = glob("./acc/train/england/*.wav")
hongkong_train_paths = glob("./acc/train/hongkong/*.wav")
us_train_paths = glob("./acc/train/us/*.wav")

path_list = [africa_train_paths, australia_train_paths, canada_train_paths,
             england_train_paths, hongkong_train_paths, us_train_paths]

In [12]:
path_list

[['./acc/train/africa/common_voice_en_19619731.wav',
  './acc/train/africa/common_voice_en_17270703.wav',
  './acc/train/africa/common_voice_en_126863.wav',
  './acc/train/africa/common_voice_en_20031066.wav',
  './acc/train/africa/common_voice_en_20532200.wav',
  './acc/train/africa/common_voice_en_19483.wav',
  './acc/train/africa/common_voice_en_19704006.wav',
  './acc/train/africa/common_voice_en_17343650.wav',
  './acc/train/africa/common_voice_en_20232878.wav',
  './acc/train/africa/common_voice_en_20540986.wav',
  './acc/train/africa/common_voice_en_22179519.wav',
  './acc/train/africa/common_voice_en_19783951.wav',
  './acc/train/africa/common_voice_en_20233044.wav',
  './acc/train/africa/common_voice_en_18341811.wav',
  './acc/train/africa/common_voice_en_20531983.wav',
  './acc/train/africa/common_voice_en_20515067.wav',
  './acc/train/africa/common_voice_en_19713591.wav',
  './acc/train/africa/common_voice_en_20001867.wav',
  './acc/train/africa/common_voice_en_19181259.wav'

In [19]:
import itertools
path=list(itertools.chain(path_list))

In [25]:
import itertools
path=list(itertools.chain.from_iterable(path_list))

In [28]:
path

['./acc/train/africa/common_voice_en_19619731.wav',
 './acc/train/africa/common_voice_en_17270703.wav',
 './acc/train/africa/common_voice_en_126863.wav',
 './acc/train/africa/common_voice_en_20031066.wav',
 './acc/train/africa/common_voice_en_20532200.wav',
 './acc/train/africa/common_voice_en_19483.wav',
 './acc/train/africa/common_voice_en_19704006.wav',
 './acc/train/africa/common_voice_en_17343650.wav',
 './acc/train/africa/common_voice_en_20232878.wav',
 './acc/train/africa/common_voice_en_20540986.wav',
 './acc/train/africa/common_voice_en_22179519.wav',
 './acc/train/africa/common_voice_en_19783951.wav',
 './acc/train/africa/common_voice_en_20233044.wav',
 './acc/train/africa/common_voice_en_18341811.wav',
 './acc/train/africa/common_voice_en_20531983.wav',
 './acc/train/africa/common_voice_en_20515067.wav',
 './acc/train/africa/common_voice_en_19713591.wav',
 './acc/train/africa/common_voice_en_20001867.wav',
 './acc/train/africa/common_voice_en_19181259.wav',
 './acc/train/afr

In [3]:
# glob로 test data의 path를 불러올때 순서대로 로드되지 않을 경우를 주의해야 합니다.
# test_ 데이터 프레임을 만들어서 나중에 sample_submission과 id를 기준으로 merge시킬 준비를 합니다.

def get_id(data):
    return np.int(data.split("/")[3].split(".")[0])

test_ = pd.DataFrame(index = range(0, 6100), columns = ["path", "id"])
test_["path"] = glob("./acc/test/*.wav")
test_["id"] = test_["path"].apply(lambda x : get_id(x))

test_.head()

Unnamed: 0,path,id
0,./acc/test/1636.wav,1636
1,./acc/test/2045.wav,2045
2,./acc/test/3766.wav,3766
3,./acc/test/576.wav,576
4,./acc/test/5634.wav,5634


# 데이터 전처리

In [4]:
def load_data(paths):

    result = []
    for path in tqdm(paths):
        # sr = 16000이 의미하는 것은 1초당 16000개의 데이터를 샘플링 한다는 것입니다.
        data, sr = librosa.load(path, sr = 16000)
        #data = data.reshape(-1, 1)
        result.append(data)
    result = np.array(result) 
    # 메모리가 부족할 때는 데이터 타입을 변경해 주세요 ex) np.array(data, dtype = np.float32)

    return result

In [38]:
# train 데이터를 로드하기 위해서는 많은 시간이 소모 됩니다.
# 따라서 추출된 정보를 npy파일로 저장하여 필요 할 때마다 불러올 수 있게 준비합니다.

os.mkdir("./raw_audio")

africa_train_data = load_data(africa_train_paths)
np.save("./raw_audio/africa_npy", africa_train_data)

australia_train_data = load_data(australia_train_paths)
np.save("./raw_audio/australia_npy", australia_train_data)

canada_train_data = load_data(canada_train_paths)
np.save("./raw_audio/canada_npy", canada_train_data)

england_train_data = load_data(england_train_paths)
np.save("./raw_audio/england_npy", england_train_data)

hongkong_train_data = load_data(hongkong_train_paths)
np.save("./raw_audio/hongkong_npy", hongkong_train_data)

us_train_data = load_data(us_train_paths)
np.save("./raw_audio/us_npy", us_train_data)

test_data = load_data(test_["path"])
np.save("./raw_audio/test_npy", test_data)

100%|██████████| 2500/2500 [09:31<00:00,  4.38it/s]
100%|██████████| 1000/1000 [03:43<00:00,  4.47it/s]
100%|██████████| 1000/1000 [03:52<00:00,  4.30it/s]
100%|██████████| 10000/10000 [37:43<00:00,  4.42it/s] 
100%|██████████| 1020/1020 [06:35<00:00,  2.58it/s]
100%|██████████| 10000/10000 [38:16<00:00,  4.36it/s] 
100%|██████████| 6100/6100 [23:15<00:00,  4.37it/s]  


In [13]:
# npy파일로 저장된 데이터를 불러옵니다.
africa_train_data = np.load("./npy_data/africa_npy.npy", allow_pickle = True)
australia_train_data = np.load("./npy_data/australia_npy.npy", allow_pickle = True)
canada_train_data = np.load("./npy_data/canada_npy.npy", allow_pickle = True)
england_train_data = np.load("./npy_data/england_npy.npy", allow_pickle = True)
hongkong_train_data = np.load("./npy_data/hongkong_npy.npy", allow_pickle = True)
us_train_data = np.load("./npy_data/us_npy.npy", allow_pickle = True)

test_data = np.load("./npy_data/test_npy.npy", allow_pickle = True)

train_data_list = [africa_train_data, australia_train_data, canada_train_data, england_train_data, hongkong_train_data, us_train_data]

In [15]:
len(train_data_list)

6

In [8]:
# 이번 대회에서 음성은 각각 다른 길이를 갖고 있습니다.
# baseline 코드에서는 음성 중 길이가 가장 작은 길이의 데이터를 기준으로 데이터를 잘라서 사용합니다.

def get_mini(data):

    mini = 9999999
    for i in data:
        if len(i) < mini:
            mini = len(i)

    return mini

#음성들의 길이를 맞춰줍니다.

def set_length(data, d_mini):

    result = []
    for i in data:
        result.append(i[:d_mini])
    result = np.array(result)

    return result


#feature를 생성합니다.

def get_feature(data, sr = 16000, n_fft = 1042, win_length = 512, hop_length = 512, n_mels = 64):
    mel = []
    for i in data:
        # win_length 는 음성을 작은 조각으로 자를때 작은 조각의 크기입니다.
        # hop_length 는 음성을 작은 조각으로 자를때 자르는 간격을 의미합니다.
        # n_mels 는 적용할 mel filter의 개수입니다.
        mel_ = librosa.feature.melspectrogram(i, sr = sr, n_fft = n_fft, win_length = win_length, hop_length = hop_length, n_mels = n_mels)
        mel.append(mel_)
    mel = np.array(mel)
    mel = librosa.power_to_db(mel, ref = np.max)

    mel_mean = mel.mean()
    mel_std = mel.std()
    mel = (mel - mel_mean) / mel_std

    return mel

def get_feature_mfcc(data, sr = 44100, n_mfcc = 64, win_length = 512, hop_length = 512):
    mfcc = []
    for i in data:
        # win_length 는 음성을 작은 조각으로 자를때 작은 조각의 크기입니다.
        # hop_length 는 음성을 작은 조각으로 자를때 자르는 간격을 의미합니다.
        # n_mels 는 적용할 mel filter의 개수입니다.
        mel_ = librosa.feature.mfcc(i, sr = sr, n_mfcc = n_mfcc, win_length = win_length, hop_length = hop_length)
        mfcc.append(mel_)
    mel = np.array(mfcc)
   # mel = librosa.power_to_db(mel, ref = np.max)

   # mel_mean = mel.mean()
   # mel_std = mel.std()
   # mel = (mel - mel_mean) / mel_std

    return mel



#librosa.feature.mfcc(y=None, sr=22050, S=None, n_mfcc=20, dct_type=2, norm='ortho', lifter=0, **kwargs)

In [4]:
import librosa

In [1]:
class config:
    sampling_rate = 44100  # 44.1 kHz
    duration = 4 #2 # Minimum length for short samples (seconds)
    samples = sampling_rate * duration # Minimum sample size
    
    top_db = 60 # Noise filtering, default = 60
    
    # Frequencies kept in spectrograms
    fmin = 20
    fmax =  sampling_rate // 2  # Shannon theorem

    # Spectrogram parameters
    n_mels = 64 # = spec_height
    n_fft = n_mels * 30 # Size of fft window - smooths the spectrogram
    spec_min_width = 256 #128
    x_mean,x_std = -35.7, 21.6
    hop_length = duration * sampling_rate // spec_min_width + 1 # Number of samples between each frame - impacts y size of spectrogram

In [2]:
def read_audio(pathname, conf, trim_long_data):
    y, sr = librosa.load(pathname, sr=conf.sampling_rate)
    # trim silence
    if len(y) > 0: # workaround: 0 length causes error
        y, _ = librosa.effects.trim(y) # trim, top_db=default(60)
    # make it unified length to conf.samples
    if len(y) > conf.samples: # long enough
        if trim_long_data: y = y[0:0+conf.samples]
    else: # pad blank
        padding = conf.samples - len(y)    # add padding at both ends
        offset = padding // 2
        y = np.pad(y, (offset, conf.samples - len(y) - offset), 'constant')
    return y

In [5]:
def audio_to_melspectrogram(audio, config, three_chanels=False):
    spectrogram = librosa.feature.melspectrogram(audio, 
                                                 sr=config.sampling_rate,
                                                 n_mels=config.n_mels,
                                                 hop_length=config.hop_length,
                                                 n_fft=config.n_fft,
                                                 fmin=config.fmin,
                                                 fmax=config.fmax)
    logmel = librosa.power_to_db(spectrogram).astype(np.float32)
    
    if three_chanels:
        return np.array([logmel, librosa.feature.delta(logmel), librosa.feature.delta(logmel, order=2)])
    else:
        return logmel

In [6]:
def normalize(X, mean=None, std=None):
    mean = mean or X.mean()
    std = std or (X-X.mean()).std()
    return ((X - mean)/std).astype(np.float16)

In [35]:
def process(path, config, crop=False, three_chanels=False):
    X = []
    for i in path:
        signal = read_audio(i, config, crop)
        X.append(normalize(audio_to_melspectrogram(signal, config), config.x_mean, config.x_std)) #normalize based on global statistics
        #X.append(normalize(audio_to_melspectrogram(signal, config))) #normalize based on individual statistics
    return X

In [36]:
three_chanels = False
crop = False

In [None]:
X_train = process(path, config, crop=crop, three_chanels=three_chanels)

In [9]:
train_x = np.concatenate(train_data_list, axis= 0)
test_x = np.array(test_data)

# 음성의 길이 중 가장 작은 길이를 구합니다.

train_mini = get_mini(train_x)
test_mini = get_mini(test_x)

mini = np.min([train_mini, test_mini])

# data의 길이를 가장 작은 길이에 맞춰 잘라줍니다.

train_x = set_length(train_x, mini)
test_x = set_length(test_x, mini)

# librosa를 이용해 feature를 추출합니다.

train_x = get_feature(data = train_x)
test_x = get_feature(data = test_x)

train_x = train_x.reshape(-1, train_x.shape[1], train_x.shape[2], 1)
test_x = test_x.reshape(-1, test_x.shape[1], test_x.shape[2], 1)

In [10]:
train_x.shape

(25520, 64, 157, 1)

In [10]:
train_y = np.concatenate((np.zeros(len(africa_train_data), dtype = np.int),
                        np.ones(len(australia_train_data), dtype = np.int),
                         np.ones(len(canada_train_data), dtype = np.int) * 2,
                         np.ones(len(england_train_data), dtype = np.int) * 3,
                         np.ones(len(hongkong_train_data), dtype = np.int) * 4,
                         np.ones(len(us_train_data), dtype = np.int) * 5), axis = 0)

In [11]:
train_y = pd.get_dummies(train_y).to_numpy(dtype = 'long')

In [12]:
train_y.shape

(25520, 6)

In [13]:
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.models as models
import torch.nn as nn
import torchvision.transforms as transforms
import timm
device='cuda'

In [19]:
train_x.shape # N x H x W x C  numpy
train_x.shape # N x C x H x W torch

(25520, 64, 157, 1)

In [14]:
class ToTensor(object):
    """numpy array를 tensor(torch)로 변환합니다."""
    def __call__(self, sample):
        x, y = sample['x'], sample['y']
        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        x = x.transpose((2, 0, 1))
        return {'x': torch.FloatTensor(x),
                'y': torch.FloatTensor(y)}
to_tensor = transforms.Compose([
                      ToTensor() 
])
class CustomDataset(torch.utils.data.Dataset): 
    def __init__(self,train_x,train_y,transforms=to_tensor):
        self.x_data = train_x
        self.y_data = train_y
        self.transforms = transforms# Transform

    def __len__(self):
        return len(self.x_data)#x_data길이

    def __getitem__(self, idx): 
        
        x = self.x_data[idx]
        y = self.y_data[idx]
        sample = {'x': x, 'y': y}
        if self.transforms:
            sample = self.transforms(sample)
        y = y.astype(np.float32)
        
        
        return (x,y)

In [15]:
class ConvBnRelu(nn.Module):
    def __init__(self, in_channel, out_channel, kernel_size, stride=1, padding=0, dilation=1,
                 groups=1):
        super(ConvBnRelu, self).__init__()
        self.conv_bn_relu = nn.Sequential(
            nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, dilation, groups,
                      False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU(True))

        
    def forward(self, x):
        return self.conv_bn_relu(x)


class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size()[0], -1)


class EnvNetv2(nn.Module):
    def __init__(self, num_classes=6):
        super(EnvNetv2, self).__init__()
        self.conv1 = ConvBnRelu(1, 32, (1, 64), stride=(1, 2))
        self.conv2 = ConvBnRelu(32, 64, (1, 16), stride=(1, 2))
        self.conv3 = ConvBnRelu(1, 32, (8, 8))
        self.conv4 = ConvBnRelu(32, 32, (8, 8))
        self.conv5 = ConvBnRelu(32, 64, (1, 4))
        self.conv6 = ConvBnRelu(64, 64, (1, 4))
        self.conv7 = ConvBnRelu(64, 128, (1, 2))
        self.conv8 = ConvBnRelu(128, 128, (1, 2))
        self.conv9 = ConvBnRelu(128, 256, (1, 2))
        self.conv10 = ConvBnRelu(256, 256, (1, 2))
        self.maxpool1 = nn.MaxPool2d((1, 64), stride=(1, 64))
        self.maxpool2 = nn.MaxPool2d((5, 3), stride=(5, 3))
        self.maxpool3 = nn.MaxPool2d((1, 2), stride=(1, 2))
        self.avgpool = nn.AdaptiveMaxPool2d((10, 1))
        self.flatten = Flatten()
        self.last_linear1 = nn.Sequential(
            nn.Linear(256 * 10, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.1),
            nn.Linear(1024, num_classes),
        )
        self.last_linear2 = nn.Sequential(
            nn.Linear(256 * 10, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.1),
            nn.Linear(1024, num_classes),
        )

    def forward(self, input):
        h = self.conv1(input)
        h = self.conv2(h)
        h = self.maxpool1(h)
        h = h.transpose(1, 2)
        h = self.conv3(h)
        h = self.conv4(h)
        h = self.maxpool2(h)
        h = self.conv5(h)
        h = self.conv6(h)
        h = self.maxpool3(h)
        h = self.conv7(h)
        h = self.conv8(h)
        h = self.maxpool3(h)
        h = self.conv9(h)
        h = self.conv10(h)
        # h = self.maxpool3(h)
        h = self.avgpool(h)
        h = self.flatten(h)
        h = self.last_linear1(h)
        return h

In [17]:
model = EnvNetv2()
model.to(device)

EnvNetv2(
  (conv1): ConvBnRelu(
    (conv_bn_relu): Sequential(
      (0): Conv2d(1, 32, kernel_size=(1, 64), stride=(1, 2), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (conv2): ConvBnRelu(
    (conv_bn_relu): Sequential(
      (0): Conv2d(32, 64, kernel_size=(1, 16), stride=(1, 2), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (conv3): ConvBnRelu(
    (conv_bn_relu): Sequential(
      (0): Conv2d(1, 32, kernel_size=(8, 8), stride=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (conv4): ConvBnRelu(
    (conv_bn_relu): Sequential(
      (0): Conv2d(32, 32, kernel_size=(8, 8), stride=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=

In [83]:
model.to(device)

MultiLabelResnet(
  (conv2d): Conv2d(64, 3, kernel_size=(1, 1), stride=(1, 1))
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): ReLU(inplace=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), pa

In [17]:
train_x.shape

(25520, 40, 157, 1)

In [254]:
train_x.flatten()

array([-1131.3708   , -1131.3708   ,  -901.063    , ...,     3.7879317,
           2.1753721,    -8.299694 ], dtype=float32)

In [258]:
train_x

(40, 157)

In [31]:
import torchvision.models as models

In [41]:
model=models.inception_v3()

In [None]:
inception_resnet_v2

In [15]:
wide_resnet50_2 = models.wide_resnet50_2()

In [16]:
wide_resnet50_2

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), strid

In [17]:
def mixup_data(x, y, alpha=1.0, use_cuda=True):

    '''Compute the mixup data. Return mixed inputs, pairs of targets, and lambda'''
    if alpha > 0.:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1.
    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)
#     lam = max(lam, 1 - lam)
    mixed_x = lam * x + (1 - lam) * x[index,:]
    mixed_y = lam * y + (1 - lam) * y[index]
    return mixed_x, mixed_y

def mixup_criterion(y_a, y_b, lam):
    return lambda criterion, pred: lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

In [18]:
model=wide_resnet50_2

In [61]:
class MultiLabelResnet(nn.Module):
    def __init__(self):
        super(MultiLabelResnet, self).__init__()
        self.conv2d = nn.Conv2d(64, 3, 1, stride=1)
        self.resnet = models.wide_resnet50_2()#timm.create_model('efficientnet_b5', pretrained=False) 
        self.resnet = timm.create_model('resnet101', pretrained=False) 
        self.FC = nn.Linear(1000, 6)

    def forward(self, x):
        # resnet의 입력은 [3, N, N]으로
        # 3개의 채널을 갖기 때문에
        # resnet 입력 전에 conv2d를 한 층 추가
        x = F.relu(self.conv2d(x))

        # resnet18을 추가
        x = F.relu(self.resnet(x))

        # 마지막 출력에 nn.Linear를 추가
        # multilabel을 예측해야 하기 때문에
        # softmax가 아닌 sigmoid를 적용
        #x = self.FC(x)
        x = torch.sigmoid(self.FC(x))
        return x
# 모델 선언

model = MultiLabelResnet()
model.to(device)

MultiLabelResnet(
  (conv2d): Conv2d(64, 3, kernel_size=(1, 1), stride=(1, 1))
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): ReLU(inplace=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=

In [34]:
model

MultiLabelResnet(
  (resnet): InceptionResnetV2(
    (conv2d_1a): BasicConv2d(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (conv2d_2a): BasicConv2d(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (conv2d_2b): BasicConv2d(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (maxpool_3a): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2d_3b): BasicConv2d(
      (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=Tr

In [50]:
x=torch.randn([32,64,157,1])
#conv2d = nn.Conv2d(64, 3, 1, stride=1)
#x = F.relu(conv2d(x))
#x=x.to(device)
model.to(device)
model(x)

RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[32, 64, 157, 1] to have 3 channels, but got 64 channels instead

In [95]:
model.to(device)

MultiLabelResnet(
  (conv2d): Conv2d(64, 3, kernel_size=(1, 1), stride=(1, 1))
  (resnet): EfficientNet(
    (conv_stem): Conv2d(3, 136, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(136, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): SiLU(inplace=True)
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(136, 136, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=136, bias=False)
          (bn1): BatchNorm2d(136, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act1): SiLU(inplace=True)
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(136, 34, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(34, 136, kernel_size=(1, 1), stride=(1, 1))
          )
          (conv_pw): Conv2d(136, 72, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): Batc

In [274]:
train_x=train_x.flatten()

In [276]:
train_x.shape

(160265600,)

In [25]:
dataset = CustomDataset(train_x,train_y)
dataloader = DataLoader(dataset, batch_size=32,shuffle=True)

In [58]:
for idx, sample in enumerate(dataloader):
    print(idx, sample[0].size(),
          sample[0].size())


0 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
1 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
2 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
3 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
4 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
5 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
6 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
7 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
8 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
9 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
10 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
11 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
12 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
13 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
14 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
15 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
16 torch.Size([32, 64, 157, 1]) to

233 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
234 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
235 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
236 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
237 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
238 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
239 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
240 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
241 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
242 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
243 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
244 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
245 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
246 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
247 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
248 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
249 torc

469 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
470 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
471 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
472 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
473 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
474 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
475 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
476 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
477 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
478 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
479 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
480 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
481 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
482 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
483 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
484 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
485 torc

706 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
707 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
708 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
709 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
710 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
711 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
712 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
713 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
714 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
715 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
716 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
717 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
718 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
719 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
720 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
721 torch.Size([32, 64, 157, 1]) torch.Size([32, 64, 157, 1])
722 torc

In [202]:
dataloader

<torch.utils.data.dataloader.DataLoader at 0x7f8324255b38>

In [186]:
data=[]
target=[]
for batch_idx,(data,target) in enumerate(dataloader):
    if batch_idx <=3 :
        data.add(data)
        target.add(target)
        break

In [187]:
data.shape

torch.Size([64, 64, 157])

In [245]:
data[0].flatten().shape

torch.Size([10048])

In [189]:
target[0].shape

torch.Size([6])

In [81]:
data1=data.to(device)

In [92]:
target1=target.to(device)

In [83]:
model(data1).shape#1배치

torch.Size([64, 6])

In [84]:
probs=model(data1)

In [85]:
probs

tensor([[0.4839, 0.5130, 0.4949, 0.4739, 0.5186, 0.4744],
        [0.5010, 0.5049, 0.4765, 0.4757, 0.5228, 0.4513],
        [0.4969, 0.5298, 0.5030, 0.4910, 0.4771, 0.4688],
        [0.5044, 0.4921, 0.4884, 0.4966, 0.5075, 0.4563],
        [0.4844, 0.5272, 0.5199, 0.4935, 0.4899, 0.4808],
        [0.4997, 0.4985, 0.4868, 0.4726, 0.4899, 0.4627],
        [0.5240, 0.4984, 0.4564, 0.4959, 0.5045, 0.4621],
        [0.4949, 0.4875, 0.4989, 0.4814, 0.5024, 0.4708],
        [0.4584, 0.5335, 0.4893, 0.5194, 0.5138, 0.4837],
        [0.5118, 0.5267, 0.4881, 0.5038, 0.5076, 0.4603],
        [0.5042, 0.5344, 0.4969, 0.4919, 0.5029, 0.4801],
        [0.4794, 0.5208, 0.5045, 0.5071, 0.4934, 0.4726],
        [0.4903, 0.5170, 0.4883, 0.4826, 0.5056, 0.4594],
        [0.4966, 0.5232, 0.5025, 0.4648, 0.5040, 0.4480],
        [0.4780, 0.5466, 0.4844, 0.5117, 0.4988, 0.4729],
        [0.4893, 0.5220, 0.4778, 0.4707, 0.5387, 0.4655],
        [0.4829, 0.5106, 0.4901, 0.4931, 0.4958, 0.4754],
        [0.486

In [86]:
probs = F.softmax(probs) #합산 1

In [99]:
probs.argmax()

tensor(274, device='cuda:0')

In [105]:
probs[1].argmax()

tensor(4, device='cuda:0')

In [106]:
target1[1].argmax()

tensor(4, device='cuda:0')

In [87]:
probs

tensor([[0.1651, 0.1700, 0.1669, 0.1635, 0.1709, 0.1636],
        [0.1687, 0.1693, 0.1646, 0.1645, 0.1724, 0.1605],
        [0.1671, 0.1726, 0.1681, 0.1661, 0.1638, 0.1624],
        [0.1689, 0.1668, 0.1662, 0.1676, 0.1694, 0.1610],
        [0.1642, 0.1714, 0.1701, 0.1657, 0.1651, 0.1636],
        [0.1691, 0.1689, 0.1669, 0.1646, 0.1675, 0.1630],
        [0.1724, 0.1680, 0.1611, 0.1676, 0.1690, 0.1620],
        [0.1676, 0.1664, 0.1683, 0.1653, 0.1688, 0.1636],
        [0.1599, 0.1723, 0.1649, 0.1699, 0.1690, 0.1640],
        [0.1687, 0.1712, 0.1647, 0.1673, 0.1679, 0.1602],
        [0.1671, 0.1722, 0.1658, 0.1650, 0.1668, 0.1631],
        [0.1638, 0.1708, 0.1680, 0.1685, 0.1662, 0.1627],
        [0.1666, 0.1711, 0.1663, 0.1653, 0.1692, 0.1615],
        [0.1677, 0.1723, 0.1687, 0.1625, 0.1690, 0.1598],
        [0.1632, 0.1748, 0.1642, 0.1688, 0.1666, 0.1624],
        [0.1658, 0.1713, 0.1639, 0.1628, 0.1742, 0.1619],
        [0.1653, 0.1699, 0.1664, 0.1670, 0.1674, 0.1640],
        [0.166

In [96]:
target1

tensor([[0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1., 0.],
        [0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0

In [43]:
criterion  = torch.nn.BCELoss()
#loss = criterion(probs, target1)

In [97]:
loss

tensor(0.4517, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)

In [40]:
len(dataloader)

798

In [41]:
len(dataset)

25520

In [54]:
model.to(device)

MultiLabelResnet(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): ReLU(inplace=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=

In [62]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.001,momentum=0.9,weight_decay=0.0005)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=50,gamma=0.1)
criterion  = torch.nn.BCELoss()
def train(model,dataloader,optimizer,epoch,batch):
    model.train()
    train_loss=0
    train_acc_list=[]
    correct=0
    for batch_idx,(data,target) in enumerate(dataloader):
        if batch_idx==797:
            break
        data,target = data.to(device),target.to(device)
        data,target = mixup_data(data,target, alpha=1, use_cuda=True)
        optimizer.zero_grad()
    
        output = model(data)
        probs = F.softmax(output)
        
        loss = criterion(probs, target)
        
        loss.backward()
        
        optimizer.step()
        

        
        cnt = 0
        
        for i in range(0,31):
            if probs[i].argmax() == target[i].argmax():
                        cnt +=1
                
        batch_acc = cnt/32
        train_acc_list.append(batch_acc)
        train_acc = np.mean(train_acc_list)
        train_loss=loss.item()

            # 현재 progress bar에 현재 미니배치의 loss 결과 출력
        #train_bar.set_postfix(train_loss= loss.item(),
         #                           train_acc = train_acc)
        
        #output = output.cpu().detach().numpy()
        #target = target.cpu().detach().numpy()
        
        #train_loss = F.cross_entropy(output,target,reduction='sum').item()
        #out = torch.argmax(target, 1) 
        #correct+=pred.eq(target.view_as(pred)).sum().item()
        
    
    return train_loss,train_acc



In [25]:
!nvidia-smi

/usr/bin/nvidia-modprobe: unrecognized option: "-s"

ERROR: Invalid commandline, please run `/usr/bin/nvidia-modprobe --help` for
       usage information.

/usr/bin/nvidia-modprobe: unrecognized option: "-s"

ERROR: Invalid commandline, please run `/usr/bin/nvidia-modprobe --help` for
       usage information.

Wed Jun 30 12:11:26 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.102.04   Driver Version: 450.102.04   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 208...  Off  | 00000000:01:00.0 Off |                  N/A |
| 35%   37C    P8    28W / 260W |   2205MiB / 11016MiB |      0%      Defaul

In [60]:
!kill -9 3214

/bin/bash: line 0: kill: (3214) - No such process


In [63]:
epochs=50
for epoch in range(1,epochs+1):
    scheduler.step()
    train_loss,train_accuracy = train(model,dataloader,optimizer,epoch,batch=32)
    
    print('[{} Train Loss: {:.4f},Accuracy:{:.4f}'.format(epoch,train_loss,train_accuracy))

[1 Train Loss: 0.3888,Accuracy:0.3770
[2 Train Loss: 0.3918,Accuracy:0.3870
[3 Train Loss: 0.3906,Accuracy:0.3905
[4 Train Loss: 0.3657,Accuracy:0.3916
[5 Train Loss: 0.3836,Accuracy:0.3937
[6 Train Loss: 0.3778,Accuracy:0.3912
[7 Train Loss: 0.3838,Accuracy:0.3926
[8 Train Loss: 0.3777,Accuracy:0.3941
[9 Train Loss: 0.3774,Accuracy:0.3919
[10 Train Loss: 0.3775,Accuracy:0.3916
[11 Train Loss: 0.3838,Accuracy:0.3928
[12 Train Loss: 0.3521,Accuracy:0.3975
[13 Train Loss: 0.3647,Accuracy:0.3919
[14 Train Loss: 0.3836,Accuracy:0.3951
[15 Train Loss: 0.3963,Accuracy:0.3912
[16 Train Loss: 0.3772,Accuracy:0.3977
[17 Train Loss: 0.3583,Accuracy:0.3944
[18 Train Loss: 0.3836,Accuracy:0.3929
[19 Train Loss: 0.3773,Accuracy:0.3915
[20 Train Loss: 0.3774,Accuracy:0.3915
[21 Train Loss: 0.3455,Accuracy:0.3944
[22 Train Loss: 0.3837,Accuracy:0.3974
[23 Train Loss: 0.3836,Accuracy:0.3959
[24 Train Loss: 0.3837,Accuracy:0.3948
[25 Train Loss: 0.3710,Accuracy:0.3960
[26 Train Loss: 0.3836,Accuracy:0.

In [37]:
!kill -9 7565

In [33]:
model.to(device)

Res1dNet31(
  (conv0): Conv1d(1, 64, kernel_size=(11,), stride=(5,), padding=(5,), bias=False)
  (bn0): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (resnet): _ResNetWav1d(
    (layer1): Sequential(
      (0): _ResnetBasicBlockWav1d(
        (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
        (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(2,), dilation=(2,), bias=False)
        (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): _ResnetBasicBlockWav1d(
        (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
        (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv1d(64, 64, kernel_size=(3,),

In [347]:
import torch.optim as optim

criterion = nn.MultiLabelSoftMarginLoss()#nn.CrossEntropyLoss()#BCEWithLogitsLoss()#
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [None]:
losses = []

running_loss = 0.0
for i, data in enumerate(dataloader, 0):
    # get the inputs
    inputs, labels = data
    inputs = inputs.float()
    labels = labels.float()

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = mode(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # print statistics
    running_loss += loss.item()
    if i % 200 == 199:    # print every 2000 mini-batches
        print('[%d, %5d] loss: %.3f' %
              (epoch + 1, i + 1, running_loss / 200))
        running_loss = 0.0

In [34]:
from tqdm import tqdm

# 모델의 dropoupt, batchnormalization를 train 모드로 설정
model.train()

for epoch in range(50):
    # 1개 epoch 훈련
    train_acc_list = []
    with tqdm(dataloader,#train_data_loader를 iterative하게 반환
            total=dataloader.__len__(), # train_data_loader의 크기
            unit="batch") as train_bar: # 한번 반환하는 smaple의 단위는 "batch"
        for idx,sample in enumerate(train_bar):
            #if idx == 224 :
            #    break
            train_bar.set_description(f"Train Epoch {epoch}")
            # 갱신할 변수들에 대한 모든 변화도를 0으로 초기화
            # 참고)https://tutorials.pytorch.kr/beginner/pytorch_with_examples.html
            optimizer.zero_grad()

            images, labels = sample
            # tensor를 gpu에 올리기 
            images = images.to(device)
            labels = labels.to(device)


            
            # .forward()에서 중간 노드의 gradient를 계산
            with torch.set_grad_enabled(True):
                # 모델 예측
                probs = model(images)
                probs = F.softmax(probs)
               # probs = (probs == probs.max()) * 1.0
                #loss = criterion(probs, y_train)
                
                
                loss = criterion(probs, labels)
                #loss = criterion(probs, torch.max(y_train, 1)[1])

                loss.backward()
                optimizer.step()
                
                probs  = probs.cpu().detach().numpy()
                labels = labels.cpu().detach().numpy()
                # train accuracy 계산
                cnt = 0
                for i in range(batch):
                    
                    if probs[i].argmax() == labels[i].argmax():
                        cnt +=1
                
                #preds = probs > 0.5
                #batch_acc = (labels == preds).mean()
                batch_acc = cnt/10
                train_acc_list.append(batch_acc)
                train_acc = np.mean(train_acc_list)

            # 현재 progress bar에 현재 미니배치의 loss 결과 출력
            train_bar.set_postfix(train_loss= loss.item(),
                                    train_acc = train_acc)
            

Train Epoch 0:   0%|          | 0/399 [00:00<?, ?batch/s]


NameError: name 'criterion' is not defined

In [315]:
len(probs)


10

In [34]:
276*9

2484

In [35]:
176

176

In [41]:
(256 * 99) + 176

25520

In [39]:
25600+176

25776

In [None]:
25520

# 예측

In [59]:
model.eval()

MultiLabelResnet(
  (conv2d): Conv2d(64, 3, kernel_size=(1, 1), stride=(1, 1))
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): ReLU(inplace=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), pa

In [64]:
test_y = pd.DataFrame(index=range(0,len(test_x)), columns=['0', '1', '2', '3', '4', '5'])
test_y = test_y.fillna(0).to_numpy()
dataset = CustomDataset(test_x,test_y)
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)


In [327]:
model

Cnn14(
  (bn0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_block1): ConvBlock(
    (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block2): ConvBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block3): ConvBlock(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1),

In [76]:
len(dataloader)

610

In [77]:
len(dataset)

6100

In [78]:
for idx, sample in enumerate(dataloader):
    print(idx, sample[0].size(),
          sample[0].size())



0 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
3 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
4 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
5 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
6 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
7 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
8 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
9 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
10 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
11 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
12 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
13 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
14 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
15 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
16 torch.Size([10, 64, 157, 1]) to

347 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
348 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
349 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
350 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
351 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
352 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
353 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
354 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
355 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
356 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
357 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
358 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
359 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
360 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
361 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
362 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
363 torc

In [79]:
pred_ = []
for idx, sample in enumerate(dataloader):
    with torch.no_grad():
        # 추론
        model.eval()
        images,_ = sample
        images = images.to(device)
        probs  = model(images)
        probs = F.softmax(probs)
        probs = probs.cpu().detach().numpy()
        pred_.append(probs)
      

In [80]:
test_y

array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       ...,
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

In [91]:
#len(dataloader) 6080 

SyntaxError: invalid syntax (<ipython-input-91-1e9c4e5abe43>, line 1)

In [81]:
pred_

[array([[0.10638479, 0.10599849, 0.10599609, 0.2878177 , 0.10599323,
         0.2878097 ],
        [0.1063268 , 0.10600265, 0.10600308, 0.28784603, 0.10599227,
         0.28782916],
        [0.10675909, 0.1060552 , 0.10605694, 0.28752312, 0.10604954,
         0.2875561 ],
        [0.10647988, 0.10600539, 0.10600886, 0.2877515 , 0.10600894,
         0.28774545],
        [0.10642981, 0.10600476, 0.10600089, 0.28777948, 0.10600361,
         0.2877815 ],
        [0.10647001, 0.10601738, 0.10601514, 0.28774217, 0.1060086 ,
         0.2877467 ],
        [0.10629475, 0.10598218, 0.10598254, 0.28788564, 0.10598302,
         0.2878719 ],
        [0.10620363, 0.10596783, 0.10596862, 0.28794622, 0.10597026,
         0.28794342],
        [0.10680555, 0.10605122, 0.10603824, 0.28753316, 0.10603479,
         0.28753707],
        [0.10674043, 0.10604897, 0.10604385, 0.2875653 , 0.10605088,
         0.2875506 ]], dtype=float32),
 array([[0.10639352, 0.10599124, 0.10598861, 0.28781572, 0.10599001,
    

In [None]:
212

99*64 
6100

In [150]:
del pred

In [82]:
import itertools
pred=list(itertools.chain(*pred_))

In [83]:
pred=list(itertools.chain(*pred_))

In [84]:
len(pred)

6100

In [68]:
pred

[array([0.11972019, 0.10493022, 0.10494219, 0.28251898, 0.10501168,
        0.28287673], dtype=float32),
 array([0.1365189 , 0.10316354, 0.10323951, 0.27659377, 0.10325778,
        0.27722654], dtype=float32),
 array([0.10725312, 0.10583949, 0.10584142, 0.2876058 , 0.10583994,
        0.2876202 ], dtype=float32),
 array([0.11842185, 0.10482448, 0.10482601, 0.28342316, 0.10486048,
        0.28364408], dtype=float32),
 array([0.11173568, 0.10551631, 0.10553443, 0.28579465, 0.10553264,
        0.2858863 ], dtype=float32),
 array([0.11037276, 0.10560217, 0.10560635, 0.28637022, 0.10560667,
        0.28644183], dtype=float32),
 array([0.10971007, 0.10557368, 0.10558029, 0.28676277, 0.10558523,
        0.28678796], dtype=float32),
 array([0.12401553, 0.10432792, 0.10436633, 0.28127077, 0.10438109,
        0.28163838], dtype=float32),
 array([0.11763854, 0.10487788, 0.10489731, 0.28372586, 0.10490854,
        0.2839519 ], dtype=float32),
 array([0.10905202, 0.10567749, 0.10568196, 0.28693208,

In [151]:
pred_

[array([[0.1081325 , 0.10617962, 0.10622071, 0.2866078 , 0.10617088,
         0.2866885 ],
        [0.10771678, 0.10609294, 0.10611901, 0.28694767, 0.1061004 ,
         0.28702322],
        [0.10754948, 0.10607797, 0.10606804, 0.28709522, 0.10608145,
         0.28712782],
        [0.10882153, 0.10618911, 0.10622294, 0.28615683, 0.10618307,
         0.28642657],
        [0.1073886 , 0.10608907, 0.10607734, 0.28720015, 0.10603659,
         0.28720826],
        [0.10768888, 0.1060693 , 0.10605559, 0.28701654, 0.10605422,
         0.28711542],
        [0.10741203, 0.10607852, 0.10605197, 0.28718337, 0.10606293,
         0.28721115],
        [0.10813162, 0.10618096, 0.10617853, 0.28660873, 0.10614582,
         0.28675428],
        [0.10715533, 0.10606823, 0.10606129, 0.2873172 , 0.10603657,
         0.28736144],
        [0.10794112, 0.10619339, 0.10621163, 0.2866997 , 0.10618571,
         0.28676844]], dtype=float32),
 array([[0.10831267, 0.10620923, 0.10620708, 0.28645787, 0.10622188,
    

In [69]:
pred_[0][9]

array([0.10905202, 0.10567749, 0.10568196, 0.28693208, 0.10568243,
       0.28697404], dtype=float32)

In [165]:
np.mean(pred_, axis = 0).shape

(10, 6)

In [328]:
len(pred)

6100

In [322]:
pd.DataFrame(np.mean(pred, axis = 0))

Unnamed: 0,0
0,0.107848
1,0.106136
2,0.106138
3,0.286825
4,0.106132
5,0.28692


(6100, 64, 501, 1)

In [85]:
test_

Unnamed: 0,path,id
0,./acc/test/1636.wav,1636
1,./acc/test/2045.wav,2045
2,./acc/test/3766.wav,3766
3,./acc/test/576.wav,576
4,./acc/test/5634.wav,5634
...,...,...
6095,./acc/test/679.wav,679
6096,./acc/test/1267.wav,1267
6097,./acc/test/1214.wav,1214
6098,./acc/test/391.wav,391


In [86]:
def cov_type(data):
    return np.int(data)

# 처음에 살펴본 것처럼 glob로 test data의 path는 sample_submission의 id와 같이 1,2,3,4,5.....으로 정렬 되어있지 않습니다.
# 만들어둔 test_ 데이터프레임을 이용하여 sample_submission과 predict값의 id를 맞춰줍니다.
sample_submission = pd.read_csv("./acc/sample_submission.csv")
result = pd.concat([test_, pd.DataFrame(pred)], axis = 1).iloc[:, 1:]
result["id"] = result["id"].apply(lambda x : cov_type(x))

result = pd.merge(sample_submission["id"], result)
result.columns = sample_submission.columns

In [87]:
result

Unnamed: 0,id,africa,australia,canada,england,hongkong,us
0,1,0.106268,0.105979,0.105981,0.287898,0.105982,0.287893
1,2,0.106719,0.106063,0.106056,0.287557,0.106052,0.287554
2,3,0.106246,0.105977,0.105974,0.287912,0.105975,0.287917
3,4,0.106457,0.105996,0.105999,0.287775,0.105994,0.287779
4,5,0.106520,0.105997,0.106001,0.287743,0.105999,0.287740
...,...,...,...,...,...,...,...
6095,6096,0.106325,0.105991,0.105986,0.287855,0.105987,0.287856
6096,6097,0.106560,0.106004,0.106011,0.287705,0.106006,0.287714
6097,6098,0.106681,0.106036,0.106034,0.287605,0.106028,0.287616
6098,6099,0.106426,0.105999,0.106002,0.287790,0.106001,0.287781


In [174]:
pd.DataFrame(pred)

Unnamed: 0,0,1,2,3,4,5
0,0.108133,0.106180,0.106221,0.286608,0.106171,0.286689
1,0.107717,0.106093,0.106119,0.286948,0.106100,0.287023
2,0.107549,0.106078,0.106068,0.287095,0.106081,0.287128
3,0.108822,0.106189,0.106223,0.286157,0.106183,0.286427
4,0.107389,0.106089,0.106077,0.287200,0.106037,0.287208
...,...,...,...,...,...,...
6095,0.108278,0.106147,0.106175,0.286552,0.106136,0.286713
6096,0.107446,0.106158,0.106167,0.287083,0.106133,0.287013
6097,0.107697,0.106066,0.106107,0.286976,0.106070,0.287084
6098,0.108226,0.106214,0.106224,0.286459,0.106238,0.286638


In [88]:
result.to_csv("DACON.csv", index = False)

In [73]:
result = pd.concat([test_, pd.DataFrame(np.mean(pred, axis = 0))], axis = 1).iloc[:, 1:]

In [75]:
result

Unnamed: 0,id,0
0,1636,0.106431
1,2045,0.106001
2,3766,0.106000
3,576,0.287783
4,5634,0.105999
...,...,...
6095,679,
6096,1267,
6097,1214,
6098,391,
