In [1]:
import pandas as pd
import numpy as np
import os
import shutil
from tqdm import tqdm
from glob import glob
import librosa
import warnings

warnings.filterwarnings("ignore")

# 데이터 불러오기

In [3]:
sample_submission = pd.read_csv("./acc/sample_submission.csv")

africa_train_paths = glob("./acc/train/africa/*.wav")
australia_train_paths = glob("./acc/train/australia/*.wav")
canada_train_paths = glob("./acc/train/canada/*.wav")
england_train_paths = glob("./acc/train/england/*.wav")
hongkong_train_paths = glob("./acc/train/hongkong/*.wav")
us_train_paths = glob("./acc/train/us/*.wav")

path_list = [africa_train_paths, australia_train_paths, canada_train_paths,
             england_train_paths, hongkong_train_paths, us_train_paths]

In [13]:
# glob로 test data의 path를 불러올때 순서대로 로드되지 않을 경우를 주의해야 합니다.
# test_ 데이터 프레임을 만들어서 나중에 sample_submission과 id를 기준으로 merge시킬 준비를 합니다.

def get_id(data):
    return np.int(data.split("/")[3].split(".")[0])

test_ = pd.DataFrame(index = range(0, 6100), columns = ["path", "id"])
test_["path"] = glob("./acc/test/*.wav")
test_["id"] = test_["path"].apply(lambda x : get_id(x))

test_.head()

Unnamed: 0,path,id
0,./acc/test/1636.wav,1636
1,./acc/test/2045.wav,2045
2,./acc/test/3766.wav,3766
3,./acc/test/576.wav,576
4,./acc/test/5634.wav,5634


# 데이터 전처리

In [2]:
def load_data(paths):

    result = []
    for path in tqdm(paths):
        # sr = 16000이 의미하는 것은 1초당 16000개의 데이터를 샘플링 한다는 것입니다.
        data, sr = librosa.load(path, sr = 44100)
        result.append(data)
    result = np.array(result) 
    # 메모리가 부족할 때는 데이터 타입을 변경해 주세요 ex) np.array(data, dtype = np.float32)

    return result

In [None]:
# train 데이터를 로드하기 위해서는 많은 시간이 소모 됩니다.
# 따라서 추출된 정보를 npy파일로 저장하여 필요 할 때마다 불러올 수 있게 준비합니다.

os.mkdir("./npy_data")

africa_train_data = load_data(africa_train_paths)
np.save("./npy_data/africa_npy", africa_train_data)

australia_train_data = load_data(australia_train_paths)
np.save("./npy_data/australia_npy", australia_train_data)

canada_train_data = load_data(canada_train_paths)
np.save("./npy_data/canada_npy", canada_train_data)

england_train_data = load_data(england_train_paths)
np.save("./npy_data/england_npy", england_train_data)

hongkong_train_data = load_data(hongkong_train_paths)
np.save("./npy_data/hongkong_npy", hongkong_train_data)

us_train_data = load_data(us_train_paths)
np.save("./npy_data/us_npy", us_train_data)

test_data = load_data(test_["path"])
np.save("./npy_data/test_npy", test_data)

In [3]:
# npy파일로 저장된 데이터를 불러옵니다.
africa_train_data = np.load("./npy_data/africa_npy.npy", allow_pickle = True)
australia_train_data = np.load("./npy_data/australia_npy.npy", allow_pickle = True)
canada_train_data = np.load("./npy_data/canada_npy.npy", allow_pickle = True)
england_train_data = np.load("./npy_data/england_npy.npy", allow_pickle = True)
hongkong_train_data = np.load("./npy_data/hongkong_npy.npy", allow_pickle = True)
us_train_data = np.load("./npy_data/us_npy.npy", allow_pickle = True)

test_data = np.load("./npy_data/test_npy.npy", allow_pickle = True)

train_data_list = [africa_train_data, australia_train_data, canada_train_data, england_train_data, hongkong_train_data, us_train_data]

In [4]:
# 이번 대회에서 음성은 각각 다른 길이를 갖고 있습니다.
# baseline 코드에서는 음성 중 길이가 가장 작은 길이의 데이터를 기준으로 데이터를 잘라서 사용합니다.

def get_mini(data):

    mini = 9999999
    for i in data:
        if len(i) < mini:
            mini = len(i)

    return mini

#음성들의 길이를 맞춰줍니다.

def set_length(data, d_mini):

    result = []
    for i in data:
        result.append(i[:d_mini])
    result = np.array(result)

    return result


#def






#feature를 생성합니다.

def get_feature(data, sr = 44100, n_fft = 1042, win_length = 512, hop_length = 512, n_mels = 64):
    mel = []
    for i in data:
        # win_length 는 음성을 작은 조각으로 자를때 작은 조각의 크기입니다.
        # hop_length 는 음성을 작은 조각으로 자를때 자르는 간격을 의미합니다.
        # n_mels 는 적용할 mel filter의 개수입니다.
        mel_ = librosa.feature.melspectrogram(i, sr = sr, n_fft = n_fft, win_length = win_length, hop_length = hop_length, n_mels = n_mels)
        mel.append(mel_)
    mel = np.array(mel)
    mel = librosa.power_to_db(mel, ref = np.max)

    mel_mean = mel.mean()
    mel_std = mel.std()
    mel = (mel - mel_mean) / mel_std

    return mel

In [5]:
train_x = np.concatenate(train_data_list, axis= 0)
test_x = np.array(test_data)

# 음성의 길이 중 가장 작은 길이를 구합니다.

train_mini = get_mini(train_x)
test_mini = get_mini(test_x)

mini = np.min([train_mini, test_mini])

# data의 길이를 가장 작은 길이에 맞춰 잘라줍니다.

train_x = set_length(train_x, mini)
test_x = set_length(test_x, mini)

# librosa를 이용해 feature를 추출합니다.

train_x = get_feature(data = train_x)
test_x = get_feature(data = test_x)

train_x = train_x.reshape(-1, train_x.shape[1], train_x.shape[2], 1)
test_x = test_x.reshape(-1, test_x.shape[1], test_x.shape[2], 1)

In [6]:
train_x.shape

(25520, 64, 157, 1)

In [7]:
train_y = np.concatenate((np.zeros(len(africa_train_data), dtype = np.int),
                        np.ones(len(australia_train_data), dtype = np.int),
                         np.ones(len(canada_train_data), dtype = np.int) * 2,
                         np.ones(len(england_train_data), dtype = np.int) * 3,
                         np.ones(len(hongkong_train_data), dtype = np.int) * 4,
                         np.ones(len(us_train_data), dtype = np.int) * 5), axis = 0)

In [8]:
train_y = pd.get_dummies(train_y).to_numpy(dtype = 'long')

In [9]:
train_y.shape

(25520, 6)

In [10]:
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.models as models
import torch.nn as nn
import torchvision.transforms as transforms
import timm
device='cuda'

In [11]:
class ToTensor(object):
    """numpy array를 tensor(torch)로 변환합니다."""
    def __call__(self, sample):
        x, y = sample['x'], sample['y']
        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        #x = x.transpose((2, 0, 1))
        return {'x': torch.FloatTensor(x),
                'y': torch.FloatTensor(y)}
to_tensor = transforms.Compose([
                      ToTensor() 
])
class CustomDataset(torch.utils.data.Dataset): 
    def __init__(self,train_x,train_y,transforms=to_tensor):
        self.x_data = train_x
        self.y_data = train_y
        self.transforms = transforms# Transform

    def __len__(self):
        return len(self.x_data)#x_data길이

    def __getitem__(self, idx): 
        
        x = self.x_data[idx]
        y = self.y_data[idx]
        sample = {'x': x, 'y': y}
        if self.transforms:
            sample = self.transforms(sample)
        y = y.astype(np.float32)
        
        
        return (x,y)

In [19]:
class MultiLabelResnet(nn.Module):
    def __init__(self):
        super(MultiLabelResnet, self).__init__()
        self.conv2d = nn.Conv2d(64, 3, 1, stride=1)
        self.resnet = timm.create_model('res2next50', pretrained=False) 
        self.FC = nn.Linear(1000, 6)

    def forward(self, x):
        # resnet의 입력은 [3, N, N]으로
        # 3개의 채널을 갖기 때문에
        # resnet 입력 전에 conv2d를 한 층 추가
        x = F.relu(self.conv2d(x))

        # resnet18을 추가
        x = F.relu(self.resnet(x))

        # 마지막 출력에 nn.Linear를 추가
        # multilabel을 예측해야 하기 때문에
        # softmax가 아닌 sigmoid를 적용
        #x = self.FC(x)
        x = torch.sigmoid(self.FC(x))
        return x
# 모델 선언

model = MultiLabelResnet()
model.to(device)

MultiLabelResnet(
  (conv2d): Conv2d(64, 3, kernel_size=(1, 1), stride=(1, 1))
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottle2neck(
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (convs): ModuleList(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
          (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
          (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
        )
        (bns): ModuleList

In [20]:
#gpu 장착
model.to(device)

MultiLabelResnet(
  (conv2d): Conv2d(64, 3, kernel_size=(1, 1), stride=(1, 1))
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottle2neck(
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (convs): ModuleList(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
          (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
          (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
        )
        (bns): ModuleList

In [25]:
dataset = CustomDataset(train_x,train_y)
dataloader = DataLoader(dataset, batch_size=128,shuffle=True)

In [22]:
for idx, sample in enumerate(dataloader):
    print(idx, sample[0].size(),
          sample[0].size())


0 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
3 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
4 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
5 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
6 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
7 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
8 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
9 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
10 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
11 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
12 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
13 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
14 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
15 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
16 torch.Size([10, 64, 157, 1]) to

212 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
213 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
214 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
215 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
216 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
217 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
218 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
219 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
220 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
221 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
222 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
223 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
224 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
225 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
226 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
227 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
228 torc

605 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
606 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
607 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
608 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
609 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
610 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
611 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
612 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
613 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
614 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
615 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
616 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
617 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
618 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
619 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
620 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
621 torc

1022 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1023 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1024 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1025 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1026 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1027 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1028 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1029 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1030 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1031 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1032 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1033 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1034 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1035 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1036 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1037 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 1

1427 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1428 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1429 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1430 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1431 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1432 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1433 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1434 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1435 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1436 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1437 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1438 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1439 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1440 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1441 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1442 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 1

1776 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1777 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1778 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1779 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1780 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1781 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1782 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1783 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1784 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1785 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1786 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1787 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1788 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1789 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1790 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
1791 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 1

2140 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2141 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2142 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2143 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2144 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2145 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2146 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2147 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2148 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2149 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2150 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2151 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2152 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2153 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2154 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2155 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 1

2517 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2518 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2519 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2520 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2521 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2522 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2523 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2524 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2525 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2526 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2527 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2528 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2529 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2530 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2531 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 157, 1])
2532 torch.Size([10, 64, 157, 1]) torch.Size([10, 64, 1

In [26]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001,momentum=0.9)
criterion  = torch.nn.MultiLabelSoftMarginLoss()


In [31]:
for epoch in range(4):

    losses = []

    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        # get the inputs
        images, labels = data
        images = images.to(device)
        labels = labels.to(device) 

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        probs = model(images)
        loss = criterion(probs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 200 == 199:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
              (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

[1,   200] loss: 0.825
[2,   200] loss: 0.803
[3,   200] loss: 0.785
[4,   200] loss: 0.769


In [35]:
probs

tensor([[0.1223, 0.0786, 0.0650, 0.4470, 0.0765, 0.5948],
        [0.0906, 0.0527, 0.0500, 0.4730, 0.0586, 0.5914],
        [0.1923, 0.1741, 0.1529, 0.4990, 0.1575, 0.5501],
        [0.1729, 0.1381, 0.1341, 0.5010, 0.1257, 0.5376],
        [0.2025, 0.1702, 0.1593, 0.5002, 0.1600, 0.5335],
        [0.1006, 0.0669, 0.0779, 0.4674, 0.0738, 0.6013],
        [0.1238, 0.0968, 0.0858, 0.4915, 0.0909, 0.5445],
        [0.1608, 0.1259, 0.1215, 0.4935, 0.1230, 0.5588],
        [0.1784, 0.1437, 0.1269, 0.4952, 0.1203, 0.5461],
        [0.1529, 0.1097, 0.1045, 0.4978, 0.1132, 0.5484],
        [0.1547, 0.1350, 0.1333, 0.5071, 0.1248, 0.5607],
        [0.1154, 0.0752, 0.0834, 0.4959, 0.0730, 0.5706],
        [0.1608, 0.1271, 0.1153, 0.5071, 0.1223, 0.5396],
        [0.1659, 0.1214, 0.1268, 0.4819, 0.1275, 0.5574],
        [0.1901, 0.1660, 0.1521, 0.4933, 0.1478, 0.5248],
        [0.1785, 0.1531, 0.1400, 0.5028, 0.1358, 0.5368],
        [0.1631, 0.1259, 0.1264, 0.5001, 0.1323, 0.5373],
        [0.179

In [24]:
from tqdm import tqdm
epochs=10

for epoch in range(epochs):
    print('Epoch: [{}/{}]'.format(epoch+1, epochs))

# 모델의 dropoupt, batchnormalization를 train 모드로 설정
    pbar = tqdm(dataloader)
    model.train()
    train_acc_list = []
    for batch in pbar:
        images = images.to(device)
        labels = labels.to(device) 
        
        probs = model(images)
        probs = F.softmax(probs)
        loss = criterion(probs, labels)
        
        optimizer.zero_grad()
        
        loss.backward()
        optimizer.step()  
        
        probs  = probs.cpu().detach().numpy()
        labels = labels.cpu().detach().numpy()
            cnt = 0
            for i in range(256):
                if probs[i].argmax() == labels[i].argmax():
                    cnt +=1
            
            batch_acc = cnt/10
            train_acc_list.append(batch_acc)
            train_acc = np.mean(train_acc_list)
            
            
            
            
            
            #if idx == 224 :
            #    break
            #train_bar.set_description(f"Train Epoch {epoch}")
            # 갱신할 변수들에 대한 모든 변화도를 0으로 초기화
            # 참고)https://tutorials.pytorch.kr/beginner/pytorch_with_examples.html
            #optimizer.zero_grad()

            #images, labels = sample
            # tensor를 gpu에 올리기 
            #images = images.to(device)
            #labels = labels.to(device)


            
            # .forward()에서 중간 노드의 gradient를 계산
            with torch.set_grad_enabled(True):
                # 모델 예측
                probs = model(images)
                probs = F.softmax(probs)
               # probs = (probs == probs.max()) * 1.0
                #loss = criterion(probs, y_train)
                
                
                loss = criterion(probs, labels)
                #loss = criterion(probs, torch.max(y_train, 1)[1])

                loss.backward()
                optimizer.step()
                
                probs  = probs.cpu().detach().numpy()
                labels = labels.cpu().detach().numpy()
                # train accuracy 계산
                cnt = 0
                for i in range(10):
                    
                    if probs[i].argmax() == labels[i].argmax():
                        cnt +=1
                
                #preds = probs > 0.5
                #batch_acc = (labels == preds).mean()
                batch_acc = cnt/10
                train_acc_list.append(batch_acc)
                train_acc = np.mean(train_acc_list)

            # 현재 progress bar에 현재 미니배치의 loss 결과 출력
            train_bar.set_postfix(train_loss= loss.item(),
                                    train_acc = train_acc)
            

Train Epoch 0: 100%|██████████| 2552/2552 [01:42<00:00, 24.92batch/s, train_acc=0.364, train_loss=0.444]
Train Epoch 1: 100%|██████████| 2552/2552 [01:41<00:00, 25.13batch/s, train_acc=0.391, train_loss=0.439]
Train Epoch 2: 100%|██████████| 2552/2552 [01:41<00:00, 25.03batch/s, train_acc=0.392, train_loss=0.433]
Train Epoch 3: 100%|██████████| 2552/2552 [01:42<00:00, 24.97batch/s, train_acc=0.392, train_loss=0.426]
Train Epoch 4: 100%|██████████| 2552/2552 [01:41<00:00, 25.23batch/s, train_acc=0.392, train_loss=0.434]
Train Epoch 5:  32%|███▏      | 804/2552 [00:32<01:09, 24.99batch/s, train_acc=0.39, train_loss=0.416] 


KeyboardInterrupt: 

In [315]:
len(probs)


10

In [34]:
276*9

2484

In [35]:
176

176

In [41]:
(256 * 99) + 176

25520

In [39]:
25600+176

25776

In [None]:
25520

# 예측

In [316]:
model.eval()

Cnn14(
  (bn0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_block1): ConvBlock(
    (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block2): ConvBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block3): ConvBlock(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1),

In [331]:
test_y = pd.DataFrame(index=range(0,len(test_x)), columns=['0', '1', '2', '3', '4', '5'])
test_y = test_y.fillna(0).to_numpy()
dataset = CustomDataset(test_x,test_y)
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)


In [327]:
model

Cnn14(
  (bn0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_block1): ConvBlock(
    (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block2): ConvBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block3): ConvBlock(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1),

In [332]:
len(dataloader)

610

In [333]:
len(dataset)

6100

In [334]:
for idx, sample in enumerate(dataloader):
    print(idx, sample[0].size(),
          sample[0].size())



0 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
1 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
2 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
3 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
4 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
5 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
6 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
7 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
8 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
9 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
10 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
11 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
12 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
13 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
14 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
15 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
16 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
17 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
18 torch.Size([10, 1

424 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
425 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
426 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
427 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
428 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
429 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
430 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
431 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
432 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
433 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
434 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
435 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
436 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
437 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
438 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
439 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
440 torch.Size([10, 157, 64]) torch.Size([10, 157, 64])
441 torch.Size([10, 157, 64]) torch.Size([10, 15

In [335]:
pred_ = []
for idx, sample in enumerate(dataloader):
    with torch.no_grad():
        # 추론
        model.eval()
        images,_ = sample
        images = images.to(device)
        probs  = model(images)
        probs = F.softmax(probs)
        probs = probs.cpu().detach().numpy()
        pred_.append(probs)
      

In [318]:
test_y

array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       ...,
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

In [91]:
#len(dataloader) 6080 

SyntaxError: invalid syntax (<ipython-input-91-1e9c4e5abe43>, line 1)

In [336]:
pred_

[array([[0.10646551, 0.10603394, 0.10605103, 0.28775477, 0.10600454,
         0.28769016],
        [0.10617596, 0.1060644 , 0.10604888, 0.2879123 , 0.1059994 ,
         0.2877991 ],
        [0.11663015, 0.10480133, 0.10476472, 0.28451183, 0.10480931,
         0.28448263],
        [0.10627706, 0.10608319, 0.10602968, 0.28788424, 0.10600552,
         0.28772026],
        [0.14156426, 0.10186376, 0.10180017, 0.27648473, 0.1018355 ,
         0.27645162],
        [0.10626446, 0.10610475, 0.10603039, 0.28786963, 0.10600681,
         0.28772402],
        [0.10648772, 0.10606464, 0.10598737, 0.2878051 , 0.10598476,
         0.28767037],
        [0.11052866, 0.10556948, 0.10550728, 0.28648654, 0.10554117,
         0.28636694],
        [0.10618132, 0.10606144, 0.10601728, 0.28790128, 0.10601119,
         0.28782746],
        [0.10609248, 0.1060765 , 0.10604427, 0.2879359 , 0.10602044,
         0.2878304 ]], dtype=float32),
 array([[0.10622569, 0.10605546, 0.10604046, 0.28791776, 0.10599365,
    

In [None]:
212

99*64 
6100

In [150]:
del pred

In [158]:
import itertools

In [338]:
pred=list(itertools.chain(*pred_))

In [340]:
len(pred)

6100

In [341]:
pred

[array([0.10646551, 0.10603394, 0.10605103, 0.28775477, 0.10600454,
        0.28769016], dtype=float32),
 array([0.10617596, 0.1060644 , 0.10604888, 0.2879123 , 0.1059994 ,
        0.2877991 ], dtype=float32),
 array([0.11663015, 0.10480133, 0.10476472, 0.28451183, 0.10480931,
        0.28448263], dtype=float32),
 array([0.10627706, 0.10608319, 0.10602968, 0.28788424, 0.10600552,
        0.28772026], dtype=float32),
 array([0.14156426, 0.10186376, 0.10180017, 0.27648473, 0.1018355 ,
        0.27645162], dtype=float32),
 array([0.10626446, 0.10610475, 0.10603039, 0.28786963, 0.10600681,
        0.28772402], dtype=float32),
 array([0.10648772, 0.10606464, 0.10598737, 0.2878051 , 0.10598476,
        0.28767037], dtype=float32),
 array([0.11052866, 0.10556948, 0.10550728, 0.28648654, 0.10554117,
        0.28636694], dtype=float32),
 array([0.10618132, 0.10606144, 0.10601728, 0.28790128, 0.10601119,
        0.28782746], dtype=float32),
 array([0.10609248, 0.1060765 , 0.10604427, 0.2879359 ,

In [151]:
pred_

[array([[0.1081325 , 0.10617962, 0.10622071, 0.2866078 , 0.10617088,
         0.2866885 ],
        [0.10771678, 0.10609294, 0.10611901, 0.28694767, 0.1061004 ,
         0.28702322],
        [0.10754948, 0.10607797, 0.10606804, 0.28709522, 0.10608145,
         0.28712782],
        [0.10882153, 0.10618911, 0.10622294, 0.28615683, 0.10618307,
         0.28642657],
        [0.1073886 , 0.10608907, 0.10607734, 0.28720015, 0.10603659,
         0.28720826],
        [0.10768888, 0.1060693 , 0.10605559, 0.28701654, 0.10605422,
         0.28711542],
        [0.10741203, 0.10607852, 0.10605197, 0.28718337, 0.10606293,
         0.28721115],
        [0.10813162, 0.10618096, 0.10617853, 0.28660873, 0.10614582,
         0.28675428],
        [0.10715533, 0.10606823, 0.10606129, 0.2873172 , 0.10603657,
         0.28736144],
        [0.10794112, 0.10619339, 0.10621163, 0.2866997 , 0.10618571,
         0.28676844]], dtype=float32),
 array([[0.10831267, 0.10620923, 0.10620708, 0.28645787, 0.10622188,
    

In [320]:
pred_[0][9]

array([0.10622359, 0.10605875, 0.10604201, 0.28786305, 0.10599991,
       0.28781265], dtype=float32)

In [165]:
np.mean(pred_, axis = 0).shape

(10, 6)

In [328]:
len(pred)

6100

In [322]:
pd.DataFrame(np.mean(pred, axis = 0))

Unnamed: 0,0
0,0.107848
1,0.106136
2,0.106138
3,0.286825
4,0.106132
5,0.28692


(6100, 64, 501, 1)

In [323]:
test_

Unnamed: 0,path,id
0,./acc/test/1636.wav,1636
1,./acc/test/2045.wav,2045
2,./acc/test/3766.wav,3766
3,./acc/test/576.wav,576
4,./acc/test/5634.wav,5634
...,...,...
6095,./acc/test/679.wav,679
6096,./acc/test/1267.wav,1267
6097,./acc/test/1214.wav,1214
6098,./acc/test/391.wav,391


In [342]:
def cov_type(data):
    return np.int(data)

# 처음에 살펴본 것처럼 glob로 test data의 path는 sample_submission의 id와 같이 1,2,3,4,5.....으로 정렬 되어있지 않습니다.
# 만들어둔 test_ 데이터프레임을 이용하여 sample_submission과 predict값의 id를 맞춰줍니다.
sample_submission = pd.read_csv("./acc/sample_submission.csv")
result = pd.concat([test_, pd.DataFrame(pred)], axis = 1).iloc[:, 1:]
result["id"] = result["id"].apply(lambda x : cov_type(x))

result = pd.merge(sample_submission["id"], result)
result.columns = sample_submission.columns

In [343]:
result

Unnamed: 0,id,africa,australia,canada,england,hongkong,us
0,1,0.106158,0.106066,0.106057,0.287897,0.106004,0.287818
1,2,0.106417,0.106101,0.106022,0.287805,0.106019,0.287636
2,3,0.106354,0.106047,0.105977,0.287880,0.105985,0.287757
3,4,0.107334,0.105964,0.105938,0.287448,0.105901,0.287415
4,5,0.107380,0.105905,0.105861,0.287539,0.105861,0.287454
...,...,...,...,...,...,...,...
6095,6096,0.106553,0.106052,0.106024,0.287741,0.106002,0.287628
6096,6097,0.110028,0.105600,0.105556,0.286572,0.105597,0.286647
6097,6098,0.106478,0.106014,0.105983,0.287821,0.105994,0.287710
6098,6099,0.106264,0.105990,0.105972,0.287944,0.105970,0.287859


In [174]:
pd.DataFrame(pred)

Unnamed: 0,0,1,2,3,4,5
0,0.108133,0.106180,0.106221,0.286608,0.106171,0.286689
1,0.107717,0.106093,0.106119,0.286948,0.106100,0.287023
2,0.107549,0.106078,0.106068,0.287095,0.106081,0.287128
3,0.108822,0.106189,0.106223,0.286157,0.106183,0.286427
4,0.107389,0.106089,0.106077,0.287200,0.106037,0.287208
...,...,...,...,...,...,...
6095,0.108278,0.106147,0.106175,0.286552,0.106136,0.286713
6096,0.107446,0.106158,0.106167,0.287083,0.106133,0.287013
6097,0.107697,0.106066,0.106107,0.286976,0.106070,0.287084
6098,0.108226,0.106214,0.106224,0.286459,0.106238,0.286638


In [344]:
result.to_csv("DACON.csv", index = False)

In [None]:
result = pd.concat([test_, pd.DataFrame(np.mean(pred, axis = 0))], axis = 1).iloc[:, 1:]

In [58]:
test_

Unnamed: 0,path,id
0,./acc/test/1636.wav,1636
1,./acc/test/2045.wav,2045
2,./acc/test/3766.wav,3766
3,./acc/test/576.wav,576
4,./acc/test/5634.wav,5634
...,...,...
6095,./acc/test/679.wav,679
6096,./acc/test/1267.wav,1267
6097,./acc/test/1214.wav,1214
6098,./acc/test/391.wav,391
