# 기초 MLP 모델 실습
- MLP model을 이용하여 오디오에서 숫자를 분류하는 실습
1. Dataset 정의 
2. Model 정의
3. Train
4. Inference

## Torch Dataset
- 앞선 실습에서 사용했던 free-spoken-digit-dataset을 데이터로 사용

In [None]:
# Clone the free-spoken-digit-dataset repository into the current working directory.
!git clone https://github.com/Jakobovski/free-spoken-digit-dataset

Import packages

In [None]:
import torch
import torchaudio   
from pathlib import Path 

## PyTorch Dataset Object 개념 
사용자 정의 Dataset 클래스를 생성하려면 `torch.utils.data.Dataset`을 상속받아 `__init__`, `__len__`, `__getitem__` 메소드를 오버라이드한다.

- `__init__`: 데이터셋을 초기화

- `__len__`: 데이터셋의 아이템 수 반환

- `__getitem__`: 인덱스를 입력하면 데이터 샘플과 레이블을 반환


[그림]데이터를 직접적으로 가지고 있지 않지만 `__len__` 과 `__getitem__`을 통해 접근가능

<img src="https://drek4537l1klr.cloudfront.net/stevens2/Figures/CH07_F02_Stevens2_GS.png" width=600>

`torch.utils.data.DataLoader`는 Dataset에서 제공하는 데이터를 배치 크기 단위로 불러오는 iteration을 만든다. 

In [None]:
??Dataset

In [None]:
??DataLoader

In [None]:
# Minimal demo: a built-in list already implements __len__ and __getitem__.

x = list(range(4))
print(type(x))
# print(dir(x))  # lists every attribute and method of the object

# len(x) is served by x.__len__()
print(len(x))

# x[0] is served by x.__getitem__(0)
print(x[0])

In [None]:
from torch.utils.data import Dataset, DataLoader

## Dataset Class 의 형식

아래와 같이 3개의 method를 override하여 작성

```python
class MyCustomDataset(Dataset):
    def __init__(self, ...):
        # initialize Class

    def __len__(self):
        # return how many examples (e.g. images) you have
        return count 

    def __getitem__(self, index):
        # return sample(data) and label 
        return (img, label)
```

### AudioDataSet_v1
사용자 정의 Dataset인 'AudioDataSet' 

선언될 때(초기)에는 전체 데이터셋의 파일 목록을 가져오고

호출될때에는 샘플의 경로를 리턴하도록 구현

In [None]:
# define AudioDataSet and create an instance of AudioDataSet 

class AudioDataSet(Dataset) : 
  """Dataset (v1) that indexes the audio files on disk and yields their paths.

  The file list is collected once at construction time; indexing returns the
  path of the selected sample without loading any audio.

  Args:
    data_dir: Directory that holds the ``.wav`` recordings. The default
      presumes the repository cloned above keeps its audio under
      ``recordings/`` -- TODO confirm against the cloned checkout.
  """

  def __init__(self, data_dir='free-spoken-digit-dataset/recordings'):
    self.data_dir = Path(data_dir)
    # Sorted so that a given index always maps to the same file across runs.
    self.file_list = sorted(self.data_dir.glob('*.wav'))

  def __len__(self):    
    """Return the number of audio files that were found."""
    return len(self.file_list)

  def __getitem__(self,idx):
    """Return the path (``pathlib.Path``) of the sample at position ``idx``."""
    return self.file_list[idx]

AudioData = AudioDataSet()

In [None]:
# len


In [None]:
# getitem


### AudioDataSet_v2
return audio tensor and label

In [None]:
# redefine AudioDataSet 


In [None]:
# getitem


### AudioDataSet_v3
split train/test

data : zero-pads to a length of `max_length` (8000, defined below) -> to match input size

label : one-hot encode into tensor of size 10

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Collect the length of every clip, report the longest one and plot the distribution.
# assumes each item is an (audio, label) pair with audio shaped (channels, samples) -- TODO confirm
items = (AudioData[idx] for idx in range(len(AudioData)))
lengths = [waveform.shape[1] for waveform, _ in items]
max_len = max(lengths, default=0)

print("Max audio length:", max_len)

# Histogram of clip lengths
plt.hist(lengths, bins=50, edgecolor='black')
plt.title("Distribution of Audio Lengths")
plt.xlabel("Audio Length")
plt.ylabel("Count")
plt.show()

In [None]:
# Fixed clip length in samples (MyModel below also assumes 8000 samples per clip).
max_length = 8000

In [None]:
# redefine AudioDataSet


In [None]:
# define train and test dataset


In [None]:
# getitem


## Model
- feature extraction
    - Mel-Spectrogram
    - Amplitude to DB
- model 정의

In [None]:
import torch.nn as nn

In [None]:
# define MyModel

# ceil is not needed by MyModel itself; the import is kept for other cells that may use it
from math import ceil 

class MyModel(nn.Module):
  """MLP digit classifier that works on flattened log-mel spectrograms.

  The raw waveform is converted to a mel spectrogram, mapped to decibels,
  flattened and passed through a stack of fully connected layers.

  Args:
    hidden_dim: Width of every hidden linear layer.
    max_length: Number of samples in each (padded) input clip. It fixes the
      number of spectrogram frames and therefore the input size of the
      first linear layer.
  """

  def __init__(self , hidden_dim = 1024, max_length = 8000):
    super().__init__()

    # define parameter
    self.n_mels = 64
    self.n_fft = 256
    self.hop_length=self.n_fft//2
    self.sr = 8000
    self.max_length = max_length
    # MelSpectrogram pads the signal on both sides by default (center=True),
    # so it produces 1 + floor(length / hop_length) frames. For the default
    # 8000 samples this is 63, the same value the previous ceil() gave.
    self.n_frames = self.max_length // self.hop_length + 1

    self.n_classes = 10   # digit 0-9 
    self.hidden_dim = hidden_dim


    # define mel_converter
    self.mel_converter = torchaudio.transforms.MelSpectrogram(sample_rate=self.sr,
                                                              n_fft=self.n_fft,
                                                              hop_length=self.hop_length,
                                                              n_mels=self.n_mels)
    # define db_converter
    self.db_converter = torchaudio.transforms.AmplitudeToDB()

    # define layer sequence
    self.nn_layer = nn.Sequential(
                          nn.Linear(self.n_mels*self.n_frames,self.hidden_dim),
                          nn.ReLU(),
                          nn.Linear(self.hidden_dim,self.hidden_dim),                                   
                          nn.ReLU(),
                          nn.Linear(self.hidden_dim,self.hidden_dim),                                   
                          nn.ReLU(),
                          nn.Linear(self.hidden_dim,self.hidden_dim),                                   
                          nn.ReLU(),
                          nn.Linear(self.hidden_dim,self.n_classes))

  def forward(self, x):
    """Map a batch of waveforms to unnormalized class scores.

    Args:
      x: Waveform tensor whose last dimension holds ``max_length`` samples.
        Assumes a leading batch dimension, e.g. (batch, max_length) or
        (batch, 1, max_length) -- TODO confirm against the Dataset output.

    Returns:
      Tensor of shape (batch, n_classes) with raw logits. No softmax is
      applied here; apply it outside the model when probabilities are needed.
    """
    # feature extraction
    mel = self.mel_converter(x)
    mel_db = self.db_converter(mel)

    # model
    # Collapse everything except the leading batch dimension so the features
    # match the n_mels * n_frames input size of the first linear layer.
    features = mel_db.flatten(start_dim=1)
    return self.nn_layer(features)


model = MyModel()     

In [None]:
# check train sample



In [None]:
# check shape


In [None]:
# model output softmax


# Train 

In [None]:
from tqdm.auto import tqdm

In [None]:
# device check
cuda_available = torch.cuda.is_available()
print(cuda_available)

# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if cuda_available else "cpu")

print(device,type(device))

In [None]:
# Train loop
# Prepares everything the training loop needs; the loop itself is still to be written below.


# define epoch
n_epoch = 50

# define loss function and optimizer
loss_fn = torch.nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(),lr=0.001)

# define batch size and train_loader, test_loader
# Both loaders take their batch size from `bs`, so it only has to be changed in one place.
bs = 64
train_loader = DataLoader(train,shuffle=True,batch_size=bs)
test_loader = DataLoader(test,shuffle=False,batch_size=bs)

# loss tracking
losses =[]

# model to device
model.to(device)



# TODO

In [None]:
# check loss

import matplotlib.pyplot as plt
%matplotlib inline

# Draw the recorded training losses as a line plot.
plt.plot(losses)
plt.show()

# Show the most recently recorded loss value.
print(losses[-1])

## Inference

In [None]:
# define validation function



In [None]:
# check length of test dataset


In [None]:
# Check loss and accuracy on the test set
# NOTE(review): `validation` has to be defined in the cell above before this call can run.

validation(model,test_loader,loss_fn)

In [None]:
# Start again from a newly constructed model so this run does not reuse earlier weights.
model = MyModel()

n_epoch = 50

loss_fn = torch.nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(),lr=0.001)

# Both loaders take their batch size from `bs`, so it only has to be changed in one place.
bs = 64
train_loader = DataLoader(train,shuffle=True,batch_size=bs)
test_loader = DataLoader(test,shuffle=False,batch_size=bs)

# training loss tracking
losses =[]

# validation loss / accuracy tracking
val_losses =[]
val_accs = []

# Use the device selected in the "device check" cell instead of a hard-coded
# 'cuda', which raises an error on machines without a usable GPU.
model.to(device)



# TODO

## plotting loss, val_loss, val_acc

In [None]:
# Plot the training losses on a logarithmic y-axis.
plt.plot(losses)
plt.yscale('log')

In [None]:
# Plot the collected validation losses.
plt.plot(val_losses)

In [None]:
# Plot the collected validation accuracies.
plt.plot(val_accs)