In [59]:
import librosa
import librosa.display
import torch
import sys
import glob
from torch.utils.data import Dataset
import matplotlib.pyplot as plt

sys.path.insert(1, '../utils')

In [62]:
from data import StutterData

In [61]:
data

<module 'data' from '../utils/data.py'>

In [55]:
class StutterData(Dataset):
    
    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.files = []
        
        for path in glob.glob(root_dir):
            self.files.append(path)
            
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        audio, sr = librosa.core.load(self.files[idx])
        mfcc = librosa.feature.mfcc(audio, sr) 
#         add n_mfcc?
        
        if 'non-stuttered' in self.files[idx]:
            label = 0
        else: 
            label = 1
        sample ={'mfcc': mfcc, 'label':label}
        
        return sample
    
    def visualize(self,idx):
        librosa.display.specshow(sd[idx]['mfcc'], x_axis='time')

In [56]:
sd = StutterData('../data/*')

In [63]:
import torch.nn as nn
import torch.nn.functional as F

In [135]:
class VariableLenModel(nn.Module):
    def __init__(self):
        super(VariableLenModel, self).__init__()
        
        self.conv1 = nn.Conv2d(1,8,3)
        self.maxpool1 = nn.AdaptiveAvgPool2d(16)
        self.dropout = nn.Dropout(0.2)
        
        self.conv2 = nn.Conv2d(8,16,3)
        self.maxpool2 = nn.AdaptiveAvgPool2d(8)
        
        self.fc1 = nn.Linear(1024, 256) #8*8 = adaptivemaxpool, *16 = conv2d op
        self.fc2 = nn.Linear(256, 1)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = F.relu(x)
        x = self.dropout(x)

        x = self.conv2(x)
        x = self.maxpool2(x)
        x = F.relu(x)
        x = self.dropout(x)
        print(x.shape)
        
        x = x.view(-1,8*8*16)
        x = self.fc1(x)
        x = self.fc2(x)

        return x

        

In [136]:
model = VariableLenModel()

In [137]:
print(model)

VariableLenModel(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1))
  (maxpool1): AdaptiveAvgPool2d(output_size=16)
  (dropout): Dropout(p=0.2, inplace=False)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
  (maxpool2): AdaptiveAvgPool2d(output_size=8)
  (fc1): Linear(in_features=1024, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=1, bias=True)
)


In [138]:
from pytorch_model_summary import summary


In [139]:
print(summary(VariableLenModel(), torch.zeros((1, 1, 20, 50)), show_input=True))

torch.Size([1, 16, 8, 8])
---------------------------------------------------------------------------
          Layer (type)         Input Shape         Param #     Tr. Param #
              Conv2d-1      [1, 1, 20, 50]              80              80
   AdaptiveAvgPool2d-2      [1, 8, 18, 48]               0               0
             Dropout-3      [1, 8, 16, 16]               0               0
              Conv2d-4      [1, 8, 16, 16]           1,168           1,168
   AdaptiveAvgPool2d-5     [1, 16, 14, 14]               0               0
              Linear-6           [1, 1024]         262,400         262,400
              Linear-7            [1, 256]             257             257
Total params: 263,905
Trainable params: 263,905
Non-trainable params: 0
---------------------------------------------------------------------------


In [140]:
model.forward(torch.zeros((1, 1, 20, 50)))

torch.Size([1, 16, 8, 8])


tensor([[0.0676]], grad_fn=<AddmmBackward>)

In [141]:
criterion = nn.BCEWithLogitsLoss()

In [142]:
trainloader = torch.utils.data.DataLoader(sd, batch_size = 8)

In [143]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")     #Check whether a GPU is present.