In [1]:
#!pip install tqdm

In [2]:
%load_ext autoreload
%autoreload 2

import time
from preprocess import *
#import keras
#from keras.models import Sequential
#from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, RNN
#from keras.utils import to_categorical


# Second dimension of the feature is dim2
feature_dim_2 = 11

# Save data to array file first
save_data_to_array(max_len=feature_dim_2)

# # Loading train set and test set
X_train, X_test, y_train, y_test = get_train_test()

# # Feature dimension
feature_dim_1 = 20
channel = 1
epochs = 50
batch_size = 100
verbose = 1
num_classes = 2

# Reshaping to perform 2D convolution
X_train = X_train.reshape(X_train.shape[0], feature_dim_1, feature_dim_2, channel)
X_test = X_test.reshape(X_test.shape[0], feature_dim_1, feature_dim_2, channel)

y_train_hot = to_categorical(y_train)
y_test_hot = to_categorical(y_test)

Saving vectors of label - 'bangun': 100%|██████████| 154/154 [00:03<00:00, 45.64it/s]
Saving vectors of label - 'cipi': 100%|██████████| 142/142 [00:02<00:00, 61.77it/s]


In [3]:
y_test_hot.shape

(119, 2)

# CONVERT / IMPORT TO DATALOADER

In [3]:
import torch
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        x = torch.from_numpy(self.data[index])
        y = torch.from_numpy(self.labels[index])
        return x, y

batch_size = 32
num_workers = 4

train_dataset = CustomDataset(X_train, y_train_hot)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

test_dataset = CustomDataset(X_test, y_test_hot)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

In [4]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x26f61423cc8>

# DEFINE MODEL PYTORCH

In [None]:
def get_model():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(2, 2), activation='relu', input_shape=(feature_dim_1, feature_dim_2, channel)))
    model.add(Conv2D(48, kernel_size=(2, 2), activation='relu'))
    model.add(Conv2D(120, kernel_size=(2, 2), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(num_classes, activation='sigmoid'))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])
    return model



# Predicts one sample
def predict(filepath, model):
    sv,sr = soundfile.read('../Ring09.wav')
    sample = wav2mfcc(filepath)
    sample_reshaped = sample.reshape(1, feature_dim_1, feature_dim_2, channel)
    y_pred = model.predict(sample_reshaped)
    y_max = np.max(y_pred)
    ypred = np.argmax(y_pred)
    if y_max>0.9 and ypred==1:
        print("Predicted:" ,get_labels()[0][ypred],y_max, ypred)
        sounddevice.play(sv,samplerate=16000)
    return get_labels()[0][ypred]

# Predicts one sample
def predicte(audiodata, model):
    sv,sr = soundfile.read('../Ring09.wav')
    
    audiodata = np.array(audiodata).flatten()
    sample = au2mfcc(audiodata)
    sample_reshaped = sample.reshape(1, feature_dim_1, feature_dim_2, channel)
    y_pred = model.predict(sample_reshaped)
    y_max = np.max(y_pred)
    ypred = np.argmax(y_pred)
    if y_max>0.9:
        print("Predicted:" ,get_labels()[0][ypred], y_max)
        sounddevice.play(sv,samplerate=16000)
    return get_labels()[0][ypred]


In [2]:
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(channel, 32, kernel_size=2)
        self.conv2 = nn.Conv2d(32, 48, kernel_size=2)
        self.conv3 = nn.Conv2d(48, 120, kernel_size=2)
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.dropout1 = nn.Dropout(p=0.25)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(120*6*6, 128)
        self.dropout2 = nn.Dropout(p=0.25)
        self.fc2 = nn.Linear(128, 64)
        self.dropout3 = nn.Dropout(p=0.4)
        self.fc3 = nn.Linear(64, num_classes)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = nn.functional.relu(self.conv1(x))
        x = nn.functional.relu(self.conv2(x))
        x = nn.functional.relu(self.conv3(x))
        x = self.maxpool(x)
        x = self.dropout1(x)
        x = self.flatten(x)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout2(x)
        x = nn.functional.relu(self.fc2(x))
        x = self.dropout3(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x
        
model = Model()

def predict(model, data):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.eval()
    with torch.no_grad():
        inputs = torch.from_numpy(data).to(device)
        outputs = model(inputs.float())
        preds = (outputs > 0.5).float().cpu().numpy()
    return preds

# Predicts one sample
def _predict(filepath, model):
    sv,sr = soundfile.read('../Ring09.wav')
    sample = wav2mfcc(filepath)
    sample_reshaped = sample.reshape(1, feature_dim_1, feature_dim_2, channel)
    y_pred = model.predict(sample_reshaped)
    y_max = np.max(y_pred)
    ypred = np.argmax(y_pred)
    if y_max>0.9 and ypred==1:
        print("Predicted:" ,get_labels()[0][ypred],y_max, ypred)
        sounddevice.play(sv,samplerate=16000)
    return get_labels()[0][ypred]

# DATALOADER

In [11]:
import torch
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        x = torch.from_numpy(self.data[index])
        y = torch.from_numpy(self.labels[index])
        return x, y

# create a CustomDataset object for the training data
train_dataset = CustomDataset(X_train, y_train_hot)

# create a DataLoader object for the training data
batch_size = 100
shuffle = True
num_workers = 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

# TRAINER

In [6]:
def train(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []
    
    for epoch in range(num_epochs):
        running_train_loss, running_val_loss = 0.0, 0.0
        running_train_acc, running_val_acc = 0.0, 0.0
        
        # training loop
        model.train()
        for i, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs.float())
            loss = criterion(outputs, labels.float())
            loss.backward()
            optimizer.step()
            
            running_train_loss += loss.item() * inputs.size(0)
            running_train_acc += (outputs > 0.5).eq(labels.byte()).sum().item()
        
        # validation loop
        model.eval()
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs.float())
                loss = criterion(outputs, labels.float())
                
                running_val_loss += loss.item() * inputs.size(0)
                running_val_acc += (outputs > 0.5).eq(labels.byte()).sum().item()
        
        # compute epoch statistics
        epoch_train_loss = running_train_loss / len(train_loader.dataset)
        epoch_val_loss = running_val_loss / len(val_loader.dataset)
        epoch_train_acc = running_train_acc / len(train_loader.dataset)
        epoch_val_acc = running_val_acc / len(val_loader.dataset)
        train_losses.append(epoch_train_loss)
        val_losses.append(epoch_val_loss)
        train_accs.append(epoch_train_acc)
        val_accs.append(epoch_val_acc)
        
        # print epoch statistics
        print("Epoch {}/{} - Train Loss: {:.4f} - Val Loss: {:.4f} - Train Acc: {:.4f} - Val Acc: {:.4f}".format(
            epoch+1, num_epochs, epoch_train_loss, epoch_val_loss, epoch_train_acc, epoch_val_acc))
    
    return train_losses, val_losses, train_accs, val_accs

In [46]:
train(model, train_loader, test_loader, criterion= , optimizer= , num_epochs=10)

TypeError: train() missing 2 required positional arguments: 'criterion' and 'optimizer'

__main__.Model

In [12]:
import torch.optim as optim
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(channel, 32, kernel_size=2)
        self.conv2 = nn.Conv2d(32, 48, kernel_size=2)
        self.conv3 = nn.Conv2d(48, 120, kernel_size=2)
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.dropout1 = nn.Dropout(p=0.25)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(120*6*6, 128)
        self.dropout2 = nn.Dropout(p=0.25)
        self.fc2 = nn.Linear(128, 64)
        self.dropout3 = nn.Dropout(p=0.4)
        self.fc3 = nn.Linear(64, num_classes)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = nn.functional.relu(self.conv1(x))
        x = nn.functional.relu(self.conv2(x))
        x = nn.functional.relu(self.conv3(x))
        x = self.maxpool(x)
        x = self.dropout1(x)
        x = self.flatten(x)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout2(x)
        x = nn.functional.relu(self.fc2(x))
        x = self.dropout3(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x
        
model = Model()

# Set up the model, loss function, and optimizer
#model = get_model()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

# Set up the data loaders
batch_size = 100
num_workers = 4
train_dataset = CustomDataset(X_train, y_train_hot)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
test_dataset = CustomDataset(X_test, y_test_hot)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    # Set the model to training mode
    model.train()

    # Iterate over the training data in batches
    train_loss = 0.0
    train_acc = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Compute the training accuracy
        preds = torch.round(torch.sigmoid(outputs))
        correct = (preds == labels).sum().item()
        acc = correct / labels.numel()

        # Update the training loss and accuracy
        train_loss += loss.item() * inputs.size(0)
        train_acc += acc * inputs.size(0)

    # Compute the average training loss and accuracy for the epoch
    train_loss /= len(train_loader.dataset)
    train_acc /= len(train_loader.dataset)

    # Set the model to evaluation mode and turn off gradients
    model.eval()
    with torch.no_grad():
        # Iterate over the test data in batches
        test_loss = 0.0
        test_acc = 0.0
        for inputs, labels in test_loader:
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Compute the test accuracy
            preds = torch.round(torch.sigmoid(outputs))
            correct = (preds == labels).sum().item()
            acc = correct / labels.numel()

            # Update the test loss and accuracy
            test_loss += loss.item() * inputs.size(0)
            test_acc += acc * inputs.size(0)

        # Compute the average test loss and accuracy for the epoch
        test_loss /= len(test_loader.dataset)
        test_acc /= len(test_loader.dataset)

    # Print the training and test loss and accuracy for the epoch
    print(f'Epoch {epoch+1}/{num_epochs} -- Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}')

BrokenPipeError: [Errno 32] Broken pipe

In [10]:
for i in train_loader:
    print(i)

BrokenPipeError: [Errno 32] Broken pipe

# Building The Model Then Training it

In [None]:
model = get_model()
model.fit(X_train, y_train_hot, batch_size=batch_size, epochs=epochs, verbose=2, validation_data=(X_test, y_test_hot))

## Prediction

In [5]:
#model.save("chippy_v1.model")

In [None]:
import keras
model = keras.models.load_model("chippy_v1.model")

In [20]:
import soundfile
import itertools
import time
import librosa
import sounddevice

DURATION=20
gain=10
srange=[100,2000]
high=2000
low=100
screenwidth=79

def record(length=1, reclength=1, filename=None, thres=0):
    """ 
    Merekam suara secara stream dan metode callback
    """

    global cumulated_status, end_count, start_count, recording, magnitudo, audiodata
    end_count=False
    start_count = 0
    recording=False
    magnitudo=[]
    audiodata=[]
    try:
        import sounddevice as sd

        #samplerate = sd.query_devices(args.device, 'input')['default_samplerate']
        samplerate = 16000.0

        delta_f = (high - low) / screenwidth
        fftsize = np.ceil(samplerate / delta_f).astype(int)
        low_bin = int(np.floor(low / delta_f))

        cumulated_status = sd.CallbackFlags()

        def callback(indata, frames, time, status):
            global cumulated_status, audiodata, magnitudo, end_count, start_count, recording, model
            
            
            cumulated_status |= status
            if any(indata):
                magnitude = np.abs(np.fft.rfft(indata[:, 0], n=fftsize))
                magnitude *= gain / fftsize

                rms = librosa.feature.rms(S=indata)
                rms = int(rms*32768)
                start_count += 1
                if rms>=thres:
                    if not recording :                    #and not end_count
                        print("Start record")
                        recording = True
                        start_count = 0
                        
                        
                if recording:
                    audiodata.extend(itertools.chain(indata.tolist()))
                    magnitudo.append(magnitude)
                    if start_count == int(samplerate / (samplerate * DURATION / 1000)):
                        print("End record")
                        start_count=0
                        end_count=True
                        recording=False
                        try:
                            soundfile.write("temp.wav",audiodata,16000)
                            predict("temp.wav", model=model)
                        except:
                            pass
                        audiodata=[]



        with sd.InputStream(device=None, channels=1, callback=callback,
                            blocksize=int(samplerate * DURATION / 1000),
                            samplerate=samplerate):
            while True:
                #response = input()
                #if response in ('', 'q', 'Q'):
                time.sleep(length)
                break
            if filename!=None: soundfile.write(filename,audiodata,16000)

        if cumulated_status:
            logging.warning(str(cumulated_status))
    except Exception as e:
        print(e)


In [9]:
predict('temp.wav', model=model)

NameError: name 'model' is not defined

In [24]:
record(length=5,filename="record.wav",thres=400)

From cffi callback <function _StreamBase.__init__.<locals>.callback_ptr at 0x00000212C8F80C18>:
Traceback (most recent call last):
  File "E:\anaconda3\envs\374\lib\site-packages\sounddevice.py", line 840, in callback_ptr
    return _wrap_callback(callback, data, frames, time, status)
  File "E:\anaconda3\envs\374\lib\site-packages\sounddevice.py", line 2678, in _wrap_callback
    callback(*args)
  File "C:\Users\IRZA\AppData\Local\Temp/ipykernel_6624/3674246945.py", line 46, in callback
  File "E:\anaconda3\envs\374\lib\site-packages\librosa\feature\spectral.py", line 938, in rms
    S.shape[0], S.shape[0] * 2 - 2, S.shape[0] * 2 - 1, frame_length
librosa.util.exceptions.ParameterError: Since S.shape[0] is 320, frame_length is expected to be 638 or 639; found 2048


In [None]:
predict("temp.wav", model=model)

In [None]:
import sounddevice
import soundfile

In [16]:
soundfile.read('./newdata/train/cipi/cipi-irza-1240-ns-2363.wav')

(array([ 0.0071106 ,  0.0039978 , -0.00140381, ...,  0.00317383,
         0.00701904,  0.        ]),
 16000)

In [11]:
import librosa

In [None]:
audiodata = b''.join(np.array(audiodata).flatten())

In [None]:
from preprocess import *

In [None]:
aaa = np.array(audiodata).flatten()

In [None]:
sv, sr = soundfile.read("../Ring09.wav")
sounddevice.play(sv,samplerate=16000)