In [3]:
import librosa
import os
import numpy as np

def load_audio_file(file_path):
    audio, sr = librosa.load(file_path, sr=16000)
    return audio

def get_spectrogram(audio):
    D = librosa.stft(audio, n_fft=512, hop_length=160, win_length=400)
    spect, phase = librosa.magphase(D)
    return spect.T

def preprocess_data(data_path):
    hotword = "hey_google"
    audio_files = os.listdir(data_path)
    audio_files = [f for f in audio_files if hotword in f]
    audio_files_train = audio_files[:int(len(audio_files)*0.8)]
    audio_files_val = audio_files[int(len(audio_files)*0.8):]

    X_train = []
    y_train = []

    for file in audio_files_train:
        audio = load_audio_file(os.path.join(data_path, file))
        spect = get_spectrogram(audio)
        X_train.append(spect)
        y_train.append(1)

    X_val = []
    y_val = []

    for file in audio_files_val:
        audio = load_audio_file(os.path.join(data_path, file))
        spect = get_spectrogram(audio)
        X_val.append(spect)
        y_val.append(1)

    X_train = np.array(X_train)
    y_train = np.array(y_train)
    X_val = np.array(X_val)
    y_val = np.array(y_val)

    return X_train, y_train, X_val, y_val

In [8]:
datapath = './newdata/train/cipi/'
X_train, y_train, X_val, y_val = preprocess_data(datapath)

In [6]:
import torch.nn as nn

class HotwordDetector(nn.Module):
    def init(self):
        super(HotwordDetector, self).init()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=(3,3), padding=(1,1))
        self.maxpool1 = nn.MaxPool2d(kernel_size=(2,2))
        self.conv2 = nn.Conv2d(16, 32, kernel_size=(3,3), padding=(1,1))
        self.maxpool2 = nn.MaxPool2d(kernel_size=(2,2))
        self.fc1 = nn.Linear(322525, 64)
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.maxpool2(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.sigmoid(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x

In [24]:
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

class HotwordDataset(Dataset):
    def init(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        X = self.X[idx]
        y = self.y[idx]
        X = np.expand_dims(X, axis=0)
        X = torch.from_numpy(X).float()
        y = torch.from_numpy(np.array(y)).float()
        return X, y

def train(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    train_loss_history = []
    val_loss_history = []
    for epoch in range(num_epochs):
        train_loss = 0
        val_loss = 0

    model.train()
    for X_train_batch, y_train_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_train_batch)
        loss = criterion(y_pred, y_train_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_loss /= len(train_loader)
        train_loss_history.append(train_loss)

    model.eval()
    with torch.no_grad():
        for X_val_batch, y_val_batch in val_loader:
            y_pred = model(X_val_batch)
            loss = criterion(y_pred, y_val_batch)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    val_loss_history.append(val_loss)

    print("Epoch {} | Train loss: {:.4f} | Val loss: {:.4f}".format(epoch+1, train_loss, val_loss))

    return train_loss_history, val_loss_history

In [16]:
batch_size = 32
learning_rate = 0.001
num_epochs = 20

In [29]:
train_dataset = HotwordDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = HotwordDataset(X_val, y_val)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

TypeError: object.__new__() takes exactly one argument (the type to instantiate)

In [13]:
model = HotwordDetector()

In [28]:
train_loss_history, val_loss_history = train(model, train_loader, val_loader, criterion, optimizer, num_epochs)

NameError: name 'train_loader' is not defined

In [30]:
test_file = "path/to/test/file"
audio = load_audio_file(test_file)
spect = getspectrogram(audio)
spect = normalize(spect)
spect = resize(spect, (224, 224))
spect = np.expand_dims(spect, axis=0)



FileNotFoundError: [Errno 2] No such file or directory: 'path/to/test/file'

In [34]:
hotword_probability = test(model, spect)
if hotword_probability > 0.5:
    print("Hotword detected!")
else:
    print("Hotword not detected.")

NameError: name 'test' is not defined

In [None]:
model_path = "path/to/model/file"
torch.save(model.state_dict(), model_path)