In [None]:
from google.colab import drive
drive.mount('/gdrive')

%cd '/gdrive/MyDrive/Deep-Project'
!mkdir -p /content/dataset
!cp scream.zip /content/dataset/
%cd '/content/dataset/'
!unzip -q scream.zip

Mounted at /gdrive
/gdrive/MyDrive/Deep-Project
/content/dataset


In [9]:
# !pip install libportaudio2
# !pip install sounddevice
import librosa
from matplotlib import pyplot as plt
import sounddevice as sd

In [1]:
from os import listdir
from os.path import join, splitext
import time
import math
from tqdm import tqdm
from torch.utils.data   import Dataset, DataLoader
import pandas as pd
import numpy as np
import glob

In [2]:
label_path = 'C:/Git/scream/data/labels/'
label_files = [
    f for f in listdir(label_path)
    if splitext(join(label_path, f))[-1] == '.txt'
]

In [3]:
%cd C:/Git/scream/data/

C:\Git\scream\data


In [4]:
list_file_fn = 'file_list.xlsx'

with pd.ExcelWriter(list_file_fn) as writer:
    audio_names = []
    for i , file_name in enumerate(label_files):
        temp = pd.read_csv(label_path + file_name, sep = '\t', header = None)
        temp.columns = ['start', 'end', 'label']
        temp.to_excel(writer , index=False, sheet_name = str(i))
        audio_names.append(file_name[:-3] + 'wav')
        file_name
        if i == len(label_files) -1 :
            pd.DataFrame(audio_names, columns = ['audio_name']).to_excel(writer, sheet_name = 'list')

In [5]:
def load_label_data(file_path, i, frame_size):
    audio_label = pd.read_excel(file_path, sheet_name = str(i))
    audio_label['label'] = audio_label['label'].apply(lambda x : int(str(x)[0]))
    audio_label = np.array(audio_label)
    audio_label = audio_label[(audio_label[:,1] - audio_label[:,0]) >= frame_size]
    return audio_label

def load_sound_data(file_path, sampling_rate):
    audio, sr = librosa.load(file_path, sr = sampling_rate)
    print(audio, sr)
    return audio, sr

In [6]:
def process_frame(audio, sr, label, frame_size =1, stride = .2):
    n = np.sum(np.floor(((label[:,1] - label[:,0]) - frame_size) / stride + frame_size))
  
    frame_vector = np.zeros((int(n), frame_size * sr))  
    target_vector = np.zeros((int(n), 1)) 
 
    i = 0
    for temp_label in label:
        start = temp_label[0] 
        end = temp_label[1] 
        y = temp_label[2]  
 
        eter = int(
            np.floor((((end - start) - frame_size) / stride) + 1)
        )  
        for j in range(eter):  
            start_idx = int((start + j * stride) * sr)
            end_idx = start_idx + frame_size * sr
            frame_vector[i] = audio[start_idx:end_idx]  
            target_vector[i] = y  
            i += 1
    return frame_vector, target_vector

In [7]:
def extract_mel_feature(frame_vector, sr, n_mels, st = 512):
    mel_feature = librosa.feature.melspectrogram(
        y = frame_vector, sr= sr, n_mels= n_mels , hop_length = st
    )
    mel_feature = librosa.core.power_to_db(mel_feature, ref = np.max)
    return mel_feature

In [10]:
sr = 22050
frame_size = 1
stride = .2
n_mels = 64
n_feature = 34
short_time = 512

file_list = pd.read_excel('C:/Git/scream/data/file_list.xlsx', sheet_name='list')

frame_vectors = []
label_vectors = []
idx_count = []

for file_i, file_name in enumerate(file_list['audio_name']):
    if file_i % 10 ==0:
        print(f'in processing {file_i}th file ')
    
    label_data_temp = load_label_data('C:/Git/scream/data/file_list.xlsx', file_i, frame_size)

    if len(label_data_temp) == 0 :
           idx_count.append(0)
           continue

    audio_path = 'C:/Git/scream/data/raw_data/' + file_name
    audio_data_temp, sr  = load_sound_data(audio_path, sr)
    frame_vector, label_vector = process_frame(
        audio = audio_data_temp, sr=sr, label = label_data_temp, frame_size = frame_size, stride = stride
    )
    frame_vectors.append(frame_vector)
    label_vectors.append(label_vector)

    idx_count.append(frame_vector.shape[0])

frame_vectors = np.concatenate(frame_vectors)
label_vectors = np.concatenate(label_vectors)


in processing 0th file 
[ 0.          0.          0.         ...  0.00024623 -0.01489059
 -0.02284678] 22050
[ 0.          0.          0.         ... -0.00620373 -0.0070753
  0.        ] 22050
[ 0.          0.          0.         ... -0.02591142 -0.04979165
  0.        ] 22050
[0. 0. 0. ... 0. 0. 0.] 22050
[0.         0.         0.         ... 0.00030185 0.00030587 0.00028805] 22050
[0.        0.        0.        ... 0.0567134 0.0896961 0.       ] 22050
[ 0.0000000e+00  0.0000000e+00  0.0000000e+00 ... -4.2895663e-05
 -4.6592937e-05 -3.9977447e-05] 22050
[0. 0. 0. ... 0. 0. 0.] 22050
[0.         0.         0.         ... 0.00350587 0.0039388  0.        ] 22050
[0. 0. 0. ... 0. 0. 0.] 22050
in processing 10th file 
[0. 0. 0. ... 0. 0. 0.] 22050
[ 0.          0.          0.         ...  0.02753405 -0.04229661
  0.        ] 22050
[0.         0.         0.         ... 0.00873816 0.00927958 0.        ] 22050
[-0.00521851 -0.00527954 -0.00567627 ...  0.00271606  0.0057373
  0.00646973] 22050

[-0.03641534 -0.04412891 -0.04806279 ... -0.00679003 -0.00908985
  0.        ] 22050
[-0.00039891 -0.00012763  0.0004973  ...  0.02778113  0.02277214
  0.01104714] 22050
[ 0.00224789  0.00070296 -0.00277061 ...  0.00200753  0.00069645
 -0.00095245] 22050
in processing 140th file 
[0.         0.         0.         ... 0.12466783 0.34208277 0.39146855] 22050
[0. 0. 0. ... 0. 0. 0.] 22050
[0. 0. 0. ... 0. 0. 0.] 22050
[ 0.0078125  0.         0.        ... -0.0546875 -0.015625  -0.015625 ] 22050
[ 0.0234375  0.0234375 -0.0078125 ...  0.         0.         0.       ] 22050
[0. 0. 0. ... 0. 0. 0.] 22050
[0. 0. 0. ... 0. 0. 0.] 22050
[ 0.          0.          0.         ... -0.01122778 -0.01021228
 -0.01169566] 22050
[ 0.0000000e+00  0.0000000e+00  0.0000000e+00 ... -1.2559273e-05
 -2.0354904e-05 -8.2583747e-06] 22050
in processing 150th file 
[0. 0. 0. ... 0. 0. 0.] 22050
[ 0.          0.          0.         ... -0.0007614  -0.00065635
  0.        ] 22050
[0.         0.         0.         ..

In [12]:
train_ratio = .7

total_n = frame_vectors.shape[0]
train_n = int(total_n * train_ratio)

train_idxes = np.random.choice(total_n, train_n, replace = False)
train_mask = np.zeros(shape = total_n, dtype = bool)
train_mask[train_idxes] = True

In [13]:
train_mel_features = mel_features[train_mask]
valid_mel_features = mel_features[~train_mask]

train_label_vectors = label_vectors[train_mask]
valid_label_vectors = label_vectors[~train_mask]

In [14]:
import torch
from torch import nn, optim
from sklearn.metrics import f1_score, accuracy_score

In [15]:
class ScreamDataset(Dataset):
    def __init__(self, x, y):
        self.audio_features = x
        self.labels = y

    def __getitem__(self, idx):
        audio_features = torch.FloatTensor(self.audio_features[idx])
        audio_features = audio_features.unsqueeze(0)
        label = self.labels[idx]
        return (audio_features, label)

    def __len__(self):
        return len(self.audio_features)

In [16]:
batch_size = 512
learning_rate = 0.001
epochs = 10

In [17]:
train_dataset = ScreamDataset(train_mel_features, train_label_vectors)
train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)

valid_dataset = ScreamDataset(valid_mel_features, valid_label_vectors)
valid_loader = DataLoader(valid_dataset, batch_size = batch_size, shuffle = False)

In [18]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = nn.Sequential(
    # input = (n,1,64,44) (batchsize, channel, height, width)
    nn.Conv2d(in_channels = 1, out_channels = 32, kernel_size = (64,1),),
    # x = (n, 32, 1, 44)
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.Dropout2d(p=.3),
    nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = (1,9), stride =4),
    # x = (n, 64, 1, 9)
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.Dropout2d(p=.3),
    nn.Flatten(),
    nn.Linear(64 *1 * 9, 1)
).to(device)
print(device)

cuda


In [19]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

In [22]:
n_epochs = 200
train_n = len(train_dataset)
valid_n = len(valid_dataset)

for e in range(1, n_epochs +1):
    train_loss = 0
    train_acc = 0
    train_f1_score = 0
    valid_loss = 0
    valid_acc = 0
    valid_f1_score = 0

    model.train()

    for audio_feature, label in train_loader:
        audio_feature = audio_feature.to(device)
        label = label.to(device)
        optimizer.zero_grad()

        pred = model(audio_feature)
        
        loss = criterion(pred, label)
        loss.backward()
        optimizer.step()

        pred = torch.round(torch.sigmoid(pred))
        pred = pred.cpu().detach().numpy()
        label = label.cpu().detach().numpy()

        acc = accuracy_score(label, pred)
        f1 = f1_score(label,pred)

        n = len(label)
        train_loss += loss.item() * n
        train_acc += acc.item()*n
        train_f1_score += f1*n_epochs

    model.eval()
    with torch.no_grad():
        for audio_feature, label in valid_loader :
            audio_feature = audio_feature.to(device)
            label = label.to(device)

            pred = model(audio_feature)
            loss = criterion(pred, label)

            pred = torch.round(torch.sigmoid(pred))
            pred = pred.cpu().detach().numpy()
            label = label.cpu().detach().numpy()

            acc = accuracy_score(label, pred)
            f1 = f1_score(label,pred)

            n = len(label)
            valid_loss += loss.item() * n
            valid_acc += acc.item()*n
            valid_f1_score += f1*n_epochs


    train_loss = f'{train_loss / train_n : .4f}'
    train_acc = f'{train_acc / train_n : .3f}'
    train_f1_score = f'{train_f1_score / train_n : .3f}'    
    valid_loss = f'{valid_loss / valid_n : .4f}'
    valid_acc = f'{valid_acc / valid_n : .3f}'
    valid_f1_score = f'{valid_f1_score / valid_n : .3f}'

    # log metrics
    print(f'Epoch {e+0 : 03}')
    print('      |   loss  |   acc  |   f1   |')
    print(f'TRAIN | {train_loss} | {train_acc} | {train_f1_score} |')
    print(f'VALID | {valid_loss} | {valid_acc} | {valid_f1_score} |')
    print('----------------------------------')
    print()
        


Epoch  01
      |   loss  |   acc  |   f1   |
TRAIN |  0.0668 |  0.975 |  0.408 |
VALID |  0.1045 |  0.966 |  0.414 |
----------------------------------

Epoch  02
      |   loss  |   acc  |   f1   |
TRAIN |  0.0666 |  0.975 |  0.407 |
VALID |  0.1182 |  0.963 |  0.406 |
----------------------------------

Epoch  03
      |   loss  |   acc  |   f1   |
TRAIN |  0.0655 |  0.976 |  0.409 |
VALID |  0.1223 |  0.962 |  0.407 |
----------------------------------

Epoch  04
      |   loss  |   acc  |   f1   |
TRAIN |  0.0570 |  0.979 |  0.410 |
VALID |  0.1372 |  0.959 |  0.410 |
----------------------------------

Epoch  05
      |   loss  |   acc  |   f1   |
TRAIN |  0.0653 |  0.975 |  0.408 |
VALID |  0.1328 |  0.957 |  0.407 |
----------------------------------

Epoch  06
      |   loss  |   acc  |   f1   |
TRAIN |  0.0688 |  0.976 |  0.408 |
VALID |  0.1367 |  0.959 |  0.405 |
----------------------------------

Epoch  07
      |   loss  |   acc  |   f1   |
TRAIN |  0.0652 |  0.976 |  0.

Epoch  55
      |   loss  |   acc  |   f1   |
TRAIN |  0.0470 |  0.984 |  0.413 |
VALID |  0.1299 |  0.966 |  0.413 |
----------------------------------

Epoch  56
      |   loss  |   acc  |   f1   |
TRAIN |  0.0469 |  0.984 |  0.413 |
VALID |  0.1129 |  0.969 |  0.415 |
----------------------------------

Epoch  57
      |   loss  |   acc  |   f1   |
TRAIN |  0.0508 |  0.983 |  0.413 |
VALID |  0.1089 |  0.968 |  0.408 |
----------------------------------

Epoch  58
      |   loss  |   acc  |   f1   |
TRAIN |  0.0479 |  0.985 |  0.413 |
VALID |  0.1189 |  0.969 |  0.413 |
----------------------------------

Epoch  59
      |   loss  |   acc  |   f1   |
TRAIN |  0.0532 |  0.985 |  0.414 |
VALID |  0.1339 |  0.967 |  0.412 |
----------------------------------

Epoch  60
      |   loss  |   acc  |   f1   |
TRAIN |  0.0486 |  0.983 |  0.413 |
VALID |  0.1379 |  0.959 |  0.400 |
----------------------------------

Epoch  61
      |   loss  |   acc  |   f1   |
TRAIN |  0.0447 |  0.986 |  0.

Epoch  109
      |   loss  |   acc  |   f1   |
TRAIN |  0.0346 |  0.989 |  0.416 |
VALID |  0.1118 |  0.973 |  0.419 |
----------------------------------

Epoch  110
      |   loss  |   acc  |   f1   |
TRAIN |  0.0324 |  0.988 |  0.415 |
VALID |  0.1156 |  0.973 |  0.419 |
----------------------------------

Epoch  111
      |   loss  |   acc  |   f1   |
TRAIN |  0.0348 |  0.989 |  0.415 |
VALID |  0.1244 |  0.969 |  0.414 |
----------------------------------

Epoch  112
      |   loss  |   acc  |   f1   |
TRAIN |  0.0350 |  0.988 |  0.415 |
VALID |  0.1108 |  0.972 |  0.417 |
----------------------------------

Epoch  113
      |   loss  |   acc  |   f1   |
TRAIN |  0.0331 |  0.987 |  0.415 |
VALID |  0.1204 |  0.968 |  0.415 |
----------------------------------

Epoch  114
      |   loss  |   acc  |   f1   |
TRAIN |  0.0384 |  0.988 |  0.416 |
VALID |  0.1380 |  0.962 |  0.405 |
----------------------------------

Epoch  115
      |   loss  |   acc  |   f1   |
TRAIN |  0.0319 |  0.98

Epoch  162
      |   loss  |   acc  |   f1   |
TRAIN |  0.0362 |  0.989 |  0.416 |
VALID |  0.1342 |  0.970 |  0.416 |
----------------------------------

Epoch  163
      |   loss  |   acc  |   f1   |
TRAIN |  0.0322 |  0.989 |  0.416 |
VALID |  0.1782 |  0.958 |  0.402 |
----------------------------------

Epoch  164
      |   loss  |   acc  |   f1   |
TRAIN |  0.0282 |  0.992 |  0.418 |
VALID |  0.1403 |  0.961 |  0.406 |
----------------------------------

Epoch  165
      |   loss  |   acc  |   f1   |
TRAIN |  0.0249 |  0.992 |  0.418 |
VALID |  0.1070 |  0.975 |  0.417 |
----------------------------------

Epoch  166
      |   loss  |   acc  |   f1   |
TRAIN |  0.0245 |  0.990 |  0.417 |
VALID |  0.1416 |  0.968 |  0.414 |
----------------------------------

Epoch  167
      |   loss  |   acc  |   f1   |
TRAIN |  0.0288 |  0.990 |  0.416 |
VALID |  0.1306 |  0.968 |  0.411 |
----------------------------------

Epoch  168
      |   loss  |   acc  |   f1   |
TRAIN |  0.0316 |  0.98

In [21]:
!pip install kaggle
from google.colab import files, drive
#구글 드라이브 마운트
drive.mount('/gdrive')

model_path = '/content/model/model.pth'
torch.save(model, model_path)
new_model = torch.load(model_path, map_location='cpu')

%cd /content/
!mkdir -p /content/model/

%cd /content/model/
!cp model.pth /gdrive/MyDrive/Deep-Project/



ModuleNotFoundError: No module named 'google.colab'

In [None]:
import numpy as np
import pyqtgraph as pg
import pyaudio
from PyQt5 import QtCore, uic
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
import librosa
import torch


SAMPLING_RATE = 22050
CHUNK_SIZE = 22050
form_class = uic.loadUiType("22.ui")[0]


def feature_engineering_mel_spectrum(signal, sampling_rate, n_mels):
    cur_frame_temp = signal
    mel_spectrum_temp = librosa.feature.melspectrogram(
        y=cur_frame_temp,
        sr=sampling_rate,
        n_mels=n_mels,
        n_fft=2048,
        hop_length=512,
    )
    mel_spectrum_temp = librosa.core.power_to_db(mel_spectrum_temp)
    feature_vector = mel_spectrum_temp
    feature_vector = feature_vector[np.newaxis, :,:, np.newaxis]
    return feature_vector

class MicrophoneRecorder():
    def __init__(self, signal):
        self.signal = signal
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(
            format=pyaudio.paFloat32,
            channels=1,
            rate=SAMPLING_RATE,
            input=True,
            frames_per_buffer=CHUNK_SIZE
        )

    def read(self):
        data = self.stream.read(CHUNK_SIZE, False)
        y = np.fromstring(data, 'float32')
        self.signal.emit(y)


    def close(self):
        print('멈춤')
        self.stream.stop_stream()
        self.stream.close()
        self.p.terminate()


class MyWindow(QMainWindow, form_class):
    read_collected = QtCore.pyqtSignal(np.ndarray)
    def __init__(self, model):
        super(MyWindow, self).__init__()
        self.setupUi(self)
        self.read_collected.connect(self.update)

        self.model = model

        # Bargraph
        pg.setConfigOptions(background='w', foreground='k')

        # hbox = QHBoxLayout()
        self.pw1 = pg.PlotWidget(title="BarGraph")
        self.pw1.showGrid(x=True, y=True)

        self.graph_box.addWidget(self.pw1)
        # self.setLayout(hbox)
        self.pw1.setGeometry(4, 1, 10, 5)  # x, y, width, height

        ticks = [list(zip(range(2), ('Environmental sound', 'Scream sound')))]
        xax = self.pw1.getAxis('bottom')
        xax.setTicks(ticks)
        self.show()


    def update(self, chunk):
        x = np.arange(2)

        feature_vector = feature_engineering_mel_spectrum(chunk, SAMPLING_RATE, 64)
        feature_vector = torch.tensor(feature_vector).float()
        feature_vector = feature_vector.squeeze(3).unsqueeze(1)
        y_softmax = float(
            torch.sigmoid(self.model(feature_vector)).detach().numpy()
        )

        if y_softmax > 0.5:
            pixmap = QPixmap("img/scream.png")
            self.label_5.setPixmap(QPixmap(pixmap))
        else:
            pixmap = QPixmap("img/normal.png")
            self.label_5.setPixmap(QPixmap(pixmap))

        self.pw1.clear()
        barchart = pg.BarGraphItem(
            x=x, height=[1 - y_softmax, y_softmax], width=1, brush=(159, 191, 229)
        )
        self.pw1.addItem(barchart)

In [None]:
import sys
from PyQt5 import QtCore
from PyQt5.QtWidgets import QApplication
from demo import MyWindow, MicrophoneRecorder
import torch
import torch.nn as nn

sampling_rate = 22050  # Hz
chunk_size = 22050  # samples

model_dir = 'model.pth'
model = torch.load(model_dir, map_location = 'cpu')

prediction_i = 0
predictions_collection = []


app = QApplication(sys.argv)
myWindow = MyWindow(model=model)
mic = MicrophoneRecorder(myWindow.read_collected)

# time (seconds) between reads
interval = sampling_rate / chunk_size
t = QtCore.QTimer()
t.timeout.connect(mic.read)
t.start(500)  # QTimer takes ms

myWindow.show()
app.exec_()