# Copy data

In [1]:
import json
import os
import math
import librosa
import numpy as np

In [2]:
import shutil
# Source (shared course data) and destination (personal working copy) paths
source_folder = '/WAVE/projects/CSEN-342-Wi24/data/pr3'
destination_folder = '/WAVE/users/unix/lhuang2/pr3'

# dirs_exist_ok=True (Python 3.8+) makes this cell safe to re-run: without it,
# copytree raises FileExistsError as soon as the destination already exists.
shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)

'/WAVE/users/unix/lhuang2/pr3'

# Put all audio files into one folder

In [3]:
import os
import shutil

def move_audio(source_folder_path, target_folder_path):
    """Move every ``.wav`` file found directly in one folder into another.

    Args:
        source_folder_path: Directory whose immediate entries are scanned.
        target_folder_path: Directory receiving the files; created when missing.

    Only names ending in ``.wav`` are moved; everything else stays in place.
    Prints a one-line summary with the number of files moved and returns None.
    """
    # Create the destination up front so the moves below cannot fail on a missing folder
    os.makedirs(target_folder_path, exist_ok=True)

    moved = 0
    for entry in os.listdir(source_folder_path):
        if not entry.endswith('.wav'):
            continue
        origin = os.path.join(source_folder_path, entry)
        destination = os.path.join(target_folder_path, entry)
        shutil.move(origin, destination)
        moved += 1

    print(f'Moved {moved} files to {target_folder_path}')

In [4]:
# Training split: collect its .wav files into an `audio` sub-folder
source_folder_train = '/WAVE/users/unix/lhuang2/pr3/train'
target_folder_train = '/WAVE/users/unix/lhuang2/pr3/train/audio'
move_audio(source_folder_path=source_folder_train,
           target_folder_path=target_folder_train)

Moved 350 files to /WAVE/users/unix/lhuang2/pr3/train/audio


In [5]:
# Validation split: collect its .wav files into an `audio` sub-folder
source_folder_val = '/WAVE/users/unix/lhuang2/pr3/val'
target_folder_val = '/WAVE/users/unix/lhuang2/pr3/val/audio'
move_audio(source_folder_path=source_folder_val,
           target_folder_path=target_folder_val)

Moved 75 files to /WAVE/users/unix/lhuang2/pr3/val/audio


In [6]:
# Test split: collect its .wav files into an `audio` sub-folder
source_folder_test = '/WAVE/users/unix/lhuang2/pr3/test'
target_folder_test = '/WAVE/users/unix/lhuang2/pr3/test/audio'
move_audio(source_folder_path=source_folder_test,
           target_folder_path=target_folder_test)

Moved 75 files to /WAVE/users/unix/lhuang2/pr3/test/audio


# Extract Features

In [2]:
def get_features(path, num_segments):
    """Extract per-segment MFCC sequences for every ``.wav`` file in ``path/audio``.

    Each file is loaded (resampled to the derived ``SAMPLE_RATE``), cut into
    ``num_segments`` equal consecutive segments, and each segment is turned
    into a ``(frames, 13)`` MFCC matrix.

    Args:
        path: Split directory that contains an ``audio`` sub-folder.
        num_segments: Number of segments taken from each audio file.

    Returns:
        float64 array of shape ``(n_files * num_segments, frames, 13)``. Rows
        are grouped by file: the ``num_segments`` rows of the first file come
        first, then those of the second file, and so on. With
        ``num_segments=10`` the notebook observes ``frames == 131``.

    Raises:
        ValueError: if ``path/audio`` contains no ``.wav`` file.

    NOTE(review): files are visited in natural filename order ("2.wav" before
    "10.wav") so the result is deterministic. This assumes the lines of
    ``labels.txt`` follow the same order -- confirm against the dataset.
    """
    import re  # local import: only needed for the natural-order sort key

    num_mfcc = 13
    n_fft = 2048
    hop_length = 512
    TRACK_DURATION = 5  # measured in seconds

    # The segment length is derived from num_mfcc and num_segments; the sample
    # rate is then chosen so that num_segments segments span TRACK_DURATION seconds.
    num_mfcc_vectors_per_segment = num_mfcc * num_segments           # 13 * 10 = 130
    samples_per_segment = num_mfcc_vectors_per_segment * hop_length  # 130 * 512 = 66560
    SAMPLES_PER_TRACK = samples_per_segment * num_segments           # 66560 * 10 = 665600
    SAMPLE_RATE = int(SAMPLES_PER_TRACK / TRACK_DURATION)            # 665600 / 5 = 133120

    def _natural_key(name):
        # Split into alternating text / digit runs so numeric parts compare as numbers
        return [int(tok) if tok.isdigit() else tok.lower()
                for tok in re.split(r'(\d+)', name)]

    audio_path = os.path.join(path, 'audio')
    # os.listdir order is arbitrary; sort so features line up with labels
    # reproducibly, and ignore non-audio entries such as notebook checkpoints.
    wav_names = sorted(
        (name for name in os.listdir(audio_path) if name.endswith('.wav')),
        key=_natural_key,
    )
    if not wav_names:
        raise ValueError(f'No .wav files found in {audio_path}')

    mfcc_list = []
    for filename in wav_names:
        filepath = os.path.join(audio_path, filename)
        signal, sample_rate = librosa.load(filepath, sr=SAMPLE_RATE)

        # Zero-pad clips shorter than the expected track length so every
        # segment has the same number of samples (otherwise the stacked
        # result would be ragged).
        if len(signal) < SAMPLES_PER_TRACK:
            signal = np.pad(signal, (0, SAMPLES_PER_TRACK - len(signal)))

        # process all segments of the audio file
        for d in range(num_segments):
            # start and finish sample of the current segment
            start = samples_per_segment * d
            finish = start + samples_per_segment

            mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate,
                                        n_mfcc=num_mfcc, n_fft=n_fft,
                                        hop_length=hop_length)
            # Transpose to (frames, num_mfcc) so time is the sequence axis
            mfcc_list.append(mfcc.T)

    # Build the array once, after all segments are collected
    return np.asarray(mfcc_list, dtype=np.float64)
    

# Read and Convert Labels

In [3]:
def get_labels(path, num_segments):
    """Read ``path/labels.txt`` and return one 0-based label per audio segment.

    The file holds one integer label per line. Each label is shifted down by
    one (so labels written as 1..25 become 0..24) and repeated
    ``num_segments`` times consecutively, matching the per-file grouping of
    the segment features.

    Args:
        path: Split directory containing ``labels.txt``.
        num_segments: Number of segments each audio file is cut into.

    Returns:
        1-D integer array of length ``n_labels * num_segments``.

    Raises:
        ValueError: if a non-blank line is not an integer.
    """
    labels_path = os.path.join(path, 'labels.txt')
    with open(labels_path, 'r') as file:
        # Skip blank lines (e.g. a trailing newline), which would crash int('')
        track_labels = [int(line) - 1 for line in file if line.strip()]
    return np.repeat(np.array(track_labels, dtype=int), num_segments)

In [4]:
# Number of equal-length segments each audio file is split into
num_segments = 10

In [5]:
# Training split directory (holds labels.txt and the audio/ sub-folder)
path_train = '/WAVE/users/unix/lhuang2/pr3/train'

In [6]:
# MFCC sequences for the training split: num_segments rows per audio file
X_train = get_features(path_train, num_segments)

In [7]:
# 0-based training labels, each repeated num_segments times to match X_train rows
y_train = get_labels(path_train, num_segments)

In [8]:
# Validation split directory (holds labels.txt and the audio/ sub-folder)
path_val = '/WAVE/users/unix/lhuang2/pr3/val'

In [9]:
# MFCC sequences for the validation split: num_segments rows per audio file
X_val = get_features(path_val, num_segments)

In [10]:
# 0-based validation labels, each repeated num_segments times to match X_val rows
y_val = get_labels(path_val, num_segments)

In [11]:
# Features and labels are produced by separate passes over the data, so make
# sure every feature row has a label before training.
assert X_train.shape[0] == y_train.shape[0], 'train features/labels count mismatch'
assert X_val.shape[0] == y_val.shape[0], 'val features/labels count mismatch'
X_train.shape, y_train.shape, X_val.shape, y_val.shape

((3500, 131, 13), (3500,), (750, 131, 13), (750,))

# Train the RNN model

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten

# Each sample is a (time_steps, n_mfcc) sequence -- (131, 13) for the shapes shown above
input_shape = X_train.shape[1:]

# Two stacked LSTMs keep the full sequence, a per-timestep dense stack narrows
# the features, and the flattened result feeds a 25-way softmax classifier.
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=input_shape),
    LSTM(128, return_sequences=True),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Flatten(),
    Dense(25, activation='softmax'),
])

2024-03-05 22:11:06.257050: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-05 22:11:06.257193: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-05 22:11:06.487201: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-05 22:11:07.094369: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-03-05 22:11:17.304703: I tensorflow/core

In [13]:
# Compile the model. The sparse categorical cross-entropy loss takes integer
# class indices, which is what get_labels returns (no one-hot encoding needed).
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [14]:
# Show each layer's output shape and parameter count
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 131, 128)          72704     
                                                                 
 lstm_1 (LSTM)               (None, 131, 128)          131584    
                                                                 
 dropout (Dropout)           (None, 131, 128)          0         
                                                                 
 dense (Dense)               (None, 131, 256)          33024     
                                                                 
 dense_1 (Dense)             (None, 131, 128)          32896     
                                                                 
 dense_2 (Dense)             (None, 131, 64)           8256      
                                                                 
 dense_3 (Dense)             (None, 131, 32)           2

In [15]:
# Train for 100 epochs in batches of 32, with the validation split passed as
# validation_data; `history` keeps the object returned by fit.
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=32, epochs=100)

Epoch 1/100


2024-03-05 22:11:39.451081: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8902
2024-03-05 22:11:40.749278: I external/local_xla/xla/service/service.cc:168] XLA service 0x7f38fc04aa00 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-03-05 22:11:40.749313: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce GTX 1080, Compute Capability 6.1
2024-03-05 22:11:40.791723: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1709705500.919286 3820744 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

# Predict for Test data

In [16]:
# Test split directory (its audio/ sub-folder holds the clips to classify)
path_test = '/WAVE/users/unix/lhuang2/pr3/test'

In [17]:
# MFCC sequences for the test split: num_segments rows per audio file
X_test = get_features(path_test, num_segments)

In [18]:
# Sanity check: expect (n_files * num_segments, frames, n_mfcc)
X_test.shape

(750, 131, 13)

In [19]:
# Class probabilities for every test segment (one softmax row of 25 values per segment)
prediction = model.predict(X_test)



In [20]:
# Most probable class index (0-24) per segment. Reuses `prediction` from the
# previous cell instead of running inference over X_test a second time.
pred = np.argmax(prediction, axis=1)



In [21]:
from collections import Counter
def most_frequency(numbers):
    """Return the value that occurs most often in ``numbers``.

    Args:
        numbers: Iterable of hashable values.

    Returns:
        The most frequent value; on a tie, the tied value seen first.

    Raises:
        IndexError: if ``numbers`` is empty.
    """
    # most_common(1) yields a single (value, count) pair for the top entry
    ranked = Counter(numbers).most_common(1)
    winner, _occurrences = ranked[0]
    return winner


In [22]:
# Group the per-segment predictions by audio file. get_features emits
# `num_segments` consecutive rows per file, so the chunk size must follow
# that value rather than a separately hard-coded 10.
chunk_size = num_segments
chunks = [pred[i:i + chunk_size] for i in range(0, len(pred), chunk_size)]
# `chunks` now holds one array of `chunk_size` predictions per test file
# (75 arrays for the 750 test rows shown above)

In [23]:
# Majority vote over each file's segment predictions; the +1 converts the
# 0-based class index back to the 1-based labels used in labels.txt.
test_label = [most_frequency(chunk) + 1 for chunk in chunks]


In [24]:
# Sanity check: there should be one predicted label per test audio file
print(len(test_label))

75


In [25]:
# Write one predicted label per line. The `with` block guarantees the file is
# flushed and closed even if a write fails part-way through.
with open('/WAVE/users/unix/lhuang2/pr3/test/labels.txt', 'w') as f:
    f.writelines(str(label) + '\n' for label in test_label)