In [17]:
import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
import glob as gb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import zipfile
import warnings
import math
import os

In [18]:
clean_audio = '/Users/anushkaanand/Desktop/venv/cleansound/ravdess_rewritten_8k'
noisy_audio = '/Users/anushkaanand/Desktop/venv/finalsound'

In [19]:
SAMPLE_RATE = 22050
TRACK_DURATION = 3 # measured in seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION
num_mfcc = 128
n_fft = 2048
hop_length = 615
num_segments = 5

mfcc_features_mixed_sound = []

for filename in os.listdir(noisy_audio):
    file_path = os.path.join(noisy_audio, filename)
    y, sr = librosa.load(file_path, sr=SAMPLE_RATE)
   
    for d in range(num_segments):
        mfcc = librosa.feature.mfcc(y=y, sr=SAMPLE_RATE, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
        mfcc_features_mixed_sound.append(mfcc.T)
        
mfcc_features_mixed_sound = np.array(mfcc_features_mixed_sound)
print(f'MFCC Features Shape: {mfcc_features_mixed_sound.shape}')

MFCC Features Shape: (5000, 128, 128)


In [None]:
SAMPLE_RATE = 22050
TRACK_DURATION = 3 # measured in seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION
num_mfcc = 128
n_fft = 2048
hop_length = 615
num_segments = 5

mfcc_features_clean_audio = []

for filename in os.listdir(clean_audio):
    file_path = os.path.join(clean_audio, filename)
    y, sr = librosa.load(file_path, sr=SAMPLE_RATE)
    # samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    # num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    for d in range(num_segments):
        mfcc = librosa.feature.mfcc(y=y, sr=SAMPLE_RATE, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
        mfcc_features_clean_audio.append(mfcc.T)
        
mfcc_features_clean_audio = np.array(mfcc_features_clean_audio)
print(f'MFCC Features Shape: {mfcc_features_clean_audio.shape}')

In [7]:
min_samples = min(mfcc_features_mixed_sound.shape[0], mfcc_features_clean_audio.shape[0])

# Trim both arrays to the smaller size
noisy_mfcc_reshaped_trimmed = mfcc_features_mixed_sound[:min_samples]
clean_mfcc_reshaped_trimmed = mfcc_features_clean_audio[:min_samples]

# Now perform the train-test split
x_train, x_test, y_train, y_test = train_test_split(
    noisy_mfcc_reshaped_trimmed, clean_mfcc_reshaped_trimmed, test_size=0.1
)

print(f"x_train shape: {x_train.shape}")
print(f"y_train shape: {y_train.shape}")

x_train shape: (4500, 26, 128)
y_train shape: (4500, 26, 128)


In [8]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, Conv2DTranspose, BatchNormalization, Dropout, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Activation, MaxPool2D, Concatenate
def conv_block (input, num_filters):
    x = Conv2D(num_filters, 3, padding="same")(input)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = Conv2D(num_filters, 3, padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    return (x)

def max_pooling (input, num_filters):
    x = conv_block(input, num_filters)
    p = MaxPool2D((2,2))(x)

    return (x,p)

def upscaling (input, num_filters, skip_features):
    x = Conv2DTranspose(num_filters, (2,2), strides = 2, padding = "same")(input)
    print(f"Shape after Conv2DTranspose: {x.shape}")
    print(f"Shape of skip_features: {skip_features.shape}")
    x = concatenate([x, skip_features])
    x = conv_block(x, num_filters)
    return (x)

def U_NET (input_shape):
    inputs = Input(input_shape)

    # s1, p1 = max_pooling(inputs, 64)
    # print(s1.shape)
    # print(p1.shape)
    s2, p2 = max_pooling(inputs, 128)
    s3, p3 = max_pooling(p2, 256)
    s4 ,p4 = max_pooling(p3, 512)

    b1 = conv_block(p4, 1024)

    u4 = upscaling(b1, 512, s4)
    u3 = upscaling(u4, 256, s3)
    u2 = upscaling(u3, 128, s2)
    print(u2.shape)
    # u1 = upscaling(u2, 64, s1)
    
    outputs = Conv2D(1, 1, padding="same", activation="sigmoid")(u2)
    print(outputs.shape)
    model = Model(inputs, outputs, name="U-Net")
    return model

In [9]:
# print(x_train.shape)
# x_train = x_train[..., np.newaxis]
# print(x_train.shape)
# x_test = x_test[..., np.newaxis]

In [10]:
print(x_train.shape)

IMG_HEIGHT = x_train.shape[1]
IMG_WIDTH  = x_train.shape[2]
IMG_CHANNELS = 1

input_shape = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)

(4500, 26, 128)


In [12]:
from keras.layers import Conv2DTranspose, Cropping2D, ZeroPadding2D, concatenate

def upscaling(input, num_filters, skip_features):
    x = Conv2DTranspose(num_filters, (2, 2), strides=2, padding='same')(input)
    print(f"Shape after Conv2DTranspose: {x.shape}")
    print(f"Shape of skip_features: {skip_features.shape}")

    # Adjust shapes if necessary
    # Example: Padding or Cropping
    if x.shape[1] > skip_features.shape[1]:
        x = Cropping2D(((x.shape[1] - skip_features.shape[1]) // 2, 0))(x)
    elif x.shape[1] < skip_features.shape[1]:
        x = ZeroPadding2D(((skip_features.shape[1] - x.shape[1]) // 2, 0))(x)

    # Repeat for other dimensions if necessary

    x = concatenate([x, skip_features])
    x = conv_block(x, num_filters)
    return x

In [13]:
model = U_NET(input_shape)
model.compile(optimizer=Adam(learning_rate= 1e-3), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Shape after Conv2DTranspose: (None, 6, 32, 512)
Shape of skip_features: (None, 6, 32, 512)
Shape after Conv2DTranspose: (None, 12, 64, 256)
Shape of skip_features: (None, 13, 64, 256)


ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concatenation axis. Received: input_shape=[(None, 12, 64, 256), (None, 13, 64, 256)]

In [30]:
# import numpy as np

# # Assuming each sample is a single row with 13 MFCC features
# # Reshape x_train to (num_samples, 13, 1, 1) for a U-Net with a single channel
# x_train_reshaped = x_train.reshape((x_train.shape[0], 13, 1, 1))
# x_test_reshaped = x_test.reshape((x_test.shape[0], 13, 1, 1))

# # Also ensure y_train and y_test are reshaped accordingly if needed
# # Example: If y_train is binary classification, it could be reshaped to (num_samples, 1)
# # print(y_train.shape)
# # y_train_reshaped = y_train.reshape((y_train.shape[0], 1))
# # y_test_reshaped = y_test.reshape((y_test.shape[0], 1))

In [None]:
history = model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5, batch_size=40)

Epoch 1/5
[1m 23/113[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m2:12:48[0m 89s/step - accuracy: 4.9450e-05 - loss: -259.3033