### Importing the libraries

In [None]:
import tensorflow as tf
import keras
import librosa
import os
import time
from keras import layers
import numpy as np
from tensorflow.keras import layers, initializers

### Creating the dataset

In [None]:
# Mount Google Drive at /content/drive; the dataset cells read and write under /content/drive/MyDrive/data.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Mixture Dataset creation

In [None]:
def load_and_process_audio(file_paths, total_samples=24000, sr=8000, verbose=False):
    """Load audio files into a fixed-length float32 batch tensor.

    Every file is read with ``librosa.load`` at ``sr`` Hz, cut to at most
    ``total_samples`` samples, and right-padded with zeros when shorter.

    Args:
        file_paths: Sequence of audio file paths to load.
        total_samples: Number of samples kept per file.
        sr: Sampling rate handed to ``librosa.load``.
        verbose: When True, print one diagnostic line per loaded file.

    Returns:
        A ``tf.float32`` tensor of shape ``(len(file_paths), total_samples)``.
        A file that fails to load is reported on stdout and its row is left
        as zeros; the error is not re-raised.
    """
    audio_data = np.zeros((len(file_paths), total_samples), dtype=np.float32)

    # Read exactly as much audio as will be kept. With the defaults this is
    # 24000 / 8000 = 3.0 s, but it now follows `total_samples` instead of
    # silently zero-padding everything past a fixed 3 seconds.
    duration = total_samples / sr

    for i, file_path in enumerate(file_paths):
        try:
            data, _ = librosa.load(file_path, sr=sr, duration=duration)
        except Exception as e:
            # Best effort: keep building the batch; this row stays all zeros.
            print(f"Error loading {file_path}: {e}")
            continue

        if verbose:
            print(f"{file_path}: {len(data)} samples at {sr} Hz")

        # One slice assignment covers both truncation and zero padding,
        # because the rows were allocated as zeros.
        kept = min(len(data), total_samples)
        audio_data[i, :kept] = data[:kept]

    return tf.convert_to_tensor(audio_data, dtype=tf.float32)

# Build the mixture batch from every path in `full_track` and report its shape.
# NOTE(review): in the visible notebook `full_track` is only assigned in the
# directory-listing cell further down, so on a fresh kernel that cell has to
# run before this one.
audio_tensor = load_and_process_audio(full_track)
print(audio_tensor.shape)

#### Saving mixture dataset

In [None]:
# Persist the mixture batch as train_x.npy on the mounted Drive.
import numpy as np
np.save("/content/drive/MyDrive/data/train_x.npy", audio_tensor)

### Target Dataset building

In [None]:
# Recursively delete the `noise` directory from the data folder on the mounted Drive.
# NOTE(review): destructive — confirm the folder is no longer needed before re-running.
!rm -r /content/drive/MyDrive/data/noise

In [None]:
import os
import librosa
import numpy as np

# Clip geometry for the targets: 3 seconds at 8 kHz, two reference sources per mixture.
SAMPLE_RATE = 8000
CLIP_SECONDS = 3.0
CLIP_SAMPLES = int(SAMPLE_RATE * CLIP_SECONDS)  # 24000
NUM_SOURCES = 2

dataset_path = "/content/drive/MyDrive/data"

# NOTE(review): in the visible notebook `track_only` is only assigned in the
# directory-listing cell further down; run that cell first on a fresh kernel.
# Rows start as zeros, so clips shorter than CLIP_SAMPLES end up zero-padded.
sources = np.zeros((len(track_only), NUM_SOURCES, CLIP_SAMPLES), dtype=np.float32)

for example_index, file in enumerate(track_only):
    for source_index in range(NUM_SOURCES):
        # Source folders are named s1, s2 (1-based) and mirror the mixture file names.
        source_file_path = os.path.join(dataset_path, f"s{source_index + 1}", file)
        source_audio, sr = librosa.load(source_file_path, sr=SAMPLE_RATE, duration=CLIP_SECONDS)
        # One slice assignment handles both truncation and padding.
        kept = min(len(source_audio), CLIP_SAMPLES)
        sources[example_index, source_index, :kept] = source_audio[:kept]

print(sources.shape)

In [None]:
# Peek at the first four file names together with their positions in the list.
for position, name in enumerate(track_only[:4]):
    print(position, name)

0 8838-298546-0051_6529-62556-0003.wav
1 8838-298545-0031_2910-131096-0077.wav
2 248-130652-0016_4051-11218-0059.wav
3 669-129061-0055_4267-78186-0019.wav


#### Saving target dataset

In [None]:
# Persist the per-source target array as train_y.npy on the mounted Drive.
import numpy as np
np.save("/content/drive/MyDrive/data/train_y.npy", sources)

In [None]:
# List the mixture directory once; `track_only` holds bare file names and
# `full_track` the matching full paths, in the same order.
dataset_path = "/content/drive/MyDrive/data"
mix_both_path = os.path.join(dataset_path, "mix_both")
track_only = os.listdir(mix_both_path)
full_track = [os.path.join(mix_both_path, name) for name in track_only]

In [None]:
# Show only a small sample; the full list has thousands of entries.
track_only[:5]

In [None]:
# Number of file names listed from the mix_both directory.
len(track_only)

13900

In [None]:
# Show only a small sample; the full list has thousands of entries.
full_track[:5]

In [None]:
# Load one mixture with sr=None (no resampling requested) and show its sampling rate and shape.
sam, sr = librosa.load('/content/drive/MyDrive/data/mix_both/7127-75947-0018_5105-28240-0016.wav', sr = None)
print(sr)
sam.shape

In [None]:
# Confirm which file name sits at position 2270.
track_only[2270] == '237-126133-0013_908-157963-0010.wav'

True

In [None]:
# File name stored at position 2270.
track_only[2270]

'237-126133-0013_908-157963-0010.wav'

#### Loading saved dataset

In [None]:
# Reload the saved mixture (x) and target (y) arrays from Drive.
# Both files hold plain float32 arrays, so pickle support is not needed;
# leaving allow_pickle at its default (False) prevents a tampered .npy file
# from executing code on load.
x = np.load("/content/drive/MyDrive/data/train_x.npy")
y = np.load("/content/drive/MyDrive/data/train_y.npy")

In [None]:
# Row 2270 of the reloaded mixture array.
x[2270]

array([-0.00042725, -0.00289917,  0.00137329, ...,  0.04263306,
        0.07681274,  0.09039307], dtype=float32)

### Save the track_only list as JSON (to `tracklist.txt`)

In [None]:
import json

def save_list_to_text(data_list, filename):
    """Write ``data_list`` to ``filename`` as indented JSON.

    Nothing is returned. The outcome is reported on stdout: a confirmation
    on success, or the error text if anything goes wrong (errors are not
    re-raised).
    """
    try:
        with open(filename, 'w') as handle:
            json.dump(data_list, handle, indent=4)
    except Exception as e:
        print(f"An error occurred while saving the list: {e}")
    else:
        print(f"List successfully saved to '{filename}'")


# Store the file-name order so array rows can be mapped back to files later.
# NOTE(review): this is a relative path, while the loading cell reads
# '/content/tracklist.txt' — the two only match when the working directory is
# /content. Despite the .txt suffix, the content is JSON.
output_filename = "tracklist.txt"
save_list_to_text(track_only, output_filename)

List successfully saved to 'tracklist.txt'


### Load the track_only list back from the JSON file (`tracklist.txt`)

In [None]:
def load_list_from_text(filename):
    """Read and return the JSON content stored in ``filename``.

    On success a confirmation is printed and the decoded object is returned.
    On any failure (missing file, invalid JSON, or any other error) a message
    is printed and ``None`` is returned; nothing is raised.
    """
    try:
        with open(filename, 'r') as handle:
            contents = json.load(handle)
    except FileNotFoundError:
        failure = f"Error: File '{filename}' not found."
    except json.JSONDecodeError:
        failure = f"Error: Could not decode JSON from '{filename}'."
    except Exception as e:
        failure = f"An error occurred while loading the list: {e}"
    else:
        print(f"List successfully loaded from '{filename}'")
        return contents

    # Every failure path ends here: report it and signal with None.
    print(failure)
    return None

# Read the list back; the guard skips the print when loading returned None or an empty list.
loaded_data = load_list_from_text('/content/tracklist.txt')
if loaded_data:
    print("Loaded list:", loaded_data)

In [None]:
# Three entries from the reloaded list, for comparison with `track_only`.
loaded_data[455:458]

['26-495-0030_412-126975-0046.wav',
 '32-4137-0027_289-121665-0019.wav',
 '7264-92316-0032_150-132655-0034.wav']

In [None]:
# The same positions in the in-memory list.
track_only[455:458]

['26-495-0030_412-126975-0046.wav',
 '32-4137-0027_289-121665-0019.wav',
 '7264-92316-0032_150-132655-0034.wav']

### Random Checking Sample 1

In [None]:
# Reload one mixture file directly from disk (sr=None: no resampling requested).
sam, sr = librosa.load('/content/drive/MyDrive/data/mix_both/4406-16882-0021_302-123516-0025.wav', sr = None)

In [None]:
# Reduce to a single bool: the element-wise `==` repr is truncated and only
# shows the first and last few entries, so it cannot confirm a full match.
np.array_equal(sam[:24000], x[10234])

array([ True,  True,  True, ...,  True,  True,  True])

In [None]:
# Reload both reference sources for the same file; sam_s0 comes from folder s1 and sam_s1 from folder s2.
sam_s0, sr = librosa.load('/content/drive/MyDrive/data/s1/4406-16882-0021_302-123516-0025.wav', sr = None)
sam_s1, sr = librosa.load('/content/drive/MyDrive/data/s2/4406-16882-0021_302-123516-0025.wav', sr = None)

In [None]:
# Single-bool check of the first stored source against the file on disk
# (the element-wise repr would be truncated).
np.array_equal(y[10234][0], sam_s0[:24000])

array([ True,  True,  True, ...,  True,  True,  True])

In [None]:
# Single-bool check of the second stored source against the file on disk
# (the element-wise repr would be truncated).
np.array_equal(y[10234][1], sam_s1[:24000])

array([ True,  True,  True, ...,  True,  True,  True])

### Random Checking Sample 2

In [None]:
# Stored mixture row 5999.
x[5999]

array([ 0.03668213,  0.04165649,  0.02160645, ...,  0.0039978 ,
       -0.01171875, -0.00411987], dtype=float32)

In [None]:
# File name at position 5999, used to locate the matching files on disk.
track_only[5999]

'4137-11701-0036_3486-166446-0028.wav'

In [None]:
# Stored targets for row 5999 (one sub-array per source).
y[5999]

array([[-3.0517578e-04, -2.4414062e-04, -1.5258789e-04, ...,
        -2.4414062e-04, -2.7465820e-04, -1.2207031e-04],
       [-3.0517578e-05,  1.5258789e-04,  2.7465820e-04, ...,
         8.5449219e-04,  8.5449219e-04,  2.6855469e-03]], dtype=float32)

In [None]:
# Reload the mixture from disk without resampling and show its shape and sampling rate.
sam, sr = librosa.load('/content/drive/MyDrive/data/mix_both/4137-11701-0036_3486-166446-0028.wav', sr = None)
sam.shape, sr

((127640,), 8000)

In [None]:
# Reduce to a single bool: the element-wise `==` repr is truncated and only
# shows the first and last few entries, so it cannot confirm a full match.
np.array_equal(x[5999], sam[:24000])

array([ True,  True,  True, ...,  True,  True,  True])

In [None]:
# Reload both reference sources for the same file; sam_s0 comes from folder s1 and sam_s1 from folder s2.
sam_s0, sr = librosa.load('/content/drive/MyDrive/data/s1/4137-11701-0036_3486-166446-0028.wav', sr = None)
sam_s1, sr = librosa.load('/content/drive/MyDrive/data/s2/4137-11701-0036_3486-166446-0028.wav', sr = None)

In [None]:
# Single-bool check of the first stored source against the file on disk
# (the element-wise repr would be truncated).
np.array_equal(y[5999][0], sam_s0[:24000])

array([ True,  True,  True, ...,  True,  True,  True])

In [None]:
# Single-bool check of the second stored source against the file on disk
# (the element-wise repr would be truncated).
np.array_equal(y[5999][1], sam_s1[:24000])

array([ True,  True,  True, ...,  True,  True,  True])

### Random Checking Sample 3

In [None]:
# Stored mixture row 989.
x[989]

array([ 0.01480103,  0.0184021 ,  0.01629639, ..., -0.0953064 ,
       -0.09362793, -0.08230591], dtype=float32)

In [None]:
# Stored targets for row 989 (one sub-array per source).
y[989]

array([[-5.1879883e-04, -1.2207031e-04,  1.3427734e-03, ...,
         1.2817383e-03,  3.6926270e-03,  3.0212402e-03],
       [ 1.8310547e-04,  5.1879883e-04,  2.1362305e-04, ...,
        -1.1892700e-01, -1.3049316e-01, -1.2448120e-01]], dtype=float32)

In [None]:
# File name at position 989, used to locate the matching files on disk.
track_only[989]

'3982-178459-0038_460-172359-0078.wav'

In [None]:
# Reload the mixture from disk without resampling and show its shape and sampling rate.
sam, sr = librosa.load('/content/drive/MyDrive/data/mix_both/3982-178459-0038_460-172359-0078.wav', sr = None)
sam.shape, sr

((107880,), 8000)

In [None]:
# Reduce to a single bool: the element-wise `==` repr is truncated and only
# shows the first and last few entries, so it cannot confirm a full match.
np.array_equal(x[989], sam[:24000])

array([ True,  True,  True, ...,  True,  True,  True])

In [None]:
# Reload both reference sources for the same file; sam_s0 comes from folder s1 and sam_s1 from folder s2.
sam_s0, sr = librosa.load('/content/drive/MyDrive/data/s1/3982-178459-0038_460-172359-0078.wav', sr = None)
sam_s1, sr = librosa.load('/content/drive/MyDrive/data/s2/3982-178459-0038_460-172359-0078.wav', sr = None)

In [None]:
# Single-bool check of the first stored source against the file on disk
# (the element-wise repr would be truncated).
np.array_equal(y[989][0], sam_s0[:24000])

array([ True,  True,  True, ...,  True,  True,  True])

In [None]:
# Single-bool check of the second stored source against the file on disk
# (the element-wise repr would be truncated).
np.array_equal(y[989][1], sam_s1[:24000])

array([ True,  True,  True, ...,  True,  True,  True])

### Random Checking Sample 4

In [None]:
# Stored mixture row 8567.
x[8567]

array([-0.01535034, -0.03121948, -0.02154541, ..., -0.01974487,
        0.00112915,  0.00054932], dtype=float32)

In [None]:
# Stored targets for row 8567 (one sub-array per source).
y[8567]

array([[ 0.00064087, -0.00637817, -0.00119019, ...,  0.00091553,
         0.00244141,  0.00125122],
       [-0.00054932, -0.0007019 , -0.0007019 , ...,  0.00479126,
         0.00613403,  0.00756836]], dtype=float32)

In [None]:
# File name at position 8567, used to locate the matching files on disk.
track_only[8567]

'200-126784-0034_8063-274117-0026.wav'

In [None]:
# Reload the mixture from disk without resampling and show its shape and sampling rate.
sam, sr = librosa.load('/content/drive/MyDrive/data/mix_both/200-126784-0034_8063-274117-0026.wav', sr = None)
sam.shape, sr

((30440,), 8000)

In [None]:
# Reduce to a single bool: the element-wise `==` repr is truncated and only
# shows the first and last few entries, so it cannot confirm a full match.
np.array_equal(x[8567], sam[:24000])

array([ True,  True,  True, ...,  True,  True,  True])

In [None]:
# Reload both reference sources for the same file; sam_s0 comes from folder s1 and sam_s1 from folder s2.
sam_s0, sr = librosa.load('/content/drive/MyDrive/data/s1/200-126784-0034_8063-274117-0026.wav', sr = None)
sam_s1, sr = librosa.load('/content/drive/MyDrive/data/s2/200-126784-0034_8063-274117-0026.wav', sr = None)

In [None]:
# Single-bool check of the first stored source against the file on disk
# (the element-wise repr would be truncated).
np.array_equal(y[8567][0], sam_s0[:24000])

array([ True,  True,  True, ...,  True,  True,  True])

In [None]:
# Single-bool check of the second stored source against the file on disk
# (the element-wise repr would be truncated).
np.array_equal(y[8567][1], sam_s1[:24000])

array([ True,  True,  True, ...,  True,  True,  True])