<a href="https://colab.research.google.com/github/its-rajesh/Audio-Bleeding-Removal/blob/main/BleedingRemoval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Neural Network for Bleeding Removal

### Imports

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
import soundfile as sf
import librosa as lb
import numpy as np
from matplotlib import pyplot as plt
import os
import pandas as pd

In [None]:
import re

In [None]:
# Mount Google Drive at /drive; the dataset paths used in later cells start with '/drive/My Drive/'.
from google.colab import drive
drive.mount('/drive')

Mounted at /drive


### TPU Essentials

In [None]:
print("Tensorflow version " + tf.__version__)

# Detect the TPU attached to this runtime. ValueError is what this cell treats
# as "no TPU available".
try:
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
  print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
except ValueError as err:
  # Raise a regular RuntimeError (still a BaseException subclass, so existing
  # handlers keep working) and chain the resolver's error so its message is
  # not lost. The old text pointed at a "previous cell" that does not exist.
  raise RuntimeError(
      'ERROR: Not connected to a TPU runtime; select a TPU under '
      'Runtime > Change runtime type and re-run this cell.') from err

# Connect to the TPU cluster, initialise it, and build the distribution
# strategy that later cells use via tpu_strategy.scope().
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.TPUStrategy(tpu)

Tensorflow version 2.8.2
Running on TPU  ['10.67.186.66:8470']




In [None]:
# Shorthand for tf.data's AUTOTUNE sentinel. Not referenced by any other cell visible in this notebook.
AUTO = tf.data.experimental.AUTOTUNE

### Creating Training & Test set

In [None]:
# Directory of the 'bleeded' input recordings.
path = '/drive/My Drive/Projects/Bleeding-Removal/Dataset/bleeded/'
# os.listdir returns entries in arbitrary order. Sort them so the positional
# train/test split built from this list is the same on every run.
all_files = sorted(os.listdir(path))

In [None]:
temp_path = '/drive/My Drive/Projects/Bleeding-Removal/Dataset/'

training_data = []
test_data = []

training_label = []
test_label = []

# Cap at the number of files actually present so a short listing cannot raise IndexError.
n_files = min(800, len(all_files))  # 12000 actual
n_train = 500  # actual: 10000
clip_len = 120000  # only clips of exactly this many samples are kept (15 s at sr=8000)

count = 0  # number of training clips accepted so far
for i in range(n_files):
  # Raw string: '\d' inside a normal string literal is an invalid escape sequence.
  # The capturing group makes re.split return [prefix, digits, rest, ...].
  res = re.split(r'(\d+)', all_files[i])
  if len(res) < 2:
    # No digits in the file name, so there is no file number to locate the label with.
    continue
  label = res[0]
  file_no = res[1]

  f_path = temp_path+'bleeded/'+all_files[i]
  f, sr = lb.load(f_path, sr=8000, mono=True)
  if len(f) != clip_len:
    # Skipped clips do not count towards the training quota.
    continue

  # The matching target lives under grouped_by_three/<label>/<file_no>.wav.
  l_path = temp_path+'grouped_by_three/'+label+'/'+file_no+'.wav'
  l, lsr = lb.load(l_path, sr=8000, mono=True)

  # The first n_train accepted clips form the training set; the rest are test data.
  if count < n_train:
    training_data.append(f)
    training_label.append(l)

    count += 1
    # count is now the number of clips actually read, so the message is exact.
    if count%50 == 0:
      print('{} training files read out of {}'.format(count, n_train))
  else:
    test_data.append(f)
    test_label.append(l)

50 training files read out of 500
100 training files read out of 500
150 training files read out of 500
200 training files read out of 500
250 training files read out of 500
300 training files read out of 500
350 training files read out of 500
400 training files read out of 500
450 training files read out of 500
500 training files read out of 500


In [None]:
# Convert each Python list of clips into a NumPy array, keeping the same names.
training_data, training_label, test_data, test_label = (
    np.array(clips)
    for clips in (training_data, training_label, test_data, test_label)
)

In [None]:
# Sanity check: shapes of the training and test input arrays.
training_data.shape, test_data.shape

((500, 120000), (260, 120000))

In [None]:
# Wrap each array in a DataFrame so it can be cached on Drive as CSV.
td, tl = pd.DataFrame(training_data), pd.DataFrame(training_label)
ted, tel = pd.DataFrame(test_data), pd.DataFrame(test_label)

# One row per clip; no header row and no index column are written.
for frame, csv_path in (
    (td, '/drive/My Drive/Projects/Bleeding-Removal/Dataset/training_data.csv'),
    (tl, '/drive/My Drive/Projects/Bleeding-Removal/Dataset/training_label.csv'),
    (ted, '/drive/My Drive/Projects/Bleeding-Removal/Dataset/test_data.csv'),
    (tel, '/drive/My Drive/Projects/Bleeding-Removal/Dataset/test_label.csv'),
):
  frame.to_csv(csv_path, header=False, index=False)

### Read the CSVs if they were already created (skip the dataset-creation cells above)

In [None]:
# The CSVs were written with header=False, so every row is a sample. Without
# header=None pandas consumes the first sample as column names and each array
# comes back one row short.
# dtype=np.float32 matches librosa.load's default output dtype and halves the
# memory of pandas' default float64.
# No tpu_strategy.scope() here: a strategy scope governs variable creation and
# does nothing for plain file reads.
training_data = np.array(pd.read_csv('/drive/My Drive/Projects/Bleeding-Removal/Dataset/training_data.csv', header=None, dtype=np.float32))
training_label = np.array(pd.read_csv('/drive/My Drive/Projects/Bleeding-Removal/Dataset/training_label.csv', header=None, dtype=np.float32))
test_data = np.array(pd.read_csv('/drive/My Drive/Projects/Bleeding-Removal/Dataset/test_data.csv', header=None, dtype=np.float32))
test_label = np.array(pd.read_csv('/drive/My Drive/Projects/Bleeding-Removal/Dataset/test_label.csv', header=None, dtype=np.float32))

### STFT

In [None]:
def calc_stft(data, n_fft=512):
  """Return the STFT magnitude of every signal in ``data``.

  Args:
    data: iterable of 1-D audio signals; each one is passed to ``lb.stft``.
    n_fft: FFT window size forwarded to ``lb.stft``. Defaults to 512, the
      value that used to be hard-coded here.

  Returns:
    ``np.ndarray`` with one magnitude array per input signal, stacked along
    axis 0. Only ``np.abs`` of the complex STFT is kept, so phase is discarded.
  """
  return np.array([np.abs(lb.stft(signal, n_fft=n_fft)) for signal in data])

In [None]:
# Magnitude spectrograms for the inputs and targets of both splits,
# computed in the same order as before: train data, train label, test data, test label.
training_stft, training_label_stft, test_stft, test_label_stft = map(
    calc_stft, (training_data, training_label, test_data, test_label)
)

In [None]:
# Sanity check: the input and target spectrogram arrays should have the same shape.
training_stft.shape, training_label_stft.shape

((499, 257, 938), (499, 257, 938))

### CNN Model

In [None]:
import time
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input,Dense,Flatten,Dropout,Conv2D,MaxPool2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import MaxPooling2D, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers

In [None]:
# Global batch size: 16 examples for each replica the strategy keeps in sync.
batch_size = 16 * tpu_strategy.num_replicas_in_sync
epochs = 2
# One single-channel 257 x 938 "image" per clip; 257 x 938 is the per-clip
# spectrogram shape reported by the STFT shape check earlier in the notebook.
inp = Input(shape =(257, 938, 1))
# Number of values in one 257 x 938 spectrogram.
out_dim = 257*938

In [None]:
# Fully-convolutional spectrogram-to-spectrogram network.
#
# The earlier Flatten -> Dense(out_dim) head could not be allocated: after two
# valid 3x3 convs and two 2x2 poolings the feature map is 62 x 233 x 64 =
# 924,544 values, and a dense layer from that to 257*938 = 241,066 outputs
# needs about 2.2e11 weights (roughly 890 GB in float32), which is the
# ResourceExhaustedError this cell used to raise.
#
# padding='same' with no pooling keeps the 257 x 938 grid intact, so a final
# 1x1 convolution can emit one value per time-frequency cell directly.
with tpu_strategy.scope():
  start = time.time()

  x = Conv2D(32, 3, padding='same', activation='relu')(inp)
  x = BatchNormalization()(x)
  x = Dropout(0.30)(x)

  x = Conv2D(64, 3, padding='same', activation='relu')(x)
  x = BatchNormalization()(x)
  x = Dropout(0.30)(x)

  # Collapse the 64 feature maps to a single channel. relu keeps the output
  # non-negative, like the magnitude targets and the previous Dense head.
  x = Conv2D(1, 1, activation='relu')(x)
  # Drop the channel axis: (257, 938, 1) -> (257, 938), the shape of one target.
  x = Reshape((257, 938))(x)

ResourceExhaustedError: ignored

In [None]:
# Expose the spectrogram arrays under the names the training and evaluation cells use.
x_train, y_train = training_stft, training_label_stft
X_TEST, Y_TEST = test_stft, test_label_stft

In [None]:
# Number of values in one 257 x 938 spectrogram (the same product stored in out_dim).
257*938

241066

In [None]:
# Create and compile the model inside the strategy scope so that the model and
# its optimizer state belong to the same distribution strategy as the layers.
with tpu_strategy.scope():
  bleed_removal = Model(inputs = inp, outputs = x)
  # NOTE(review): 'accuracy' is not informative for a regression trained with
  # MSE; it is kept only because a later cell plots history['accuracy'].
  bleed_removal.compile(loss='MeanSquaredError', optimizer = 'adam', metrics='accuracy')
#bleed_removal.summary()
bleed_removal_train = bleed_removal.fit(x_train,y_train, epochs = epochs ,batch_size=batch_size)
end = time.time()
# Report elapsed seconds since `start` (set in the model-building cell);
# the previous code printed the `time` module object instead.
print("time taken", end - start)

Epoch 1/50


ValueError: ignored

In [None]:
# Loss and metric values on the held-out split; verbose=0 suppresses the progress bar.
test_eval = bleed_removal.evaluate(x=X_TEST, y=Y_TEST, verbose=0)
print('Test', test_eval)

In [None]:
# Per-epoch training accuracy recorded by fit(). Title and axis labels are
# added so the figure is readable on its own.
fig, ax = plt.subplots()
ax.plot(bleed_removal_train.history['accuracy'])
ax.set(title='Training accuracy', xlabel='epoch', ylabel='accuracy')
ax.legend(['training'], loc = 'upper left')
plt.show()

In [None]:
# Per-epoch training loss (mean squared error) recorded by fit(). Title and
# axis labels are added so the figure is readable on its own.
fig, ax = plt.subplots()
ax.plot(bleed_removal_train.history['loss'])
ax.set(title='Training loss', xlabel='epoch', ylabel='loss')
ax.legend(['training'], loc = 'upper left')
plt.show()

In [None]:
# Run the network on the held-out inputs, showing a progress bar.
predictions = bleed_removal.predict(X_TEST, verbose=1)
# Index of the largest predicted value along axis 1 for every remaining position.
result = predictions.argmax(axis=1)
result

In [None]:
# tf.math.confusion_matrix needs 1-D integer class ids of equal length, but
# Y_TEST holds float spectrograms and `result` holds argmax indices. Reduce the
# targets the same way `result` was derived (argmax over axis 1) and flatten
# both, so every remaining position counts as one sample.
true_bins = np.argmax(Y_TEST, axis=1).ravel()
pred_bins = np.asarray(result).ravel()
# argmax over axis 1 can only yield values in [0, Y_TEST.shape[1]).
cm = tf.math.confusion_matrix(true_bins, pred_bins, num_classes=Y_TEST.shape[1])
cm

In [None]:
# Save to the mounted Drive project folder, like every other path in this
# notebook. The previous '/home/rajesh/Desktop/...' location is a local desktop
# directory that is not present on the Colab runtime.
savemodelpath = '/drive/My Drive/Projects/Bleeding-Removal/bleed_model.h5'
bleed_removal.save(savemodelpath)