In [None]:
# Switch the working directory; the relative data paths used in the cells
# below ("recordings", "normalized_recordings", ...) resolve against it.
%cd /ai-adulteration-detection

# Normalization

In [None]:
import os
!pip install pydub
from pydub import AudioSegment, effects 

In [None]:
# Normalize every WAV file in `input_path` with pydub and write the result,
# under the same file name, to `output_path`.
input_path = "recordings"
output_path = "normalized_recordings"

# AudioSegment.export() opens the target file directly and does not create
# missing directories, so make sure the output folder exists first.
os.makedirs(output_path, exist_ok=True)

# Sorted so the files are processed (and printed) in a stable order.
filenames = sorted(f for f in os.listdir(input_path) if f.endswith(".wav"))

for recording in filenames:
  print(recording)
  rawsound = AudioSegment.from_wav(os.path.join(input_path, recording))
  # effects.normalize() applies one gain to the whole file (pydub defaults).
  normalizedsound = effects.normalize(rawsound)
  normalizedsound.export(os.path.join(output_path, recording), format="wav")

Inspect attributes of recordings before and after normalization

In [None]:
# Print basic amplitude statistics of every recording BEFORE normalization.
input_path = "recordings"
filenames = sorted(name for name in os.listdir(input_path) if name.endswith(".wav"))

# (caption, AudioSegment attribute) pairs, reported in this order per file.
#   sample_width : number of bytes in each sample
#   rms          : a measure of loudness (average amplitude), NOT in dB
reported_attributes = (
  ("Number of bytes in sample", "sample_width"),
  ("Loudness", "rms"),
  ("Highest amplitude", "max"),
  ("Highest possible amplitude", "max_possible_amplitude"),
)

for recording in filenames:
  print(recording)
  rawsound = AudioSegment.from_wav(os.path.join(input_path, recording))
  for caption, attribute in reported_attributes:
    print(caption, getattr(rawsound, attribute))
  print("----------")

In [None]:
# Print the same amplitude statistics for the recordings AFTER normalization,
# so they can be compared with the values of the raw files.
output_path = "normalized_recordings"
filenames = sorted(name for name in os.listdir(output_path) if name.endswith(".wav"))

# (caption, AudioSegment attribute) pairs, reported in this order per file.
#   sample_width : number of bytes in each sample
#   rms          : a measure of loudness (average amplitude), NOT in dB
reported_attributes = (
  ("Number of bytes in sample", "sample_width"),
  ("Loudness", "rms"),
  ("Highest amplitude", "max"),
  ("Highest possible amplitude", "max_possible_amplitude"),
)

for recording in filenames:
  print(recording)
  # Despite its name, `rawsound` holds the already-normalized audio here.
  rawsound = AudioSegment.from_wav(os.path.join(output_path, recording))
  for caption, attribute in reported_attributes:
    print(caption, getattr(rawsound, attribute))
  print("----------")

# Sound preprocessing - creating 2sec chunks

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

In [None]:
# `input_path` and `filenames` defined here are the inputs of the chunking
# cell further down; list the files once as a visual check.
input_path = "normalized_recordings"
filenames = sorted(name for name in os.listdir(input_path) if name.endswith(".wav"))
for recording in filenames:
  print(recording)

### Some plotting functions

In [None]:
from scipy.io.wavfile import read

# read and plot recordings by sample
def plot_part_of_audio(input_path, start_sample=88200, end_sample=132300):
  """Plot one sample-index window of every WAV file found in a directory.

  One figure is shown per file; the x axis is the sample index relative to
  the start of the window.

  Args:
    input_path: Directory that is scanned (non-recursively) for ``.wav`` files.
    start_sample: Index of the first sample to plot (inclusive).
    end_sample: Index at which the window stops (exclusive).

  The defaults are the values that used to be hard-coded in the slice; they
  correspond to the 2 s - 3 s interval if the files are sampled at 44.1 kHz.
  """
  filenames = sorted(f for f in os.listdir(input_path) if f.endswith(".wav"))
  for f in filenames:
    sample_rate, recording = read(os.path.join(input_path, f))
    plt.plot(recording[start_sample:end_sample])
    plt.xlabel("Sample")
    plt.ylabel("Amplitude")
    # f[:-4] drops the ".wav" extension from the title.
    plt.title(f[:-4] + "\nSample Rate:" + str(sample_rate))
    plt.show()

In [None]:
# plot_part_of_audio(input_path)

In [None]:
from scipy.io.wavfile import read

# read and plot recordings by time
def plot_audio(input_path):
  """Plot the full waveform of every WAV file in a directory against time.

  For each ``.wav`` file in ``input_path`` (non-recursive, alphabetical
  order) the duration in seconds is printed and one figure is shown.

  Args:
    input_path: Directory that is scanned for ``.wav`` files.
  """
  filenames = sorted(f for f in os.listdir(input_path) if f.endswith(".wav"))
  for f in filenames:
    sample_rate, recording = read(os.path.join(input_path, f))
    n_samples = recording.shape[0]
    length = n_samples/sample_rate
    print(f"Length of this file = {length} s")
    # Sample k is taken at k / sample_rate. np.linspace(0, length, n_samples)
    # would include the end point and stretch the axis by n/(n-1).
    time = np.arange(n_samples) / sample_rate
    plt.plot(time, recording)
    plt.xlabel("Time [s]")
    plt.ylabel("Amplitude")
    # f[:-4] drops the ".wav" extension from the title.
    plt.title(f[:-4] + "\nSample Rate:" + str(sample_rate))
    plt.show()

In [None]:
# plot_audio(input_path)

### Create 2-second chunks

In [None]:
!pip install pydub
from pydub import AudioSegment
from pydub.utils import make_chunks
import random



In [None]:
# Cut every normalized recording into 2-second chunks, several times with a
# shifted start offset ("sliding" passes), and export each full-length chunk
# as "<Label>_<index>.wav" into `output_path`.
output_path = "sliding_chunks"
chunk_length_ms = 2000  # pydub calculates in millisec
edge_trim_ms = 2000     # removed from the beginning and the end of a recording
window_step_ms = 200    # extra start offset added on every sliding pass
n_passes = 10           # 10 passes x 200 ms = one full chunk length

# AudioSegment.export() does not create missing directories.
os.makedirs(output_path, exist_ok=True)

# Substrings that identify the class of a recording, checked in this order;
# a file belongs to the first class whose tag occurs in its name.
class_tags = (
  ("Cheese", ("Cheese1", "Cheese2", "Cheese3")),
  ("OrganicButter", ("OrganicButter",)),
  ("NonorganicButter", ("NonorganicButter",)),
  ("Empty", ("Empty",)),
)

# Only about a third of the cheese chunks are kept - presumably because three
# cheese recordings face one recording of each other class (TODO confirm).
cheese_keep_fraction = 1/3
# Dedicated, seeded generator so the cheese subsample is reproducible and the
# global `random` state is left untouched.
rng = random.Random(13)

# Exported chunks per class; the running count doubles as file-name index.
exported = {"OrganicButter": 0, "NonorganicButter": 0, "Cheese": 0, "Empty": 0}
all_c = 0  # every full-length cheese chunk seen, whether exported or not

tot_number_of_chunks = 0

for f in filenames:
  label = next((name for name, tags in class_tags if any(tag in f for tag in tags)), None)
  if label is None:
    # Previously such a file was exported under the name left over from the
    # preceding chunk (or raised NameError on the very first chunk).
    print("skipping recording with unknown class:", f)
    continue

  recording = AudioSegment.from_wav(os.path.join(input_path, f))
  for s in range(n_passes):
    # Always slice the ORIGINAL recording. Re-assigning `recording` here made
    # the trimming cumulative (start offsets 2000, 4200, 6600, 9200, 12000, ...
    # ms), so several passes fell on the same 2 s grid and exported identical
    # chunks, while the recording lost 4+ s on every pass.
    window = recording[edge_trim_ms + window_step_ms*s:-edge_trim_ms]
    number_of_chunks = math.floor(window.duration_seconds/2)
    tot_number_of_chunks += number_of_chunks

    for chunk in make_chunks(window, chunk_length_ms):
      if chunk.duration_seconds != chunk_length_ms/1000:
        continue  # trailing remainder that is shorter than a full chunk

      if label == "Cheese":
        all_c += 1
        if rng.random() > cheese_keep_fraction:
          continue  # cheese chunk dropped by the subsampling

      chunk_name = "{0}_{1}.wav".format(label, exported[label])
      exported[label] += 1
      if label == "Cheese":
        print("exporting", chunk_name)
      chunk.export(os.path.join(output_path, chunk_name), format="wav")

# Keep the per-class counters under the names the following cells read.
o_b, n_b, c, e = (exported[k] for k in ("OrganicButter", "NonorganicButter", "Cheese", "Empty"))

print("Exporting finished. Total number of chunks is {0}".format(o_b + n_b + c + e))

exporting Cheese_0.wav
exporting Cheese_1.wav
exporting Cheese_2.wav
exporting Cheese_3.wav
exporting Cheese_4.wav
exporting Cheese_5.wav
exporting Cheese_6.wav
exporting Cheese_7.wav
exporting Cheese_8.wav
exporting Cheese_9.wav
exporting Cheese_10.wav
exporting Cheese_11.wav
exporting Cheese_12.wav
exporting Cheese_13.wav
exporting Cheese_14.wav
exporting Cheese_15.wav
exporting Cheese_16.wav
exporting Cheese_17.wav
exporting Cheese_18.wav
exporting Cheese_19.wav
exporting Cheese_20.wav
exporting Cheese_21.wav
exporting Cheese_22.wav
exporting Cheese_23.wav
exporting Cheese_24.wav
exporting Cheese_25.wav
exporting Cheese_26.wav
exporting Cheese_27.wav
exporting Cheese_28.wav
exporting Cheese_29.wav
exporting Cheese_30.wav
exporting Cheese_31.wav
exporting Cheese_32.wav
exporting Cheese_33.wav
exporting Cheese_34.wav
exporting Cheese_35.wav
exporting Cheese_36.wav
exporting Cheese_37.wav
exporting Cheese_38.wav
exporting Cheese_39.wav
exporting Cheese_40.wav
exporting Cheese_41.wav
ex

In [None]:
# Sanity check on the export: the number of full-length chunks expected from
# the recording durations must equal the number of full-length chunks seen.
# all_c is used instead of c because c only counts the cheese chunks that
# survived the random 1/3 subsampling, while all_c counts every one of them.
tot_number_of_chunks == o_b + n_b + all_c + e # check point

True

In [None]:
# Exported chunks per class, in the order OrganicButter, NonorganicButter,
# Cheese, Empty.
print(o_b, n_b, c, e) # the counts should be close to each other for a balanced dataset

2507 2509 2490 2507


# Extract MFCCs and save as npz

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display

In [None]:
chunks_path = "sliding_chunks"
# Sorted, so the seeded shuffle in the split cell below starts from a
# reproducible order regardless of how the filesystem lists the directory.
filenames = sorted(name for name in os.listdir(chunks_path) if name.endswith('.wav'))

In [None]:
# Important: confirm that all exported chunks are listed before continuing.
# NOTE(review): "wait until it reads all of the chunks" presumably refers to a
# slow or still-syncing filesystem; re-run the listing cell above until this
# count is stable - confirm.
len(filenames) # check number of chunks, wait until it reads all of the chunks

10013

In [None]:
# split data into training, validation and test randomly
import random
shuffle = True
if shuffle:
  # Fixed seed -> the same permutation on every fresh run. The shuffle is done
  # in place, so re-running this cell alone permutes the list a second time.
  random.seed(13)
  random.shuffle(filenames)

# 80 % training / 10 % validation / 10 % test, cut at two index boundaries.
total = len(filenames)
train_end = int(total*0.8)
valid_end = int(total*0.9)

list_train = filenames[:train_end]
list_valid = filenames[train_end:valid_end]
list_test = filenames[valid_end:]

In [None]:
# Integer class id for each label. The keys are the label prefixes of the
# chunk file names, i.e. the part before the first "_" ("Cheese_12.wav").
dict_classes = {"Cheese":0, "Empty":1, "NonorganicButter":2, "OrganicButter":3}

### Obtain MFCCs


In [None]:
def extract_features_MFCCs(filename):
  """Load one chunk file and return its MFCC matrix.

  `filename` is the bare file name of a chunk; it is joined onto the
  module-level `chunks_path`. The audio is loaded at its native sample rate
  (sr=None) and 40 MFCCs are computed. The matrix returned by librosa is
  transposed before it is handed back, so the coefficient axis comes last.
  """
  audio, native_rate = librosa.load(
      os.path.join(chunks_path, filename), sr=None, res_type="kaiser_fast")
  return librosa.feature.mfcc(y=audio, sr=native_rate, n_mfcc=40).T

In [None]:
def create_array_MFCCs(filenames):
    """Build the feature tensor and label vector for a list of chunk files.

    Every file name is expected to look like "<Label>_<index>.wav"; the part
    before the first "_" is mapped to an integer through the module-level
    `dict_classes`. Features come from `extract_features_MFCCs` and are cut to
    the first 173 frames. Files that cannot be processed (unknown label, load
    error, fewer than 173 frames) are reported and skipped, so the result may
    contain fewer rows than `filenames`.

    Args:
        filenames: Iterable of chunk file names.

    Returns:
        (X_spect, y_arr): a float64 array of shape (n, 173, 40) and an array
        of n class ids, where row i of X_spect belongs to y_arr[i].
    """
    # 173 frames is what a 2 s chunk yields at 44.1 kHz with librosa's default
    # hop length of 512 - TODO confirm the sample rate of the chunks.
    n_frames, n_coeffs = 173, 40

    features = []
    classes = []
    count = 0
    for filename in filenames:
        count += 1
        try:
            # Resolve the label BEFORE storing anything: the old code appended
            # the features first, so an unknown label left an extra row in X
            # and shifted every following label by one.
            label = dict_classes[filename.split("_")[0]]
            # Normalize for small shape differences between chunks.
            spect = extract_features_MFCCs(filename)[:n_frames, :]
            if spect.shape != (n_frames, n_coeffs):
                raise ValueError("unexpected MFCC shape {0}".format(spect.shape))
        except Exception as err:
            # `Exception` rather than a bare except, so Ctrl-C / kernel
            # interrupt still stops a long extraction run.
            print("Couldn't process:", count, filename, repr(err))
            continue

        features.append(spect)
        classes.append(label)
        if count % 100 == 0:
            print("Currently processing:", count)

    # Stack once at the end; np.append inside the loop re-copied the whole
    # array for every file (quadratic in the number of chunks).
    X_spect = np.empty((0, n_frames, n_coeffs))
    if features:
        X_spect = np.array(features, dtype=X_spect.dtype)
    y_arr = np.array(classes)
    return X_spect, y_arr

In [None]:
%mkdir npz_files
%cd /npz_files

/content/drive/My Drive/Data Science Projects/Peynir/Peynir-Paper/npz_files


In [None]:
# Extract the MFCC features and labels of the test split and store them in the
# current working directory. np.savez appends ".npz" to the name and, because
# the arrays are passed positionally, saves them under the keys "arr_0"
# (features) and "arr_1" (labels).
X_test, y_test = create_array_MFCCs(list_test)
print(X_test.shape, y_test.shape)
np.savez("shuffled_test_arr_MFCCs", X_test, y_test)

Currently processing: 100
Currently processing: 200
Currently processing: 300
Currently processing: 400
Currently processing: 500
Currently processing: 600
Currently processing: 700
Currently processing: 800
Currently processing: 900
Currently processing: 1000
(1002, 173, 40) (1002,)


In [None]:
# Extract the MFCC features and labels of the validation split and store them
# in the current working directory. np.savez appends ".npz" to the name and,
# because the arrays are passed positionally, saves them under the keys
# "arr_0" (features) and "arr_1" (labels).
X_valid, y_valid = create_array_MFCCs(list_valid)
print(X_valid.shape, y_valid.shape)
np.savez("shuffled_valid_arr_MFCCs", X_valid, y_valid)

Currently processing: 100
Currently processing: 200
Currently processing: 300
Currently processing: 400
Currently processing: 500
Currently processing: 600
Currently processing: 700
Currently processing: 800
Currently processing: 900
Currently processing: 1000
(1001, 173, 40) (1001,)


In [None]:
# Extract the MFCC features and labels of the training split (the largest and
# therefore slowest of the three) and store them in the current working
# directory. np.savez appends ".npz" to the name and, because the arrays are
# passed positionally, saves them under the keys "arr_0" (features) and
# "arr_1" (labels).
X_train, y_train = create_array_MFCCs(list_train)
print(X_train.shape, y_train.shape)
np.savez("shuffled_train_arr_MFCCs", X_train, y_train)

Currently processing: 100
Currently processing: 200
Currently processing: 300
Currently processing: 400
Currently processing: 500
Currently processing: 600
Currently processing: 700
Currently processing: 800
Currently processing: 900
Currently processing: 1000
Currently processing: 1100
Currently processing: 1200
Currently processing: 1300
Currently processing: 1400
Currently processing: 1500
Currently processing: 1600
Currently processing: 1700
Currently processing: 1800
Currently processing: 1900
Currently processing: 2000
Currently processing: 2100
Currently processing: 2200
Currently processing: 2300
Currently processing: 2400
Currently processing: 2500
Currently processing: 2600
Currently processing: 2700
Currently processing: 2800
Currently processing: 2900
Currently processing: 3000
Currently processing: 3100
Currently processing: 3200
Currently processing: 3300
Currently processing: 3400
Currently processing: 3500
Currently processing: 3600
Currently processing: 3700
Currently 