In [None]:
from scipy.io import wavfile
import scipy.io
from scipy import signal
from scipy.fft import fftshift, fft, fftfreq
import numpy as np
import matplotlib.pyplot as plt
import scipy
import math
import os


In [None]:
def read_audio(audio) :
    """Load a WAV file and report its duration.

    Parameters
    ----------
    audio : str
        Path handed to ``scipy.io.wavfile.read``.

    Returns
    -------
    tuple
        ``(samplerate, data, length)`` where ``length`` is the number of
        frames (``data.shape[0]``) divided by ``samplerate``, i.e. seconds.
    """
    rate, samples = wavfile.read(audio)
    duration = samples.shape[0] / rate
    return rate, samples, duration

def signal_show(data,samplerate):
    """Plot the waveform ``data`` against time in seconds on the current axes.

    The time axis runs from 0 to ``data.shape[0] / samplerate`` with one
    point per frame. The figure is not shown explicitly; that is left to
    the notebook or the caller.
    """
    n_frames = data.shape[0]
    duration = n_frames / samplerate
    time_axis = np.linspace(0., duration, n_frames)

    plt.plot(time_axis, data)
    plt.xlabel("Time [s]")
    plt.ylabel("Amplitude")
    #plt.show()

def spectrogram(data,size,length,samplerate) :
    """Draw a thresholded dB spectrogram of ``data`` and return it with its colour limits.

    The spectrum comes from ``plt.specgram`` (Hann window of ``NFFT`` samples,
    overlap of 128 samples, mean detrending). It is divided by its mean,
    converted with ``20 * log10`` and every cell at or below the value found at
    the 90 % position of the sorted cells is set to the global minimum. The
    result is drawn with ``plt.imshow`` on the current figure.

    Parameters
    ----------
    data : array-like
        Signal handed to ``plt.specgram``.
    size : int
        Number of samples used for the time axis (callers in this notebook
        pass ``data.shape[0]``).
    length : float
        Duration in seconds; ``size / length`` is used as the sampling rate.
    samplerate : int
        Kept for call compatibility; the function does not read it.

    Returns
    -------
    tuple
        ``(spectrum_db, min_db, max_db)`` - the thresholded dB array and its
        minimum and maximum, which are also the colour limits of the image.
    """
    NFFT = 1024
    subsampling_rate = size / length
    dt = 1 / subsampling_rate

    # Window length is tied to NFFT so the two cannot drift apart.
    window_hann = np.hanning(NFFT)

    spectrum, freqs, time, im = plt.specgram(
        data, Fs=subsampling_rate, cmap="jet", NFFT=NFFT,
        noverlap=128, window=window_hann, detrend="mean",
    )

    # Same guard as spectrogram_afterfilter: exact zeros (e.g. a constant
    # segment after mean detrending) would turn into -inf under log10, and
    # -inf would then become both the fill value and the returned minimum.
    # If no bin is positive the data is left untouched.
    positive_bins = spectrum[spectrum > 0]
    if positive_bins.size:
        spectrum = np.where(spectrum <= 0, positive_bins.min(), spectrum)

    # NOTE(review): specgram's default mode returns power, for which the usual
    # dB factor is 10, not 20 - left unchanged so existing outputs keep their scale.
    spectrum_db = 20 * np.log10(spectrum / np.mean(spectrum))

    # Keep only the strongest ~10 % of cells; flatten the rest to the minimum.
    sorted_arr = np.sort(spectrum_db.flatten())
    percentile_index = int(0.9 * len(sorted_arr))
    threshold = sorted_arr[percentile_index - 1]
    spectrum_db[spectrum_db <= threshold] = np.min(spectrum_db)

    freq = np.linspace(0, subsampling_rate / 2, num=spectrum_db.shape[0])
    df = freq[1] - freq[0]

    min_db = np.min(spectrum_db)
    max_db = np.max(spectrum_db)

    # The image spans sample times 0 .. size * dt on x and the specgram
    # frequency bins (plus one bin width) on y.
    plt.imshow(
        spectrum_db,
        extent=(0.0, size * dt, freqs[0], freqs[-1] + df),
        origin="lower",
        cmap="jet",
        aspect="auto", vmin=min_db, vmax=max_db,
    )
    plt.colorbar()
    plt.title('Spectrogram of tapir')
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')
    return spectrum_db, min_db, max_db

In [None]:
def spectrogram_afterfilter(data,size,length,samplerate,min_db,max_db) :
    """Draw a thresholded dB spectrogram using a colour range supplied by the caller.

    Non-positive spectrum cells are replaced by the smallest positive cell
    before the ``20 * log10`` conversion. Cells at or below the value found at
    the 95 % position of the sorted cells are set to the global minimum. The
    image is drawn on the current figure with ``vmin=min_db`` and
    ``vmax=max_db``.

    ``samplerate`` is accepted but not read; the rate used is ``size / length``.
    Returns the thresholded dB array.
    """
    NFFT=1024
    subsampling_rate = size/length
    print("Subsampling rate: ",subsampling_rate)
    dt = 1/subsampling_rate
    ts = np.arange(size) * dt

    spectrum,freqs,time,im = plt.specgram(
        data, Fs=subsampling_rate, cmap="jet", NFFT=NFFT,
        noverlap=128, window=np.hanning(1024), detrend="mean",
    )

    # log10 cannot take zeros: lift every non-positive cell to the smallest positive one
    floor_value = np.amin(spectrum[spectrum > 0])
    spectrum = np.where(spectrum <= 0, floor_value, spectrum)

    spectrum_db = 20 * np.log10(spectrum / np.mean(spectrum))

    # value sitting at the 95 % position of the sorted cells
    ordered_cells = np.sort(spectrum_db, axis=None)
    cutoff = ordered_cells[int(0.95 * len(ordered_cells)) - 1]
    spectrum_db[spectrum_db <= cutoff] = np.min(spectrum_db)

    freq_axis = np.linspace(0, subsampling_rate/2, num=spectrum_db.shape[0])
    df = freq_axis[1] - freq_axis[0]

    print("spectrum_size",spectrum.shape)
    image_extent = (ts[0], ts[-1]+dt, freqs[0], freqs[-1]+df)
    plt.imshow(
        spectrum_db,
        extent=image_extent,
        origin="lower",
        cmap="jet",
        aspect="auto",
        vmin=min_db,
        vmax=max_db,
    )
    plt.colorbar()
    plt.title('Spectrogram of tapir')
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')

    # Disabled save-to-disk snippet, kept as an inert string literal.
    """plotpath='/content/drive/MyDrive/FYP/Spectrogram/Tapir/Tapir_zoo_png/01_01_ch0_zoo.png'
    plt.savefig(plotpath, bbox_inches="tight",pad_inches = 0)"""
    return spectrum_db

In [None]:
def write_audio(file_name, data, samplerate=44100):
    """Write ``data`` to ``file_name`` as a WAV file.

    Parameters
    ----------
    file_name : str
        Destination path passed to ``scipy.io.wavfile.write``.
    data : numpy.ndarray
        Samples to store; the WAV sample format follows the array dtype.
    samplerate : int, optional
        Sample rate written to the header. Defaults to 44100, the value that
        was previously hard-coded, so existing two-argument calls are unchanged.
    """
    wavfile.write(file_name, samplerate, data)

In [None]:
# Trimmed-audio source folders, one per species and recording location.
# Raw strings keep the Windows backslashes literal; the previous plain
# literals relied on invalid escape sequences such as "\F" and "\T".
Tapir_zoo_path = r"D:\FYP dataset new\Temp trim audio\Tapir_Zoo_trim"
Tiger_zoo_path = r"D:\FYP dataset new\Temp trim audio\Tiger_Zoo_trim"
Elephant_zoo_path = r"D:\FYP dataset new\Temp trim audio\Elephant_Zoo_trim"
SunBear_zoo_path = r"D:\FYP dataset new\Temp trim audio\SunBear_Zoo_trim"

Tapir_wild_path = r"D:\FYP dataset new\Temp trim audio\Tapir_Wild_trim"
Tiger_wild_path = r"D:\FYP dataset new\Temp trim audio\Tiger_Wild_trim"
Elephant_wild_path = r"D:\FYP dataset new\Temp trim audio\Elephant_Wild_trim"
SunBear_wild_path = r"D:\FYP dataset new\Temp trim audio\SunBear_Wild_trim"

In [None]:
# File names found in the tapir zoo folder; the bare name on the last line
# makes the notebook echo the listing as the cell output.
Tapir_zoo_list = os.listdir(Tapir_zoo_path)
Tapir_zoo_list

In [None]:
# Full path of every file in the tapir zoo folder.
# The folder is listed again here so the cell does not depend on the preview
# cell having been run. The earlier empty-list initialisation was a dead store.
Tapir_zoo_list = os.listdir(Tapir_zoo_path)
Tapir_audio = [os.path.join(Tapir_zoo_path, file) for file in Tapir_zoo_list]


In [None]:
# Zoo paths first, then the wild recordings appended in listing order.
Tapir_wild_list = os.listdir(Tapir_wild_path)
Tapir_audio_combine = list(Tapir_audio)
Tapir_audio_combine.extend(Tapir_wild_path + "\\" + file for file in Tapir_wild_list)


# Start here

In [None]:
SunBear_zoo_list = os.listdir(SunBear_zoo_path)
SunBear_wild_list = os.listdir(SunBear_wild_path)

# Copy every wild Sun Bear clip into "Sun Bear/" with a location prefix.
# NOTE(review): SunBear_zoo_list is listed but never iterated in this cell -
# confirm whether the zoo clips are handled elsewhere.
for file in SunBear_wild_list:
    path = SunBear_wild_path + "\\"
    # The prefix is derived from the folder name: "Zoo" wins over "Wild",
    # and neither match leaves the prefix empty.
    location = "zoo_" if "Zoo" in path else ("wild_" if "Wild" in path else "")

    samplerate, data, length = read_audio(path + file)
    write_audio("Sun Bear/" + location + file, data)

# Pitch shifting


In [None]:

def manipulate(data, sampling_rate, pitch_factor):
    """Pitch-shift ``data`` by ``pitch_factor`` steps with librosa.

    Parameters
    ----------
    data : numpy.ndarray
        Signal passed to ``librosa.effects.pitch_shift``; the caller in this
        notebook converts it to float first.
    sampling_rate : int
        Forwarded as ``sr``.
    pitch_factor : float
        Forwarded as ``n_steps``.

    Returns
    -------
    The array returned by ``librosa.effects.pitch_shift``.

    Notes
    -----
    A later cell defines another ``manipulate`` (time shift) with a different
    signature, which replaces this one once that cell has run.
    """
    # Imported locally because the notebook's `import librosa` lives in a
    # later cell; without this a top-to-bottom run fails with NameError here.
    import librosa

    return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=pitch_factor)

In [None]:
# Pitch-shift every environment-noise clip by 3 steps and save it as "pitch_<name>".
# Raw strings keep the Windows backslashes literal (no invalid escapes).
fol_path = r"D:\FYP dataset new\MMU folder_FYP_Spectrogram creation\Environment data\Environment noise_wav"
fol_list = os.listdir(fol_path)
pitch_out_dir = r"D:\FYP dataset new\MMU folder_FYP_Spectrogram creation\Environment data\Environment noise_aug_spectrogram"

for file in fol_list:
    if file == ".ipynb_checkpoints":
        continue
    samplerate, data, length = read_audio(os.path.join(fol_path, file))
    float_data = data.astype(float)
    # `manipulate` must still be the pitch-shift helper here; a later cell
    # rebinds the name to the time-shift function.
    data_pitch = manipulate(float_data, samplerate, 3)

    # Convert back to the dtype the file was read with. A plain astype(int)
    # gives a platform-dependent width (int64 is rejected by wavfile.write,
    # int32 rescales 16-bit audio) and does not clip overshoot.
    if np.issubdtype(data.dtype, np.integer):
        limits = np.iinfo(data.dtype)
        int_data = np.clip(np.rint(data_pitch), limits.min, limits.max).astype(data.dtype)
    else:
        int_data = data_pitch.astype(data.dtype)

    write_audio(os.path.join(pitch_out_dir, "pitch_" + file), int_data)

# Test read result

In [None]:
# Spot-check one pitch-shifted clip: waveform, playback and spectrogram.
example = "Elephant_pitch"
# The folder is named once, as a raw string so the backslashes stay literal.
aug_dir = r"D:\FYP dataset new\MMU folder_FYP_Spectrogram creation\Environment data\Environment noise_aug_spectrogram"
list1 = os.listdir(aug_dir)

samplerate, data, length = read_audio(os.path.join(aug_dir, list1[3]))
signal_show(data, samplerate)

# NOTE(review): `Audio` is not imported in any cell visible here; on a fresh
# kernel this presumably needs `from IPython.display import Audio`.
display(Audio(data, rate=samplerate))

plt.figure(figsize=(8, 5))
spectrogram_value_clean_trim, min_db, max_db = spectrogram(data, data.shape[0], length, samplerate)

# Add noise

In [None]:
import torch
import torchaudio
import torchaudio.functional 
import os

In [None]:
# Mix rain noise into every environment-noise clip at 20 dB SNR and save the result.
fol_path = r"D:\FYP dataset new\MMU folder_FYP_Spectrogram creation\Environment data\Environment noise_wav"
fol_list = os.listdir(fol_path)
save_fol = "Add RainNoise/EnvironmentNoise"
samplerate = 44100
length = 1

# Loop-invariant inputs: the noise recording and the SNR are the same for
# every clip, so they are loaded/built once instead of once per file.
rain_noise, _ = torchaudio.load("rainforest_rainNoise.wav", format='wav')
snr_dbs = torch.tensor([20])

for file in fol_list:
    if file == ".ipynb_checkpoints":
        continue
    speech, _ = torchaudio.load(os.path.join(fol_path, file), format='wav')

    # First noise channel, cut to at most the clip length.
    noise = rain_noise[0, : speech.shape[1]]
    # add_noise needs equal lengths: trim the clip whenever the noise is
    # shorter (a no-op otherwise). Previously this only happened for clips
    # longer than 44100 samples.
    speech = speech[:, : noise.size(-1)]
    noise = noise.unsqueeze(dim=0)

    noisy_speeches = torchaudio.functional.add_noise(speech, noise, snr_dbs)
    noisy_speeches_np = noisy_speeches.numpy()
    # NOTE(review): flatten() concatenates channels one after another, so this
    # presumably expects mono clips - confirm against the dataset.
    flattened_array = noisy_speeches_np.flatten()

    write_audio(os.path.join(save_fol, "addRainNoise_" + file), flattened_array)


In [None]:
# Spot-check one noise-augmented tapir clip: waveform, playback and spectrogram.

example = "Add Noise/Tapir_addNoise"
list1 = os.listdir(example)
clip_path = example + "\\" + list1[50]
samplerate, data, length = read_audio(clip_path)

signal_show(data, samplerate)

# NOTE(review): `Audio` is not imported in any cell visible here.
display(Audio(data, rate=samplerate))

plt.figure(figsize=(8, 5))
spectrogram_value_clean_trim, min_db, max_db = spectrogram(
    data, data.shape[0], length, samplerate
)

#write_audio("Tapir_addNoise/addNoise_wild_10_1_ch0.wav",data)

# Time shifting


In [None]:
import numpy as np
def manipulate(data, sampling_rate, shift_max, shift_direction):
    """Shift a signal in time by a random number of samples and pad the gap.

    Parameters
    ----------
    data : numpy.ndarray
        Signal with time along axis 0 (1-D, or 2-D frames x channels).
    sampling_rate : int
        Samples per second.
    shift_max : float
        Upper bound of the shift in seconds; the shift is drawn from
        ``[0, int(sampling_rate * shift_max))`` samples.
    shift_direction : str
        ``'right'`` negates the drawn shift, ``'both'`` negates it with
        probability 1/2, any other value (e.g. ``'left'``) keeps it positive.
        A positive shift moves samples towards higher indices and pads the
        start; a negative one moves them towards lower indices and pads the end.
        NOTE(review): that makes ``'right'`` move the audio earlier in time -
        confirm the naming is intended.

    Returns
    -------
    numpy.ndarray
        A new array of the same shape. The vacated region is filled with one
        random constant from ``[1, 500)`` rather than zeros; the input is not
        modified.
    """
    # An explicit int bound; a bound below 1 means there is nothing to draw.
    max_shift = int(sampling_rate * shift_max)
    shift = np.random.randint(max_shift) if max_shift > 0 else 0

    if shift_direction == 'right':
        shift = -shift
    elif shift_direction == 'both':
        if np.random.randint(0, 2) == 1:
            shift = -shift

    # axis=0 rolls whole frames; without it a 2-D array is flattened first and
    # samples leak between channels.
    augmented_data = np.roll(data, shift, axis=0)

    # Pad the vacated head/tail. A zero shift vacates nothing - the previous
    # `else` branch ran `augmented_data[0:] = ...` and wiped the whole clip.
    if shift > 0:
        augmented_data[:shift] = np.random.randint(1, 500)
    elif shift < 0:
        augmented_data[shift:] = np.random.randint(1, 500)
    return augmented_data

In [None]:
# Input/output folders for the time-shift experiments below.
# Raw strings keep the Windows backslashes literal (no invalid escapes).
fol_path = r"D:\FYP dataset new\MMU folder_FYP_Spectrogram creation\Environment data\Environment noise_wav"
fol_list = os.listdir(fol_path)
save_fol = r"D:\FYP dataset new\MMU folder_FYP_Spectrogram creation\Environment data\Environment noise_aug_wav"
samplerate = 44100
length = 1

In [None]:
# Reference clip for the shift examples: the second entry of the folder listing.
reference_path = fol_path + "/" + fol_list[1]
samplerate, data, length = read_audio(reference_path)

signal_show(data, samplerate)

# NOTE(review): `Audio` is not imported in any cell visible here.
display(Audio(data, rate=samplerate))

# First figure sets the colour range ...
plt.figure(figsize=(8, 5))
spectrogram_value_clean_trim, min_db, max_db = spectrogram(
    data, data.shape[0], length, samplerate
)

# ... which the second figure reuses via min_db / max_db.
plt.figure(figsize=(8, 5))
spectrogram_value_clean_trim = spectrogram_afterfilter(
    data, data.shape[0], length, samplerate, min_db, max_db
)

In [None]:
# Shift example: up to 0.3 s with direction "right", drawn with the colour
# range of the reference clip.

shift_data = manipulate(data, samplerate, shift_max=0.3, shift_direction="right")

signal_show(shift_data, samplerate)

# NOTE(review): `Audio` is not imported in any cell visible here.
display(Audio(shift_data, rate=samplerate))

plt.figure(figsize=(8, 5))
spectrogram_value_clean_trim = spectrogram_afterfilter(
    shift_data, shift_data.shape[0], length, samplerate, min_db, max_db
)

In [None]:
# Shift example: up to 0.3 s with direction "left", drawn with the colour
# range of the reference clip.

shift_data = manipulate(data, samplerate, shift_max=0.3, shift_direction="left")

signal_show(shift_data, samplerate)

# NOTE(review): `Audio` is not imported in any cell visible here.
display(Audio(shift_data, rate=samplerate))

plt.figure(figsize=(8, 5))
spectrogram_value_clean_trim = spectrogram_afterfilter(
    shift_data, shift_data.shape[0], length, samplerate, min_db, max_db
)

In [None]:
import random

# Time-shift every environment-noise clip by 0.2 s or 0.3 s in a random
# direction, save it as "shift_<name>" and plot before/after spectrograms.
# Raw strings keep the Windows backslashes literal (no invalid escapes).
fol_path = r"D:\FYP dataset new\MMU folder_FYP_Spectrogram creation\Environment data\Environment noise_wav"
fol_list = os.listdir(fol_path)
save_fol = r"D:\FYP dataset new\MMU folder_FYP_Spectrogram creation\Environment data\Environment noise_aug_wav"
length = 1
shift_length = 0
shift_direction = ""

for file in fol_list:
    if file == ".ipynb_checkpoints":
        continue
    samplerate, data, length = read_audio(os.path.join(fol_path, file))

    shift_length = random.randint(2, 3) * 0.1
    shift_direction = "left" if random.randint(0, 1) == 0 else "right"

    shift_data = manipulate(data, samplerate, shift_length, shift_direction)
    write_audio(os.path.join(save_fol, "shift_" + file), shift_data)

    # Original clip: also provides the colour range for the shifted plot.
    plt.figure(figsize=(8, 5))
    spectrogram_value_clean_trim, min_db, max_db = spectrogram(data, data.shape[0], length, samplerate)

    plt.figure(figsize=(8, 5))
    spectrogram_value_clean_trim = spectrogram_afterfilter(shift_data, shift_data.shape[0], length, samplerate, min_db, max_db)

# Speed up audio

In [None]:
import librosa
def manipulate_speed(data, speed_factor):
    """Time-stretch ``data`` with librosa, passing ``speed_factor`` as ``rate``.

    Returns whatever ``librosa.effects.time_stretch`` returns for the input.
    """
    stretched = librosa.effects.time_stretch(data, rate=speed_factor)
    return stretched

In [None]:
# Speed every clean Sun Bear clip up by a factor of 1.4, save it as
# "speedUp_<name>" and plot before/after spectrograms.
fol_path = "Clean/SunBear_clean"
fol_list = os.listdir(fol_path)
save_fol = "SpeedUp/SunBear"
length = 1

# Limits of the 16-bit output format, used to clip before casting.
int16_limits = np.iinfo(np.int16)

for file in fol_list:
    if file == ".ipynb_checkpoints":
        continue
    samplerate, data, length = read_audio(fol_path + "/" + file)
    data = data.astype(float)
    speedUp_data = manipulate_speed(data, 1.4)

    # Clip first: casting an out-of-range float straight to int16 wraps
    # around instead of saturating.
    int_data = np.clip(speedUp_data, int16_limits.min, int16_limits.max).astype(np.int16)
    write_audio(os.path.join(save_fol, "speedUp_" + file), int_data)

    plt.figure(figsize=(8, 5))
    spectrogram_value_clean_trim, min_db, max_db = spectrogram(data, data.shape[0], length, samplerate)

    # spectrogram() returns a 3-tuple; it is stored whole here, as before.
    plt.figure(figsize=(8, 5))
    spectrogram_value_clean_trim = spectrogram(int_data, int_data.shape[0], int_data.shape[0] / samplerate, samplerate)

# Distort

In [None]:
# Hard-clip every environment-noise clip at 70 % of its peak, save it as
# "distort_<name>" and plot before/after spectrograms.
# Raw string keeps the Windows backslashes literal (no invalid escapes).
fol_path = r"D:\FYP dataset new\MMU folder_FYP_Spectrogram creation\Environment data\Environment noise_wav"
fol_list = os.listdir(fol_path)
save_fol = "Distort/EnvironmentNoise"
length = 1
distortion_threshold = 0.7

for file in fol_list:
    if file == ".ipynb_checkpoints":
        continue
    samplerate, data, length = read_audio(os.path.join(fol_path, file))

    # Peak along the time axis, taken in float so abs() of the most negative
    # integer sample cannot overflow. A silent clip (peak 0) is scaled by 1
    # so the normalisation does not divide by zero.
    peak = np.max(np.abs(data.astype(float)), axis=0)
    peak = np.where(peak == 0, 1.0, peak)

    normalized_data = data / peak
    clipped_data = np.clip(normalized_data, -distortion_threshold, distortion_threshold)

    unnormalized_data = clipped_data * peak
    int_data = np.int16(unnormalized_data)
    write_audio(os.path.join(save_fol, "distort_" + file), int_data)

    plt.figure(figsize=(8, 5))
    spectrogram_value_clean_trim, min_db, max_db = spectrogram(data, data.shape[0], length, samplerate)

    # spectrogram() returns a 3-tuple; it is stored whole here, as before.
    plt.figure(figsize=(8, 5))
    spectrogram_value_clean_trim = spectrogram(int_data, int_data.shape[0], int_data.shape[0] / samplerate, samplerate)

# Read original sound (1 example)

In [None]:
# One example from the "Tapir" folder: waveform, playback and spectrogram.
fol_list = os.listdir("Tapir")

noise_path = "Tapir/" + fol_list[110]
samplerate, data, length = read_audio(noise_path)

signal_show(data, samplerate)

# NOTE(review): `Audio` is not imported in any cell visible here.
display(Audio(data, rate=samplerate))

plt.figure(figsize=(8, 5))
spectrogram_value_clean_trim, min_db, max_db = spectrogram(
    data, data.shape[0], length, samplerate
)

In [None]:
# One example from the cleaned tapir folder: waveform, playback and spectrogram.
# The file is read from the same folder that was listed; previously the
# name came from "Clean/Tapir_clean" but the file was opened under "Tapir/".
clean_dir = "Clean/Tapir_clean"
fol_list = os.listdir(clean_dir)

noise_path = clean_dir + "/" + fol_list[110]
samplerate, data, length = read_audio(noise_path)


signal_show(data, samplerate)

# NOTE(review): `Audio` is not imported in any cell visible here.
display(Audio(data, rate=samplerate))

plt.figure(figsize=(8, 5))
spectrogram_value_clean_trim, min_db, max_db = spectrogram(data, data.shape[0], length, samplerate)