In [None]:
import numpy as np
from glob import glob
import pandas as pd 
import librosa
import matplotlib.pyplot as plt

In [None]:
# Glob pattern handed to glob() to list the contents of the split-audio folder.
FOLDER_PATH = '../../data/splitted_audio/*'

# Each entry pairs one audio recording path with one lidar CSV path.
DATA_PATHS = [
    {
        "audio": "../../data/audio_27092022_1.WAV",
        "lidar": "../../data/lidar_data_with_audio_timestamps_27_09_22_1.csv",
    },
    {
        "audio": "../../data/audio_27092022_2.WAV",
        "lidar": "../../data/lidar_data_with_audio_timestamps_27_09_22_2.csv",
    },
]

# Sample rate passed as `sr` to the librosa feature and plotting calls.
SAMPLING_RATE = 32000

# Number of leading rows used as the training split; the remainder is held out.
TRAIN_SIZE = 1000

In [None]:
# List the split audio clips.
#
# glob() returns paths in arbitrary, filesystem-dependent order, so the result
# is sorted to get the same sequence on every run and every machine. The
# features built from these clips are attached to the label CSV by position,
# so a stable order matters.
# NOTE(review): this assumes the clip file names sort in the same order as the
# rows of the label CSV - confirm against how the clips were written.
#
# The same folder also holds the label CSV that build_feature_frame() reads.
# The original code dropped the last glob result, presumably to skip that CSV;
# it is excluded by extension here so the outcome does not depend on ordering.
# NOTE(review): confirm the folder contains no other non-audio files.
audio_files = sorted(
    path for path in glob(FOLDER_PATH)
    if not path.lower().endswith(".csv")
)

# Filled in place by load_sound() with one decoded waveform per clip.
loaded_audio_files = []

# Preview the first few paths rather than dumping the whole list.
audio_files[:5]


In [None]:
def load_sound():
    """Decode every path in ``audio_files`` into ``loaded_audio_files``.

    The module-level list ``loaded_audio_files`` is emptied and then refilled
    in place, one waveform per entry of ``audio_files`` and in the same order,
    so calling this function again does not append duplicate waveforms.

    Returns:
        None. The result is the side effect on ``loaded_audio_files``.
    """
    # Reset in place (rather than rebinding) so other references to the list
    # keep seeing the current contents.
    loaded_audio_files.clear()
    total = len(audio_files)
    for i, path in enumerate(audio_files):
        if i % 100 == 0:
            print(f'{i} of {total} loaded')
        # librosa.load resamples to 22050 Hz unless `sr` is given. Every
        # feature call in this notebook passes sr=SAMPLING_RATE, so the audio
        # has to be decoded at that same rate for those features to be valid.
        samples, _ = librosa.load(path, sr=SAMPLING_RATE)
        loaded_audio_files.append(samples)

In [None]:
def find_zero_crossings():
    """Count the zero crossings of every waveform in ``loaded_audio_files``.

    Returns:
        list[int]: one zero-crossing count per loaded waveform, in the same
        order as ``loaded_audio_files``.
    """
    print("finding zero crossing rate")
    # Report progress against the list that is actually being iterated.
    total = len(loaded_audio_files)
    crossings = []
    for i, samples in enumerate(loaded_audio_files):
        if i % 100 == 0:
            print(f'{i} of {total} loaded')
        crossing_mask = librosa.zero_crossings(samples)
        # Count the True entries with numpy; the builtin sum() would walk the
        # boolean array one sample at a time in Python.
        crossings.append(int(np.count_nonzero(crossing_mask)))
    print("DONE with zero crossings")
    return crossings

In [None]:
def find_MFFC():
    """Run ``mffc_extractor`` over every waveform in ``loaded_audio_files``.

    Returns:
        list: one extractor result per loaded waveform, in list order.
    """
    print("Finding MFCC vectors")
    vectors = []
    for i, samples in enumerate(loaded_audio_files):
        # Progress line on every 100th waveform.
        if not i % 100:
            print(f'{i} of {len(loaded_audio_files)} loaded')
        vectors.append(mffc_extractor(samples))
    return vectors


def mffc_extractor(file):
    """Return the averaged MFCC vector of a single waveform.

    Args:
        file: the audio samples of one clip (passed to librosa as ``y``).

    Returns:
        np.ndarray: the MFCC matrix computed at ``SAMPLING_RATE``, transposed
        and averaged over axis 0.
    """
    coefficients = librosa.feature.mfcc(y=file, sr=SAMPLING_RATE)
    # After the transpose, axis 0 is the second axis of librosa's output, so
    # the mean collapses it and leaves one value per row of that output.
    return np.mean(coefficients.T, axis=0)


In [None]:
def build_feature_frame(csv_path="../../data/splitted_audio/full_data_270922.csv"):
    """Load the audio, compute features and join them onto the label table.

    Args:
        csv_path: path of the CSV with one row per clip. Defaults to the file
            this notebook was written against.

    Returns:
        pd.DataFrame: the CSV columns plus ``zero_crossing`` and one
        ``mfcc_<i>`` column per MFCC coefficient, joined on the row index.

    Raises:
        ValueError: raised by pandas when the number of loaded clips differs
            from the number of CSV rows (the ``zero_crossing`` assignment).
    """
    load_sound()
    labels = pd.read_csv(csv_path)
    labels["zero_crossing"] = find_zero_crossings()
    # A bare DataFrame of MFCC vectors gets integer column labels (0, 1, ...).
    # Next to the string-named CSV columns that makes mixed-type feature
    # names, which newer scikit-learn releases reject, so give the MFCC
    # columns string names.
    mfcc = pd.DataFrame(find_MFFC()).add_prefix("mfcc_")
    return labels.merge(mfcc, left_index=True, right_index=True)
    
# Build the feature table once; the later cells read it through the global `df`.
df = build_feature_frame()
df

In [None]:
def find_spectral_centroid():
    """Compute one spectral-centroid value per waveform in ``loaded_audio_files``.

    The original loop computed the centroid track of each clip but never
    stored or returned it. Each track is now reduced to its mean and collected.
    NOTE(review): averaging over the track mirrors what ``mffc_extractor``
    does for MFCCs - confirm that a per-clip mean is the intended feature.

    Returns:
        list[float]: mean spectral centroid per loaded waveform, in list order.
    """
    spectrals = []
    for samples in loaded_audio_files:
        # `y` has to be passed by keyword on current librosa releases, and `sr`
        # must match the rate used for the other features.
        centroid = librosa.feature.spectral_centroid(y=samples, sr=SAMPLING_RATE)
        spectrals.append(float(np.mean(centroid)))
    return spectrals

In [None]:
# Install PyCaret; it is imported a few cells further down.
# TODO: pin an explicit version here so a rerun resolves the same release.
%pip install pycaret

In [None]:
# Modelling table: df without height, the two class columns, the audio
# start/end columns and the "Unnamed: 0" column.
data = df.drop(
    columns=["height", "class_1", "class_2", "audio_start_s", "audio_end_s", "Unnamed: 0"]
)
# Same selection, but dropping width instead of height.
find_height = df.drop(
    columns=["width", "class_1", "class_2", "audio_start_s", "audio_end_s", "Unnamed: 0"]
)
# CSV exports, currently disabled:
#width.to_csv("train_width")
#find_height.to_csv("train_height")

# Positional split: the first TRAIN_SIZE rows train, the remaining rows test.
train = data.iloc[:TRAIN_SIZE]
test = data.iloc[TRAIN_SIZE:]

# Mean of the duration column over the training rows, shown as the cell output.
average_furartion = train["duration"].mean()
average_furartion

In [None]:
# One row per loaded waveform, holding the absolute value of every sample.
train_duration = pd.DataFrame(loaded_audio_files).abs()
train_duration

In [None]:
# Copy the duration column from `data` onto the sample table (this mutates
# `train_duration` in place), then display the new column.
train_duration["duration"] = data["duration"]
train_duration["duration"]


In [None]:
from pycaret.regression import *


In [None]:
# Set up the PyCaret regression experiment that predicts `width` from the
# training rows. session_id fixes PyCaret's random seed so that a rerun gives
# the same internal split and the same model comparison.
#numeric = train_duration.drop(["duration"], axis=1 ).columns
S = setup(
    train,
    target="width",
    session_id=42,
    #numeric_features=numeric
)
# Compare the candidate regressors and keep the one PyCaret returns as best.
best = compare_models()

In [None]:
def mape(y_test, pred):
    """Mean absolute percentage error, returned as a fraction (not times 100).

    Args:
        y_test: array-like of ground-truth values.
        pred: array-like of predicted values.

    Returns:
        The mean of ``|y_test - pred| / |y_test|`` over all elements.
    """
    actual = np.asarray(y_test)
    predicted = np.asarray(pred)
    relative_error = np.abs((actual - predicted) / actual)
    return relative_error.mean()

In [None]:
def rmsle(y_true, y_pred):
    """
    Computes the Root Mean Squared Logarithmic Error

    Args:
        y_true (array-like): n-dimensional vector of ground-truth values
        y_pred (array-like): n-dimensional vector of predicted values

    Returns:
        A scalar float with the rmsle value

    Raises:
        AssertionError: if either input contains a negative value or the two
            inputs have different shapes.

    Note: You can alternatively use sklearn and just do:
        `sklearn.metrics.mean_squared_log_error(y_true, y_pred) ** 0.5`
    """
    # Accept lists and Series as well as arrays, the same way mape() does;
    # without the conversion `list >= 0` raises before any check runs.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    assert (y_true >= 0).all(), 'Received negative y_true values'
    assert (y_pred >= 0).all(), 'Received negative y_pred values'
    assert y_true.shape == y_pred.shape, 'y_true and y_pred have different shapes'
    y_true_log1p = np.log1p(y_true)  # log(1 + y_true)
    y_pred_log1p = np.log1p(y_pred)  # log(1 + y_pred)
    return np.sqrt(np.mean(np.square(y_pred_log1p - y_true_log1p)))

# Score the selected model on the held-out rows.
# NOTE(review): this calls the estimator's own predict() on the raw feature
# columns, so any preprocessing configured by setup() is not applied here -
# confirm that is intended.
y_test_pred = best.predict(test.drop(["width"], axis=1))
y_true = test.width.values
print(f'actual prediction RMSLE: {rmsle(y_true, y_test_pred):.4f}')
print(f'actual prediction MAPE: {mape(y_true, y_test_pred):.4f}')


In [None]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error between the held-out widths and their predictions.
print("pred: ", mean_absolute_error(y_true, y_test_pred))
# Disabled comparison against a constant prediction of the mean training duration:
#print("average: ", mean_absolute_error([average_furartion for i in range(len(train))], train["duration"] ))

In [None]:
# Second experiment: same target, with the duration column removed from the
# features. session_id fixes PyCaret's random seed so a rerun is repeatable.
S_1 = setup(train.drop(["duration"], axis=1), target="width", session_id=42)

In [None]:
# Compare the candidate regressors for the setup() call in the previous cell
# and keep the model PyCaret returns.
best_1 =compare_models()

In [None]:
# Score the duration-free model on the held-out rows.
held_out_features = test.drop(columns=["width", "duration"])
y_test_pred = best_1.predict(held_out_features)
y_true = test.width.values
# Arguments follow sklearn's (y_true, y_pred) order; MAE is symmetric, so the
# printed value is unchanged.
print("MAE:", mean_absolute_error(y_true, y_test_pred))

In [None]:
import sklearn
# `import sklearn` alone does not guarantee that the preprocessing submodule
# is loaded, so import it explicitly.
import sklearn.preprocessing
# Some librosa releases do not load the display submodule with the base
# package, so import it explicitly as well.
import librosa.display

# Waveform of one clip; also used by the spectral-bandwidth cell further down.
x = loaded_audio_files[12]

# `y` has to be passed by keyword on current librosa releases.
spectral_centroids = librosa.feature.spectral_centroid(y=x, sr=SAMPLING_RATE)[0]

# Time axis for the centroid track. The same sample rate as the waveform plot
# is used; otherwise the overlay is stretched relative to the waveform.
frames = range(len(spectral_centroids))
print(len(frames))
t = librosa.frames_to_time(frames, sr=SAMPLING_RATE)


def normalize(x, axis=0):
    """Min-max scale ``x`` along ``axis`` so it can be drawn over the waveform."""
    return sklearn.preprocessing.minmax_scale(x, axis=axis)


# Plot the normalised spectral centroid on top of the waveform.
plt.figure(figsize=(12, 4))
librosa.display.waveshow(x, sr=SAMPLING_RATE)
plt.plot(t, normalize(spectral_centroids), color='b')

In [None]:
# Some librosa releases do not load the display submodule with the base
# package, so import it explicitly.
import librosa.display

# Spectral bandwidth of the clip `x` for the default order and for p = 3, 4.
# `y` has to be passed by keyword on current librosa releases.
# NOTE(review): the +0.01 offset on the signal is kept from the original;
# presumably it avoids all-zero frames - confirm it is still wanted.
spectral_bandwidth_2 = librosa.feature.spectral_bandwidth(y=x + 0.01, sr=SAMPLING_RATE)[0]
spectral_bandwidth_3 = librosa.feature.spectral_bandwidth(y=x + 0.01, sr=SAMPLING_RATE, p=3)[0]
spectral_bandwidth_4 = librosa.feature.spectral_bandwidth(y=x + 0.01, sr=SAMPLING_RATE, p=4)[0]

# Draw the three normalised bandwidth tracks over a faded waveform. `t` and
# `normalize` come from the spectral-centroid cell.
plt.figure(figsize=(15, 9))
librosa.display.waveshow(x, sr=SAMPLING_RATE, alpha=0.4)
plt.plot(t, normalize(spectral_bandwidth_2), color='r')
plt.plot(t, normalize(spectral_bandwidth_3), color='g')
plt.plot(t, normalize(spectral_bandwidth_4), color='y')

In [None]:
# Some librosa releases do not load the display submodule with the base
# package, so import it explicitly.
import librosa.display

# Log-frequency power spectrogram of one clip.
k = loaded_audio_files[105]
# The original passed `m` to stft(), a name that is not defined anywhere in
# the visible notebook; the clip selected on the line above is what is meant.
S1 = np.abs(librosa.stft(k))
# Use the shared SAMPLING_RATE constant rather than a repeated literal.
imgdb = librosa.display.specshow(librosa.power_to_db(S1**2, ref=np.max),
                                 sr=SAMPLING_RATE, y_axis='log', x_axis='time')
plt.show()