In [1]:
import numpy as np
from glob import glob
import pandas as pd 
import librosa
import matplotlib.pyplot as plt

In [2]:
FOLDER_PATH = '../../data/splitted_audio/*'
DATA_PATHS = [
    {"audio" : "../../data/audio_27092022_1.WAV", "lidar" : "../../data/lidar_data_with_audio_timestamps_27_09_22_1.csv"}, 
    {"audio" : "../../data/audio_27092022_2.WAV", "lidar" : "../../data/lidar_data_with_audio_timestamps_27_09_22_2.csv"},
]
SAMPLING_RATE = 32000
TRAIN_SIZE = 1000

In [3]:
audio_files = glob(FOLDER_PATH)
loaded_audio_files = []
len(audio_files)

1197

In [4]:
def load_sound():
    for i in range(len(audio_files)):
        if (i%100==0):
            print(f'{i} of {len(audio_files)} loaded')
        s, sr = librosa.load(audio_files[i])
        loaded_audio_files.append(s)

In [5]:
def find_zero_crossings():
    print("finding zero crossing rate")
    crossings = []
    for i in range(len(loaded_audio_files)):
        if (i%100==0):
            print(f'{i} of {len(audio_files)} loaded')
        zero_crossing=librosa.zero_crossings(loaded_audio_files[i])
        crossings.append(sum(zero_crossing))
    print("DONE with zero crossings")
    return crossings

In [6]:
def find_MFFC():
    print("Finding MFCC vectors")
    mfcc_coef = []
    for i in range(len(loaded_audio_files)):
        if (i%100==0):
            print(f'{i} of {len(loaded_audio_files)} loaded')
        mfcc_coef.append(mffc_extractor(loaded_audio_files[i]))
    return mfcc_coef


def mffc_extractor(file):
    mfccs_features = librosa.feature.mfcc(y=file, sr=SAMPLING_RATE)
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
    return mfccs_scaled_features


In [7]:
def build_feature_frame():
    csvs = []
    for path in DATA_PATHS:
        df = pd.read_csv(path["lidar"])
        y, sr = librosa.load(path["audio"], sr=None)
        audio_length = librosa.get_duration(y=y, sr=sr)
        df =df[(df["audio_start_s"] >= 0) & (df["audio_end_s"] <= audio_length)]

    load_sound()
    df = pd.concat(map(pd.read_csv, CSV_PATHS), ignore_index=True)
    
    df["zero_crossing"] = find_zero_crossings()
    mfcc = pd.DataFrame(find_MFFC())
    df = df.merge(mfcc, left_index=True, right_index=True)
    return df

df = build_feature_frame()
df.head()

0 of 1197 loaded
100 of 1197 loaded
200 of 1197 loaded
300 of 1197 loaded
400 of 1197 loaded
500 of 1197 loaded
600 of 1197 loaded
700 of 1197 loaded
800 of 1197 loaded
900 of 1197 loaded
1000 of 1197 loaded
1100 of 1197 loaded
finding zero crossing rate
0 of 1197 loaded
100 of 1197 loaded
200 of 1197 loaded
300 of 1197 loaded
400 of 1197 loaded
500 of 1197 loaded
600 of 1197 loaded
700 of 1197 loaded
800 of 1197 loaded
900 of 1197 loaded
1000 of 1197 loaded
1100 of 1197 loaded
DONE with zero crossings


ValueError: Length of values (1197) does not match length of index (2498)

In [None]:
def find_spectral_centroid():
    spectrals = []
    for i in range(len(loaded_audio_files)):
        spec_cent=librosa.feature.spectral_centroid(loaded_audio_files[i])
        

In [None]:
%pip install pre pycaret

In [None]:
data = df.drop(["height","class_1","class_2","audio_start_s","audio_end_s", "Unnamed: 0"], axis = 1)
find_height = df.drop(["width","class_1","class_2","audio_start_s","audio_end_s", "Unnamed: 0"], axis = 1)
#width.to_csv("train_width")
#find_height.to_csv("train_height")

train = data.iloc[:TRAIN_SIZE,:]
test = data.iloc[TRAIN_SIZE:,:]
average_furartion = np.mean(train["duration"])
average_furartion

In [None]:
train_duration = pd.DataFrame(loaded_audio_files)
train_duration = np.abs(train_duration)
train_duration

In [None]:
train_duration["duration"] = data["duration"]
train_duration["duration"]


In [None]:
from pycaret.regression import *
#S_duration = setup(train, target="width")
#best_duration = compare_models()

In [None]:
def prepare_song(song_path):
  list_matrices = []
  for song_piece in loaded_audio_files:
    melspect = librosa.feature.melspectrogram(song_piece)
    list_matrices.append(melspect)
  return list_matrices

In [None]:
#numeric = train_duration.drop(["duration"], axis=1 ).columns
S = setup(train_duration, target="duration",
 #numeric_features=numeric 
 )
best = compare_models()

In [None]:
#evaluate_model(best)
blender = blend_models(best)

In [None]:
# Predict on the test set 
y_test_pred = best.predict(test.drop(["width"], axis=1))

# Generate submission dataframe 
# NOTE: It is important that the ID and predicted values match
submission = pd.DataFrame()
#submission['id'] = width_test.id
submission['predicted'] = np.asarray(y_test_pred)

# Save it to disk (`index=False` means don't save the index in the csv)
#submission.to_csv('sample_submission.csv', index=False)
for i in range(len(y_true)):
    if y_true[i] > 55:
        print("actual: ", y_true[i],"guess: " ,y_test_pred[i], "diff:", abs(y_true[i]- y_test_pred[i]))
    

In [None]:
def mape(y_test, pred):
    y_test, pred = np.array(y_test), np.array(pred)
    mape = np.mean(np.abs((y_test - pred) / y_test))
    return mape

In [None]:
def rmsle(y_true, y_pred):
    """
    Computes the Root Mean Squared Logarithmic Error 
    
    Args:
        y_true (np.array): n-dimensional vector of ground-truth values 
        y_pred (np.array): n-dimensional vecotr of predicted values 
    
    Returns:
        A scalar float with the rmsle value 
    
    Note: You can alternatively use sklearn and just do: 
        `sklearn.metrics.mean_squared_log_error(y_true, y_pred) ** 0.5`
    """
    assert (y_true >= 0).all(), 'Received negative y_true values'
    assert (y_pred >= 0).all(), 'Received negative y_pred values'
    assert y_true.shape == y_pred.shape, 'y_true and y_pred have different shapes'
    y_true_log1p = np.log1p(y_true)  # log(1 + y_true)
    y_pred_log1p = np.log1p(y_pred)  # log(1 + y_pred)
    return np.sqrt(np.mean(np.square(y_pred_log1p - y_true_log1p)))



# Calculate rmsle for a few example predictions 
y_true = test.width.values
n = len(y_test_pred)
mean = np.array([np.mean(y_true) for _ in range(n)])
print('A couple of RMSLE scores computed over the train set')
print(f'Perfect prediction: {rmsle(y_true, y_true):.4f}')
print(f'All average prediciton: {rmsle(y_true, mean):.4f}')
print(f'All ones prediction: {rmsle(y_true, np.ones(n)):.4f}')
print(f'actual prediction: {rmsle(y_true, y_test_pred):.4f}')
print(f'actual prediction: {mape(y_true, y_test_pred):.4f}')


In [None]:
from sklearn.metrics import mean_absolute_error

#print("pred: ", mean_absolute_error(y_true, y_test_pred))
print("average: ", mean_absolute_error([average_furartion for i in range(len(train))], train["duration"] ))

In [None]:
from scipy.signal import savgol_filter

sound = loaded_audio_files[4]
wawe_form = np.abs(sound)
yhat = savgol_filter(wawe_form, 5001, 2)
S = librosa.magphase(librosa.stft(sound, window=np.ones, center=False))[0]
k =librosa.feature.rms(S=S, center=True)
times = librosa.times_like(k)
plt.plot(times, k[0])
#plt.plot(wawe_form)

#for i in find_length_of_audio(sound):
#    plt.axvline(x=i, color="red")

print(data["duration"].loc[4],)

In [None]:
thershold, error

In [None]:
pred_length = []
for i in range(len(loaded_audio_files)):
    pred_length.append(find_length_of_audio(loaded_audio_files[i])) 



In [None]:
train["duration"] = pred_length[0:1000]
train

In [None]:
S = setup(train,target="width", session_id=1234 )


In [None]:
best = compare_models(exclude=["llar", "dummy"])

In [None]:
# Predict on the test set 
y_test_pred = best.predict(test.drop(["width"], axis=1))

# Generate submission dataframe 
# NOTE: It is important that the ID and predicted values match
submission = pd.DataFrame()
#submission['id'] = width_test.id
submission['predicted'] = np.asarray(y_test_pred)

# Save it to disk (`index=False` means don't save the index in the csv)
#submission.to_csv('sample_submission.csv', index=False)
for i in range(len(y_true)):
    if y_true[i] > 55:
        print("actual: ", y_true[i],"guess: " ,y_test_pred[i], "diff:", abs(y_true[i]- y_test_pred[i]))
    

In [None]:
import sklearn

x = loaded_audio_files[12]
spectral_centroids = librosa.feature.spectral_centroid(x, sr = SAMPLING_RATE)[0]
#spectral_centroids.shape(775, )
# Computing the time variable
#for visualization
plt.figure(figsize = (12, 4))
frames = range(len(spectral_centroids))
print(len(frames))
t = librosa.frames_to_time(frames)
# Normalising the spectral centroid
#for visualisation
def normalize(x, axis = 0):
  return sklearn.preprocessing.minmax_scale(x, axis = axis)
#Plotting the Spectral Centroid along the waveform
librosa.display.waveshow(x, sr = SAMPLING_RATE)
plt.plot(t, normalize(spectral_centroids), color = 'b')

In [None]:
spectral_bandwidth_2 = librosa.feature.spectral_bandwidth(x + 0.01, sr = SAMPLING_RATE)[0]
spectral_bandwidth_3 = librosa.feature.spectral_bandwidth(x + 0.01, sr = SAMPLING_RATE, p = 3)[0]
spectral_bandwidth_4 = librosa.feature.spectral_bandwidth(x + 0.01, sr = SAMPLING_RATE, p = 4)[0]
plt.figure(figsize = (15, 9))
librosa.display.waveshow(x, sr = SAMPLING_RATE, alpha = 0.4)
plt.plot(t, normalize(spectral_bandwidth_2), color = 'r')
plt.plot(t, normalize(spectral_bandwidth_3), color = 'g')
plt.plot(t, normalize(spectral_bandwidth_4), color = 'y')

In [None]:
k = loaded_audio_files[105]
S1 = np.abs(librosa.stft(m))
imgdb = librosa.display.specshow(librosa.power_to_db(S1**2, ref=np.max),
                                 sr=32000, y_axis='log', x_axis='time')
plt.plot()