In [40]:
import joblib
from joblib import load
import sklearn
import pandas as pd
import librosa
import signal
import numpy as np
from scipy import stats
from fdacoefs import BL, B



In [46]:

# Recording to classify. Other filenames that have been tried here:
# 'relay.csv' and 'mouth_sound.csv'.
filename = 'plant_sound1.csv'
print('PYTHON/ Starting the Python script')

def feature_extraction(filename, sample_rate=500000):
    """Extract the classifier's input features from a CSV recording.

    The first column of the CSV (header row skipped) is read as the raw
    signal, filtered with the first ``BL`` FIR coefficients of ``B`` (both
    imported from ``fdacoefs``), peak-normalised, and summarised as the
    dominant FFT frequency plus the per-coefficient mean of 13 MFCCs.

    Parameters
    ----------
    filename : str
        Path to a comma-separated file; column 0 holds the samples and the
        first line is treated as a header.
    sample_rate : int, optional
        Sampling rate in Hz used for the FFT frequency axis and the MFCC
        computation. Defaults to 500000, the value that used to be
        hard-coded in this function.

    Returns
    -------
    tuple
        ``(max_freq, mfccs_mean)`` where ``max_freq`` is the frequency of the
        largest-magnitude rFFT bin and ``mfccs_mean`` is a list with the 13
        MFCC means taken over time frames.

    Raises
    ------
    ValueError
        If the file contains no samples.
    """
    # genfromtxt returns a 0-d array when the file has a single data row;
    # atleast_1d keeps the rest of the function working on a 1-D signal.
    non_filt_data = np.atleast_1d(
        np.genfromtxt(filename, delimiter=',', skip_header=1, usecols=0)
    )
    if non_filt_data.size == 0:
        raise ValueError(f'No samples found in {filename!r}')

    # Causal FIR filter: data[i] = sum_j non_filt_data[i - j] * B[j] for
    # 0 <= j < BL and i - j >= 0. The "full" convolution truncated to the
    # input length gives exactly that sum without a Python-level double loop.
    taps = np.asarray(B, dtype=float)[:BL]
    data = np.convolve(non_filt_data, taps)[:len(non_filt_data)]

    # Peak-normalise; an all-zero signal is left untouched so that we do not
    # divide by zero and fill the signal with NaNs.
    datamax = np.max(np.abs(data))
    if datamax > 0:
        data = data / datamax

    # Frequency of the strongest spectral component.
    # (Total energy, energy entropy and spectral entropy used to be computed
    # here as well, but they were never returned, so they have been dropped.)
    fft_result = np.fft.rfft(data)
    fft_freq = np.fft.rfftfreq(len(data), 1 / sample_rate)
    max_freq = fft_freq[np.argmax(np.abs(fft_result))]

    # MFCCs, averaged over time frames (axis 1).
    # NOTE(review): the saved cell output shows a warning raised from
    # librosa's mel filter construction for this call. Passing n_mels=40 was
    # tried previously; changing it alters the features, so it presumably
    # requires refitting the scaler/model — confirm before switching.
    mfccs = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=13, n_fft=512, hop_length=256)
    mfccs_mean = np.mean(mfccs, axis=1)

    return max_freq, list(mfccs_mean)


# Load the fitted scaler and classifier.
# (Earlier experiments loaded 'svc_polyv2.joblib' / 'svc_polyv4.pkl' instead.)
# NOTE(review): joblib.load unpickles arbitrary Python objects, so only load
# files that come from a trusted source.
scaler = load('test_scaler.pkl')
model = load('test_model.pkl')

print('PYTHON/ Model loaded')

# Column layout of the feature row: the max frequency followed by 13 MFCC
# means. Presumably this matches what the scaler/model were fitted on —
# verify against the training notebook.
feature_columns = ['Max Frequency'] + [f'MFCC Mean {i+1}' for i in range(13)]

# Extract the features and build the single-row frame in one step, rather
# than growing an empty DataFrame via df.loc[0].
max_freq, mfccs_mean = feature_extraction(filename)
input_features = [max_freq] + list(mfccs_mean)
df = pd.DataFrame([input_features], columns=feature_columns)

# Standardise with the fitted scaler; transform() returns a plain array, so
# wrap it back into a DataFrame to keep the column names.
df_std = pd.DataFrame(scaler.transform(df), columns=df.columns)

# predict() expects a 2-D input and returns one label per row.
predicted_class = model.predict(df_std)

print('\nPredicted class:', predicted_class)
# Compare the single predicted label, not the whole array, with the target
# class: array == str only works as a condition while there is exactly one row.
if predicted_class[0] == 'plant':
    print('PYTHON/ Plant sound detected')
else:
    print('PYTHON/ Not a plant sound')








PYTHON/ Starting the Python script
PYTHON/ Model loaded

Predicted class: ['noise']
PYTHON/ Not a plant sound


  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


In [30]:
# Show the raw (unscaled) feature row that was built for the recording.
df

Unnamed: 0,Max Frequency,MFCC Mean 1,MFCC Mean 2,MFCC Mean 3,MFCC Mean 4,MFCC Mean 5,MFCC Mean 6,MFCC Mean 7,MFCC Mean 8,MFCC Mean 9,MFCC Mean 10,MFCC Mean 11,MFCC Mean 12,MFCC Mean 13
0,51948.051948,-590.0494,-205.965939,-47.607262,43.677588,-27.982345,9.673103,-7.300111,-22.518293,15.112685,-18.491946,-12.881574,18.95202,-5.601823


In [31]:
# Show the same feature row after scaler.transform (the model's actual input).
df_std

Unnamed: 0,Max Frequency,MFCC Mean 1,MFCC Mean 2,MFCC Mean 3,MFCC Mean 4,MFCC Mean 5,MFCC Mean 6,MFCC Mean 7,MFCC Mean 8,MFCC Mean 9,MFCC Mean 10,MFCC Mean 11,MFCC Mean 12,MFCC Mean 13
0,0.305337,0.691917,2.053455,-0.330451,-0.984353,-1.766229,2.602761,-0.318562,-4.439827,2.399623,-2.990623,-2.082376,4.403262,-1.94855


In [32]:
# Access the mean of each feature
mean = scaler.mean_

# Access the variance of each feature
var = scaler.var_

print("Mean:", mean)
print("Variance:", var)

Mean: [ 4.53931599e+04 -6.41381439e+02 -2.56036460e+02 -3.34321426e+01
  5.94809669e+01 -1.12316121e+00 -1.44016021e+01 -5.08573374e+00
 -5.55022205e-01  2.85856294e+00 -5.69657508e+00 -3.84223164e+00
  2.12047341e+00  1.53291322e+00]
Variance: [4.60863679e+08 5.50387628e+03 5.94557625e+02 1.84009273e+03
 2.57749855e+02 2.31255237e+02 8.55565551e+01 4.83187723e+01
 2.44715746e+01 2.60782477e+01 1.83055311e+01 1.88432285e+01
 1.46116398e+01 1.34070310e+01]
