In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from scipy import io, misc

In [3]:
duration_samples = 0.2 #seconds
size_max = 120000
frequence = 10000 #Hertz

In [4]:
file = "dataset.mat"

dataset = scipy.io.loadmat(file)

df_normal = dataset["normal"].reshape(-1)[:size_max]
df_inner = dataset["inner"].reshape(-1)[:size_max]
df_roller = dataset["roller"].reshape(-1)[:size_max]
df_outer = dataset["outer"].reshape(-1)[:size_max]

data = [df_normal,df_inner,df_roller,df_outer]

0- normal defect
1- inner defect
2- roller defect
3- outer defect

In [5]:
def load_data(train=0.7):
    type_track = 0
    n_samples_each = int(size_max/frequence/duration_samples)
    audios_train = []
    audios_test = []
    number_train=int(n_samples_each*0.7)

    for track in data:
        for i in range(0,n_samples_each):
            t1 = int(i*frequence*duration_samples)
            t2 = int((i+1)*frequence*duration_samples)
            new = list(track)[t1:t2]
            if i<number_train:
                audios_train.append((type_track,new))
            else:
                audios_test.append((type_track,new))
        type_track = type_track+1
    np.random.seed(1)
    np.random.shuffle(audios_train)
    np.random.seed(1)
    np.random.shuffle(audios_test)
    return [i[1] for i in audios_train], [i[1] for i in audios_test], [i[0] for i in audios_train], [i[0] for i in audios_test]

In [6]:
audios_train, audios_test, label_train, label_test = load_data()

In [7]:
import IPython
IPython.display.Audio(audios_train[2], rate=frequence)

In [8]:
type(audios_train[0])

list

In [9]:
import librosa
y= np.array(audios_train[0])
sr=frequence
# audio_data, sample_rate= librosa.load(y=y,sr=sr)
mfcc= librosa.feature.mfcc(y=y,sr=sr)
mfcc

  return f(*args, **kwargs)


array([[-122.66910248,  -92.38567051,  -90.14709196,  -95.04445657],
       [  65.11893795,   65.22694178,   65.74420185,   62.49582789],
       [ -23.78778531,  -22.8203103 ,  -24.81519375,  -24.33575148],
       [  33.9697855 ,   36.7075763 ,   38.60036311,   39.16014693],
       [  19.13609532,   18.71309184,   18.6506835 ,   20.118161  ],
       [ -14.59032857,  -14.31039357,  -13.29117492,  -11.43985052],
       [  -3.7035524 ,   -6.63739772,  -10.82981239,  -11.13944179],
       [   7.87573437,    2.31676845,    1.22366204,    2.58736084],
       [ -20.97423589,  -18.75185994,  -18.8267292 ,  -15.36336298],
       [  11.01888393,    8.41455642,    2.4582269 ,    6.09945736],
       [   7.08547832,    6.72868892,    4.85371586,    4.597242  ],
       [  -8.27849928,  -13.05751705,  -12.613952  ,  -12.84161621],
       [ -12.54682523,  -15.42040138,  -13.41485823,  -13.03844075],
       [  -6.00353258,   -5.68629068,   -6.50225963,   -4.82829332],
       [   5.21023325,    4.028929

In [10]:
mfcc.shape

(20, 4)

In [11]:
def feature_extract(l):
    y=np.array(l)
    sr=frequence
    mfcc= librosa.feature.mfcc(y=y,sr=sr)
    mfcc_scaled=np.mean(mfcc.T, axis=0)
    
    return mfcc_scaled

In [12]:
extracted_features=[]
i=0
for audio in audios_train:
    data=feature_extract(audio)
    label= label_train[i]
    extracted_features.append([data,label])
    i=i+1

In [13]:
df= pd.DataFrame(extracted_features, columns=['feature','class'])
df.head()

Unnamed: 0,feature,class
0,"[-100.06158038013302, 64.64647736755063, -23.9...",0
1,"[35.74615329316609, -53.1169484254411, 4.16197...",3
2,"[-101.55683725535413, 62.47573651477131, -24.4...",0
3,"[40.669772226180235, -51.51024941922913, 1.916...",3
4,"[-63.955624976208675, 13.54734617515508, -25.1...",2


In [14]:
# split the dataset into dependent and independent features
x=np.array(df['feature'].tolist())
y=np.array(df['class'].tolist())

In [15]:
x.shape

(168, 20)

In [19]:
y.shape

(168,)

In [18]:
from keras.utils.np_utils import to_categorical  
import numpy as np
y_hot_train = to_categorical(y,num_classes =4)
y_hot_train[4]

array([0., 0., 1., 0.], dtype=float32)

In [20]:
y_hot_train.shape

(168, 4)

# Model Creation

In [21]:
import tensorflow as tf
print(tf.__version__)

2.9.1


In [22]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics


In [23]:
### No of classes
num_labels=y_hot_train.shape[1]

In [24]:
Dense()

TypeError: __init__() missing 1 required positional argument: 'units'

In [37]:
model=Sequential()
###first layer
model.add(Dense(100,input_shape=(20,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
###second layer
model.add(Dense(200))
model.add(Activation('relu'))
model.add(Dropout(0.5))
###third layer
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dropout(0.5))

###final layer
model.add(Dense(num_labels))
model.add(Activation('softmax'))

In [38]:
model.summery()

AttributeError: 'Sequential' object has no attribute 'summery'

In [39]:
model.compile(loss='categorical_crossentropy',metrics=['accuracy'],optimizer='adam')

In [40]:
## Trianing my model
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime 

num_epochs = 100
num_batch_size = 32

# checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification.hdf5', 
#                                verbose=1, save_best_only=True)
start = datetime.now()

model.fit(x,y_hot_train)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Training completed in time:  0:00:01.130302


In [41]:
test_accuracy=model.evaluate(x,y_hot_train,verbose=0)
print(test_accuracy[1])

0.6726190447807312


In [46]:
arr=np.array(audios_test[0])

In [47]:
arr.shape

(2000,)

In [50]:
test_dummy= feature_extract(audios_test[0])

  return f(*args, **kwargs)


In [52]:
test_dummy.shape
test_dummy= test_dummy.reshape(1,-1)
test_dummy.shape

(1, 20)

In [61]:
ans= model.predict(test_dummy)
ans
# so predicted class is 1
# index of max of this array



array([[6.8920269e-04, 9.3718070e-01, 3.4559824e-02, 2.7570259e-02]],
      dtype=float32)

In [54]:
label_test[0]
# hence predicting correct label

1

In [48]:
arr2=np.array(audios_train[0])

In [49]:
arr2.shape

(2000,)