**import libraries**

In [131]:
import os
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from glob import glob

In [2]:
import librosa as lr

**upload dataset from google drive**

In [3]:
!gdown --id 1--C4P9SSZhYnTITWyYpEzNXZwpOKM6tW

Downloading...
From: https://drive.google.com/uc?id=1--C4P9SSZhYnTITWyYpEzNXZwpOKM6tW
To: /content/dataset.zip
100% 35.0M/35.0M [00:00<00:00, 68.3MB/s]


**Extract dataset**

In [4]:
!unzip dataset.zip

Archive:  dataset.zip
   creating: dataset/
   creating: dataset/1/
  inflating: dataset/1/1-(1).mp3     
  inflating: dataset/1/10-(1).mp3    
  inflating: dataset/1/11-(1).mp3    
  inflating: dataset/1/12-(1).mp3    
  inflating: dataset/1/13-(1).mp3    
  inflating: dataset/1/14-(1).mp3    
  inflating: dataset/1/15-(1).mp3    
  inflating: dataset/1/16-(1).mp3    
  inflating: dataset/1/17-(1).mp3    
  inflating: dataset/1/18-(1).mp3    
  inflating: dataset/1/19-(1).mp3    
  inflating: dataset/1/2-(1).mp3     
  inflating: dataset/1/20-(1).mp3    
  inflating: dataset/1/21-(1).mp3    
  inflating: dataset/1/22-(1).mp3    
  inflating: dataset/1/23-(1).mp3    
  inflating: dataset/1/24-(1).mp3    
  inflating: dataset/1/25-(1).mp3    
  inflating: dataset/1/26-(1).mp3    
  inflating: dataset/1/27-(1).mp3    
  inflating: dataset/1/28-(1).mp3    
  inflating: dataset/1/29-(1).mp3    
  inflating: dataset/1/3-(1).mp3     
  inflating: dataset/1/30-(1).mp3    
  inflating: dataset

**Prepare audio data**

In [3]:
def preprocess_dataset():
    data_dir = '/content/dataset/*/*.mp3'
    audio_files = glob(data_dir)
    signal, sample_rate = lr.load(audio_files[0])
    MFCCs = lr.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=10, n_fft=2048, hop_length=512)
    return MFCCs

In [4]:
mfcc = preprocess_dataset()



In [5]:
labels = ['اوراق', 'ارز', 'سکه', 'بانک', 'طلا', 'نفت', 'مشتقات', 'فلزات', 'صندوق سهامی', 'صندوق درآمد ثابت', 'صندوق مختلط', 'صندوق قابل معامله']

In [6]:
len(labels)

12

### **Preprocessing data**

In [8]:
DATASET_PATH = '/content/dataset/*/*.mp3'
JSON_PATH = 'data.json'
SAMPLES_TO_CONSIDER = 22050 


def preprocess_audio_files(dataset_path):
  data = {
        "labels": [],
        "MFCCs": []
  }
  counter = 0
  audio_files = glob(dataset_path)
  for i,audio in enumerate(audio_files):
    signal, sample_rate = lr.load(audio)
    if len(signal) >= SAMPLES_TO_CONSIDER:
      signal = signal[:SAMPLES_TO_CONSIDER]
      MFCCs = lr.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=10, n_fft=2048, hop_length=512)
      data["MFCCs"].append(MFCCs.T.tolist())
      data["labels"].append(labels[counter])
      if (i+1) % 40 == 0:
        counter += 1
        print(counter)
    
  # save data in json file
  with open(JSON_PATH, "w", encoding='utf-8') as f:
      json.dump(data, f, ensure_ascii=False) 

  return None    

In [9]:
%%time

preprocess_audio_files(DATASET_PATH)



1




2




3




4




5




6




7




8




9




10




11




12
CPU times: user 48 s, sys: 10.7 s, total: 58.7 s
Wall time: 1min 36s


### **Read dataset**

In [10]:
def load_data(data_path):
    with open(data_path, "r") as f:
        data = json.load(f)

    X = np.array(data["MFCCs"])
    y = np.array(data["labels"])
    return X, y

In [11]:
X, y = load_data('data.json')

In [12]:
X.shape

(479, 44, 10)

In [13]:
y[0:10]

array(['اوراق', 'اوراق', 'اوراق', 'اوراق', 'اوراق', 'اوراق', 'اوراق',
       'اوراق', 'اوراق', 'اوراق'], dtype='<U17')

In [134]:
categories = pd.get_dummies(y).columns

In [137]:
categories

Index(['ارز', 'اوراق', 'بانک', 'سکه', 'صندوق درآمد ثابت', 'صندوق سهامی',
       'صندوق قابل معامله', 'صندوق مختلط', 'طلا', 'فلزات', 'مشتقات', 'نفت'],
      dtype='object')

In [14]:
y_one_hot = pd.get_dummies(y).values

In [15]:
y_one_hot

array([[0, 1, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.2, random_state=50)

In [45]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(383, 44, 10)
(96, 44, 10)
(383, 12)
(96, 12)


### **Build Model**

In [33]:
def build_model(input_shape):

    model = tf.keras.models.Sequential()

    # 1st conv layer
    model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=input_shape, kernel_regularizer=tf.keras.regularizers.l2(0.001)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.MaxPooling2D((3, 3), strides=(2,2), padding='same'))

    # 2nd conv layer
    # model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)))
    # model.add(tf.keras.layers.BatchNormalization())
    # model.add(tf.keras.layers.MaxPooling2D((3, 3), strides=(2,2), padding='same'))

    # 3rd conv layer
    # model.add(tf.keras.layers.Conv2D(32, (2, 2), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)))
    # model.add(tf.keras.layers.BatchNormalization())
    # model.add(tf.keras.layers.MaxPooling2D((2, 2), strides=(2,2), padding='same'))

    # flatten output and feed into dense layer
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    tf.keras.layers.Dropout(0.5)

    # softmax output layer
    model.add(tf.keras.layers.Dense(12, activation='softmax'))

    optimiser = tf.optimizers.Adam(learning_rate=0.0001)
    # compile model
    model.compile(optimizer=optimiser,
                  loss='categorical_crossentropy',
                  metrics=["accuracy"])

    # print model parameters on console
    model.summary()

    return model

In [34]:
X_train.shape

(383, 44, 10)

In [35]:
input_shape = (X_train.shape[1], X_train.shape[2], 1)
model = build_model(input_shape)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 42, 8, 64)         640       
                                                                 
 batch_normalization_3 (Batc  (None, 42, 8, 64)        256       
 hNormalization)                                                 
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 21, 4, 64)        0         
 2D)                                                             
                                                                 
 flatten_2 (Flatten)         (None, 5376)              0         
                                                                 
 dense_4 (Dense)             (None, 64)                344128    
                                                                 
 dense_5 (Dense)             (None, 12)               

In [36]:
def train(model, epochs, batch_size, patience, X_train, y_train, X_validation, y_validation):

    earlystop_callback = tf.keras.callbacks.EarlyStopping(monitor="accuracy", min_delta=0.001, patience=patience)

    # train model
    history = model.fit(X_train,
                        y_train,
                        epochs=epochs,
                        batch_size=batch_size,
                        validation_data=(X_validation, y_validation),
                        callbacks=[earlystop_callback])
    return history


In [37]:
EPOCHS = 100
BATCH_SIZE = 32
PATIENCE = 5

In [38]:
history = train(model, EPOCHS, BATCH_SIZE, PATIENCE, X_train.astype(np.float32), y_train, X_test.astype(np.float32), y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100


In [32]:
# evaluate network on test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print("\nTest loss: {}, test accuracy: {}".format(test_loss, 100*test_acc))


Test loss: 3.0333518981933594, test accuracy: 19.79166716337204


In [120]:
model.save('model.h5')

### **Prediction**

In [138]:
def prediction(audio_path):
    audio_file = glob(audio_path)
    signal, sample_rate = lr.load(audio_file[0])
    if len(signal) >= SAMPLES_TO_CONSIDER:
      signal = signal[:SAMPLES_TO_CONSIDER]
      MFCCs = lr.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=10, n_fft=2048, hop_length=512).T
      MFCCs = MFCCs.reshape(1,44,10,1)
      prediction = model.predict(MFCCs)[0]
      index = np.where(prediction == np.amax(prediction))[0][0]
      result = categories[index]
    return result

In [144]:
prediction('/content/dataset/1/7-(1).mp3')





'فلزات'