In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import GroupKFold
import matplotlib.pyplot as plt
import pandas as pd
import librosa
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score 
from sklearn.metrics import f1_score
import os

In [2]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, LSTM, SimpleRNN, Input, Masking, Embedding
from tensorflow.keras.layers import Conv2D, MaxPooling2D, LeakyReLU
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD

In [3]:
from tensorflow.keras.layers import Input

In [2]:
from scipy.stats import zscore

In [4]:
import tensorflow as tf

In [5]:
from sklearn.linear_model import LogisticRegression

### Подготовка обучающей выборки

In [3]:
df = pd.read_csv('targets.tsv', sep='\t')

In [4]:
path_to_dir = '/home/murad/projects/train'

In [5]:
def get_X_Y(df, path_to_dir):
    max_len = 50
    X = []
    Y = df['class'].values
    for file_name in tqdm(df['path'].values):
        path_file = f'{path_to_dir}/{file_name}.wav'
        X.append(process_file_mfcc(path_file))
    X = np.array(X)
    return X, Y

In [76]:
def process_file(path_file):
    max_len = 50
    file, sr = librosa.load(path_file, sr=16000)
    file = file[round(len(file)*0.3): round(len(file)*0.7)]
    features = librosa.feature.melspectrogram(file, sr=16000)
#     features = features[:, :max_len]
#     features = np.pad(features, ((0, 0), (0, max_len - len(features[0]))))
#     features = np.array([zscore(feature) for feature in features])
    features = features.dot(features.transpose())
    features = features.flatten()
    return features

In [10]:
def process_file_pitch(path_file):
    max_len = 50
    file, sr = librosa.load(path_file, sr=16000)
    file = file[round(len(file)*0.3): round(len(file)*0.7)]
    features = librosa.yin(file, 50, 400)
    features = features[:max_len]
    features = np.pad(features, (0, max_len - len(features)))
    return features

In [11]:
def process_file_rnn(path_file):
    max_len = 50
    file, sr = librosa.load(path_file, sr=16000)
    file = file[round(len(file)*0.3): round(len(file)*0.7)]
    features = librosa.feature.melspectrogram(file, sr=16000)
    features = features[:, :max_len]
    features = np.pad(features, ((0, 0), (0, max_len - len(features[0]))))
    return features

In [12]:
def process_file_cnn(path_file):
    max_len = 50
    file, sr = librosa.load(path_file, sr=16000)
    file = file[round(len(file)*0.3): round(len(file)*0.7)]
    features = librosa.feature.melspectrogram(file, sr=16000)
    features = features[:, :max_len]
    features = np.pad(features, ((0, 0), (0, max_len - len(features[0]))))
    features = spec_to_image(features)
    features = features.transpose()
    features = np.expand_dims(features, axis=2)
    return features

In [6]:
def process_file_mfcc(path_file):
    max_len = 50
    file, sr = librosa.load(path_file, sr=16000)
    file = file[round(len(file)*0.3): round(len(file)*0.7)]
    features = librosa.feature.mfcc(file, sr=16000, n_mfcc=40)
    features = np.array([zscore(feature) for feature in features])
    features = features.dot(features.transpose())
#     features = features[:, :max_len]
#     features = np.pad(features, ((0, 0), (0, max_len - len(features[0]))))
    features = features.flatten()
    return features

In [14]:
def spec_to_image(spec, eps=1e-6):
    mean = spec.mean()
    std = spec.std()
    spec_norm = (spec - mean) / (std + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()
    spec_scaled = 255 * (spec_norm - spec_min) / (spec_max - spec_min)
    spec_scaled = spec_scaled.astype(np.uint8)
    return spec_scaled

In [59]:
path_file = f'{path_to_dir}/{df["path"].iloc[0]}.wav'
a = process_file_cnn(path_file)

In [218]:
file, sr = librosa.load(path_file, sr=16000)

In [224]:
x = librosa.feature.mfcc(file, sr=16000, n_mfcc=40)

In [227]:
x.shape

(40, 104)

In [7]:
X, Y = get_X_Y(df, path_to_dir)

100%|██████████| 13936/13936 [01:53<00:00, 123.32it/s]


In [9]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=2, stratify=Y, shuffle=True)

In [11]:
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

In [265]:
lg = LogisticRegression(random_state=27, n_jobs=7)

In [21]:
clf = RFC(random_state=27, n_jobs=7)

In [25]:
svc = LinearSVC(random_state=27)

In [12]:
knn = KNeighborsClassifier(n_jobs=7)

In [43]:
knn.weights

'uniform'

In [82]:
knn1 = KNeighborsClassifier(n_jobs=7)

In [278]:
nb = GaussianNB()

In [279]:
a = nb.fit(x_train, y_train)

In [26]:
a = svc.fit(x_train, y_train)



In [188]:
a = clf.fit(x_train, y_train)

In [266]:
a = lg.fit(x_train, y_train)

In [20]:
a = knn.fit(x_train, y_train)

In [21]:
y_pred = a.predict(x_test)

In [84]:
y_pred_1 = b.predict(x_test)

In [24]:
accuracy_score(y_test, y_pred)

0.9949784791965567

### Формирование таблицы

In [25]:
path_to_test = '/home/murad/projects/test'

In [26]:
files = os.listdir(path_to_test)

In [27]:
data = {'path': [], 'label': []}

In [None]:
for file in tqdm(files):
    if os.path.getsize(f'{path_to_test}/{file}') < 30000:
        continue
    feats = process_file_mfcc(f'{path_to_test}/{file}')
    label = a.predict([feats])
    data['path'].append(file[:-4])
    data['label'].append(label[0])

In [196]:
df_test = pd.DataFrame(data=data)

### Нейронная сеть

In [78]:
import tensorflow as tf

In [None]:
m

#### RNN model

In [145]:
model = Sequential()
model.add(Input((50, 128)))
model.add(SimpleRNN(64, mask))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [146]:
model.summary()

Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_9 (SimpleRNN)     (None, 64)                12352     
_________________________________________________________________
dense_34 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_35 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_36 (Dense)             (None, 1)                 33        
Total params: 18,625
Trainable params: 18,625
Non-trainable params: 0
_________________________________________________________________


In [296]:
k = x_train[0]

In [297]:
k.shape

(50, 128)

In [315]:
np.expand_dims(k, axis=2).shape

(50, 128, 1)

In [301]:
z.shape

(50, 128, 1)

In [None]:
la

### CNN model

In [205]:
model = Sequential()
model.add(Input((50, 128, 1)))
model.add(Conv2D(16, kernel_size=(4, 5), activation='linear'))
model.add(LeakyReLU(0.2))
model.add(MaxPooling2D((6, 7), strides=(3, 3)))
model.add(Conv2D(32, kernel_size=(3, 3), activation='linear'))
model.add(LeakyReLU(0.2))
model.add(MaxPooling2D((4, 5), strides=(2, 2)))
model.add(Conv2D(64, kernel_size=(2, 2), activation='linear'))
model.add(LeakyReLU(0.2))
model.add(MaxPooling2D((2, 3), strides=(1, 1)))
model.add(Flatten())
model.add(Dense(2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [206]:
model.summary()

Model: "sequential_36"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_69 (Conv2D)           (None, 47, 124, 16)       336       
_________________________________________________________________
leaky_re_lu_30 (LeakyReLU)   (None, 47, 124, 16)       0         
_________________________________________________________________
max_pooling2d_57 (MaxPooling (None, 14, 40, 16)        0         
_________________________________________________________________
conv2d_70 (Conv2D)           (None, 12, 38, 32)        4640      
_________________________________________________________________
leaky_re_lu_31 (LeakyReLU)   (None, 12, 38, 32)        0         
_________________________________________________________________
max_pooling2d_58 (MaxPooling (None, 5, 17, 32)         0         
_________________________________________________________________
conv2d_71 (Conv2D)           (None, 4, 16, 64)       

In [207]:
opt = tf.keras.optimizers.Adam(learning_rate=0.001)

In [208]:
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
early_stopping=EarlyStopping(monitor='val_loss', patience=3)

In [209]:
x_train.shape

(12542, 50, 128, 1)

In [210]:
histoty = model.fit(x_train, y_train, batch_size=32, epochs=30, 
                    verbose=1, validation_split=0.3, callbacks=[early_stopping])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30


In [194]:
x_train_s.shape

(12542, 6400)

In [69]:
x_train[4].mean()

154.293770975866

In [237]:
model = Sequential()
model.add(tf.keras.Input(shape=(6400)))
# model.add(Dense(8, activation='relu', input_shape=(6400, )))
# model.add(Dropout(0.2))
# model.add(Dense(4, activation='relu', input_shape=(6400, )))
# model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

In [196]:
model.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_38 (Dense)             (None, 1)                 6401      
Total params: 6,401
Trainable params: 6,401
Non-trainable params: 0
_________________________________________________________________


In [238]:
opt = tf.keras.optimizers.Adam(learning_rate=0.0001)

In [239]:
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
early_stopping=EarlyStopping(monitor='val_loss', patience=3)

In [240]:
histoty = model.fit(x_train, y_train, batch_size=32, epochs=20, 
                    verbose=1, validation_split=0.3, callbacks=[early_stopping])

Epoch 1/20


ValueError: in user code:

    /home/murad/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /home/murad/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/murad/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/murad/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/murad/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/murad/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /home/murad/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:754 train_step
        y_pred = self(x, training=True)
    /home/murad/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /home/murad/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py:255 assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer sequential_37 is incompatible with the layer: expected axis -1 of input shape to have value 6400 but received input with shape (None, 40, 50)


In [70]:
score = model.evaluate(x_test, y_test, batch_size=32, verbose=1)



In [119]:
path_to_test = '/home/murad/projects/test'

In [127]:
files = os.listdir(path_to_test)

In [129]:
files.sort()

In [143]:
files[245]

'14630b84dcf5dbf2d224a99c0bcbd6ac.wav'

In [141]:
x = process_file(f'{path_to_test}/{files[245]}')

In [142]:
clf.predict([x])

array([0])

In [211]:
from scipy.stats import zscore

In [212]:
a = np.array([1, 2, 3])

In [213]:
zscore(a)

array([-1.22474487,  0.        ,  1.22474487])

In [316]:
a = np.array([[ [1, 2, 3],
                [4, 5, 6]
              ],
              
                [ [7, 8, 9],
                  [10, 11, 12],
                  [2, 2, 2],
                  [1, 1, 1],
                  [1, 2, 3]
                ]
             ], )

  a = np.array([[ [1, 2, 3],


In [317]:
a.shape

(2,)

In [328]:
a = np.matrix([[1, 2, 3, 4, 7],
               [4, 5, 6, 7, 9],
               [7, 8, 9, 10, 11]])

In [329]:
a.shape

(3, 5)

In [321]:
b = np.matrix([[1, 2, 3],
               [4, 5, 6],
               [7, 8, 9]])

In [330]:
b.shape

(3, 3)

In [331]:
c = a.dot(a.transpose())

In [333]:
c.shape

(3, 3)

In [334]:
b = b.dot(b.transpose())

In [335]:
b.shape

(3, 3)

In [308]:
c.shape

(2, 3)