In [26]:
import tensorflow as tf
import librosa

def mfcc(audio, sr, n_mfcc=13, frame_length=1024, frame_step=512,
         num_mel_bins=40, lower_edge_hertz=80.0, upper_edge_hertz=None,
         log_offset=1e-6):
    """Compute MFCCs and the log-mel spectrogram of an audio signal.

    The pipeline is: STFT -> magnitude spectrogram -> mel filterbank ->
    natural log -> DCT (``tf.signal.mfccs_from_log_mel_spectrograms``) ->
    keep the first ``n_mfcc`` coefficients.

    Args:
        audio: Audio samples. Anything that is not already a ``tf.Tensor``
            is converted to a float32 tensor.
        sr: Sample rate of ``audio`` in Hz.
        n_mfcc: Number of leading cepstral coefficients to return. Must be
            between 1 and ``num_mel_bins`` inclusive.
        frame_length: STFT window length in samples; also used as the FFT
            length.
        frame_step: Hop between consecutive STFT frames in samples.
        num_mel_bins: Number of mel filters.
        lower_edge_hertz: Lowest frequency covered by the mel filterbank.
        upper_edge_hertz: Highest frequency covered by the mel filterbank.
            ``None`` (the default) means ``sr / 2``.
        log_offset: Small constant added to the mel spectrogram before the
            logarithm so that silent frames do not produce ``log(0)``.

    Returns:
        A tuple ``(mfccs, log_mel_spectrograms)``. ``mfccs`` has ``n_mfcc``
        entries on its last axis and ``log_mel_spectrograms`` has
        ``num_mel_bins`` entries on its last axis.

    Raises:
        ValueError: If ``n_mfcc`` is not in ``[1, num_mel_bins]``. Without
            this check the final slice would silently return fewer
            coefficients than requested (or drop trailing ones for a
            negative value).
    """
    # Validate first so a bad request fails before any tensor work is done.
    if not 1 <= n_mfcc <= num_mel_bins:
        raise ValueError(
            f"n_mfcc must be between 1 and num_mel_bins={num_mel_bins}, "
            f"got {n_mfcc}"
        )
    if upper_edge_hertz is None:
        upper_edge_hertz = sr / 2

    if not isinstance(audio, tf.Tensor):
        audio = tf.convert_to_tensor(audio, dtype=tf.float32)

    stfts = tf.signal.stft(
        audio,
        frame_length=frame_length,
        frame_step=frame_step,
        fft_length=frame_length,
    )

    # Magnitude spectrogram.
    spectrograms = tf.abs(stfts)
    # The last axis of the STFT holds the frequency bins.
    num_spectrogram_bins = stfts.shape[-1]

    # Weight matrix that maps linear-frequency bins onto the mel scale.
    # It is built in the spectrogram's dtype so that tensor inputs other
    # than float32 do not hit a dtype mismatch in the product below.
    linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=num_mel_bins,
        num_spectrogram_bins=num_spectrogram_bins,
        sample_rate=sr,
        lower_edge_hertz=lower_edge_hertz,
        upper_edge_hertz=upper_edge_hertz,
        dtype=spectrograms.dtype,
    )
    # Contract the frequency axis with the filterbank (matrix product).
    mel_spectrograms = tf.tensordot(
        spectrograms, linear_to_mel_weight_matrix, 1)
    # Natural log of the mel spectrogram, offset to avoid log(0).
    log_mel_spectrograms = tf.math.log(mel_spectrograms + log_offset)
    # Cepstral coefficients from the log-mel spectrogram; keep the first
    # n_mfcc of them.
    mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrograms)
    mfccs = mfccs[..., :n_mfcc]

    return mfccs, log_mel_spectrograms

def porownanie_sygnalow(signal, rozszerzony, sr):
    """Plot the original and the extended signal on two stacked axes.

    Args:
        signal: Samples of the original signal.
        rozszerzony: Samples of the extended signal.
        sr: Sample rate in Hz, used to convert sample indices to seconds.
    """
    # Build both time axes up front: (title, time in seconds, samples).
    panels = [
        ("Oryginal", np.arange(len(signal)) / sr, signal),
        ("Rozszerzony", np.arange(len(rozszerzony)) / sr, rozszerzony),
    ]

    _, axes = plt.subplots(nrows=2)
    for axis, (title, seconds, samples) in zip(axes, panels):
        axis.plot(seconds, samples)
        axis.set(title=title, xlabel="Czas [s]", ylabel="Amplituda")

    plt.tight_layout()
    plt.show()
    
# Script entry point: load a sample recording, compute its MFCCs and print them.
if __name__ == "__main__":
    # sr=16000 asks librosa to return the audio at a 16 kHz sample rate.
    signal, sr = librosa.load("a1.wav", sr=16000)
    mfccs, _ = mfcc(signal, sr) # keep only the MFCCs; the log-mel spectrogram is discarded
    print(mfccs)
    

tf.Tensor(
[[ 1.87742405e+01  3.43011856e+00 -2.15512633e+00  1.82829463e+00
  -3.74581873e-01  5.96915364e-01 -1.57334518e+00 -3.92444283e-01
   6.81503043e-02  3.38592768e-01 -1.31430149e+00 -1.53580219e-01
   4.84670438e-02]
 [ 1.57665071e+01  3.78458524e+00 -2.62218142e+00  2.16385841e+00
  -4.06966597e-01  8.65159929e-01 -1.91162229e+00 -3.86939943e-03
   2.51696289e-01  6.05912030e-01 -1.07991052e+00  1.35081738e-01
   8.48232284e-02]
 [ 1.36139860e+01  4.19211721e+00 -3.20945501e+00  2.18733716e+00
  -4.01715696e-01  5.97185135e-01 -1.99873590e+00 -2.67680198e-01
   1.32281050e-01  8.51957723e-02 -1.09730947e+00 -2.98939615e-01
  -1.25483662e-01]
 [ 1.17383862e+01  4.50710011e+00 -3.49937201e+00  2.10765624e+00
  -2.73330271e-01  1.88296229e-01 -1.65850389e+00 -3.14269364e-01
   1.58833176e-01  6.95471466e-02 -1.12044358e+00 -1.86688647e-01
  -3.22262436e-01]
 [ 1.00383234e+01  4.85653400e+00 -3.15614939e+00  2.04429173e+00
   1.21236322e-02  2.43012577e-01 -1.30773032e+00 -1.37