In [14]:
import numpy as np
import pandas as pd
import librosa as lr
import matplotlib.pyplot as plt
from types import SimpleNamespace
import math

### **Audio Configuration**

In [5]:
# Shared audio settings used by the cells below.
#   base_path: directory prefix (ending in "/") that file names are appended to
#   sr:        sample rate handed to the audio loader
#   mono:      mono flag for loading
audio_config = SimpleNamespace(
    base_path="/workspaces/system_project/audio_processing/assets/audio" + "/",
    sr=44100,
    mono=True,
)

In [20]:
# Load the first sample clip at the sample rate declared in `audio_config`,
# so this exploratory load cannot drift from the rate the pipeline uses.
audio, sr = lr.load("assets/audio/file_1.mp3", sr=audio_config.sr)

1295

### **1. Read Audio**

In [272]:
def get_audio_contents(path, block_ms=250):
    """Load an audio file and cut it into consecutive fixed-length blocks.

    Args:
        path: File name appended to ``audio_config.base_path``.
        block_ms: Duration of each block in milliseconds (default 250).

    Returns:
        A list of sample arrays, one per complete block, in time order.
        Trailing samples that do not fill a whole block are dropped.

    Raises:
        ValueError: If ``block_ms`` is not positive.
    """
    if block_ms <= 0:
        raise ValueError("block_ms must be positive")

    audio_content, _ = lr.load(audio_config.base_path + path, sr=audio_config.sr)

    # Samples per block, kept as a float so that a non-integer block length
    # does not accumulate rounding drift from one boundary to the next.
    samples_per_block = audio_config.sr * block_ms / 1000

    # Count complete blocks from the block length itself. Flooring to whole
    # seconds first (len // sr * 4) discarded every complete block that lay
    # inside the final partial second of the clip.
    block_count = int(len(audio_content) // samples_per_block)

    boundaries = [int(samples_per_block * i) for i in range(block_count + 1)]
    return [
        audio_content[start:end]
        for start, end in zip(boundaries, boundaries[1:])
    ]

### **2. Make Spectrogram**

In [284]:
def make_spectrogram(audio_contents):
    """Compute one short-time Fourier transform per audio block.

    Args:
        audio_contents: Iterable of audio sample blocks.

    Returns:
        A list holding ``lr.stft(block, n_fft=1024)`` for each block,
        in the same order as the input.
    """
    return [lr.stft(block, n_fft=1024) for block in audio_contents]

## **Pipeline**

In [285]:
class FingerprintPipeline:
    """Chain an audio reader and a spectrogram generator into a fingerprint.

    The fingerprint is the element-wise magnitude of the spectrograms
    produced for the blocks of one audio file. The most recently computed
    fingerprint is retained and available through ``get_fingerprint()``.
    """

    def __init__(self, reader_function, spectrogram_genrator_function):
        """Store the two pipeline stages.

        Args:
            reader_function: Callable taking an audio path and returning a
                sequence of audio blocks.
            spectrogram_genrator_function: Callable taking that sequence and
                returning one spectrogram per block. (The parameter spelling
                is kept so existing keyword calls keep working.)
        """
        self.audio_configs = audio_config
        self.audio_content_reader = reader_function
        self.spectrogram_generator = spectrogram_genrator_function
        self.__audio_fingerprint = None

    def compute_fingerprint(self, audio_path):
        """Compute, store and return the fingerprint of ``audio_path``.

        Args:
            audio_path: Path handed unchanged to the reader function.

        Returns:
            ``np.abs`` of the array built from the generated spectrograms.
        """
        audio_blocks = self.audio_content_reader(audio_path)
        spectrogram_matrix = np.array(self.spectrogram_generator(audio_blocks))

        # Remember the result; previously nothing ever assigned this
        # attribute, so get_fingerprint() could only return None.
        self.__audio_fingerprint = np.abs(spectrogram_matrix)
        return self.__audio_fingerprint

    def __fingerprint__(self, audio_path):
        """Backward-compatible alias for ``compute_fingerprint``."""
        return self.compute_fingerprint(audio_path)

    def get_fingerprint(self):
        """Return the last computed fingerprint, or None if none was computed."""
        return self.__audio_fingerprint

In [286]:
# Wire the block reader and the spectrogram generator into one pipeline instance.
fingerprint_pipeline = FingerprintPipeline(
    reader_function=get_audio_contents,
    spectrogram_genrator_function=make_spectrogram,
)

In [287]:
# Run the pipeline on one clip and inspect the shape of the resulting magnitude array.
# NOTE(review): the variable is named `file_1_...` but the path loads
# "file_2.mp3" - confirm which file is intended.
file_1_fingerprint = fingerprint_pipeline.__fingerprint__("file_2.mp3")

file_1_fingerprint.shape

(500, 513, 44)

In [289]:
# Print the peak magnitude of every row of every block, block by block.
# The maximum is taken over the last axis in one vectorized NumPy reduction
# instead of calling Python's builtin max() on each row; the C-order ravel
# keeps the original block-major printing order.
_sum = 0  # kept defined because a later cell still displays `_sum`
for peak in file_1_fingerprint.max(axis=-1).ravel():
    print(peak)

11.105773
17.307392
26.897654
18.28024
26.948854
31.163267
25.982079
19.1279
9.767877
3.7961485
2.1296453
1.5665464
1.5814154
1.3173325
1.4121505
1.0336256
0.8603796
0.24869697
0.39273384
0.91201526
0.58079153
0.25946164
0.1863413
0.16598922
0.13174324
0.12870602
0.13142496
0.13832118
0.13152993
0.11977854
0.104876
0.10349921
0.102772355
0.08858152
0.06948297
0.09121819
0.10478735
0.090490595
0.0738115
0.07857035
0.086950615
0.082891025
0.07575065
0.074019775
0.07338061
0.07184019
0.069174156
0.066493206
0.06541687
0.06379233
0.06194224
0.060157057
0.058418717
0.05737059
0.05637135
0.054262772
0.053037792
0.05131249
0.05160794
0.052301846
0.051110387
0.05082216
0.051785506
0.050261993
0.04855389
0.04744276
0.046651185
0.04593304
0.045172352
0.044479202
0.044105183
0.04334998
0.042480968
0.041914582
0.041418504
0.04053566
0.04008889
0.03961969
0.038769297
0.038232785
0.03784267
0.03754362
0.037045497
0.03651545
0.03622491
0.035864487
0.035262696
0.03494377
0.03448954
0.033973955
0.03358

In [279]:
# NOTE(review): the value displayed for this cell comes from an earlier
# execution (In [279]); the loop cell above (In [289]) only assigns
# `_sum = 0` and never accumulates into it, so a fresh run would show 0 - confirm.
_sum

264.6240964629369

In [10]:
# Reload the first sample clip at the sample rate declared in `audio_config`
# (instead of a duplicated literal) and display the raw waveform samples.
audio, sr = lr.load("assets/audio/file_1.mp3", sr=audio_config.sr)

audio

array([ 0.00000000e+00,  1.45246927e-17, -1.01258024e-17, ...,
       -1.70824188e-11, -1.80069848e-11,  1.27721479e-11], dtype=float32)

In [13]:
# Magnitude of the full two-sided DFT of the whole clip.
# NOTE(review): `audio` is real-valued, so the spectrum is mirrored (the output
# repeats the same magnitudes at both ends); np.fft.rfft would return only the
# non-redundant half - consider it if the mirrored half is not needed.
np.abs(np.fft.fft(audio))

array([1138.46899578,  121.94817418,  301.82018428, ...,   43.17280438,
        301.82018428,  121.94817418])