In [1]:
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import librosa as lb

In [2]:
path = "/home/rajesh/Desktop/Datasets/Saraga3stem/train/Aaniraimekkani/"
files = os.listdir(path)
files

['vocals.wav',
 'violin.wav',
 'mixture.wav',
 'mridangam.wav',
 'accompaniment.wav']

In [3]:
audio, fs = lb.load(path+'vocals.wav')
audio, fs

(array([-0.00065906, -0.00089022, -0.00050186, ..., -0.00017095,
        -0.00015366,  0.        ], dtype=float32),
 22050)

In [4]:
stft = lb.stft(audio)
stft.shape

(1025, 12365)

In [5]:
magnitude = np.abs(stft)

In [149]:
magnitude.shape[1]

12365

In [246]:
'''
FUNCTION: ENCODES (SPLITS & MERGE) THE INPUT SPECTOGRAM INTO VARIOUS BINS (See Ref. Figure)
USE: To handle dynamic inputs to neural netwok.
Parameters:
(1) array: Input spectogram of size m*n
(2) frame: No of bins to be grouped together. Default is 3, If frame = no of columns in input spect/3, 
    and skip=3, then output is same as input.
(3) skip: Overlap within the bins (1<=skip<=3), default is 1 to maintain loss of info.
    either use skip =1 or frame = 1 for no loss of info.

Note: Batches/Grouping is always 3.
'''

def encode(array, frame=3, skip=1):
    
    data = []
    for i in range(0, array.shape[1]-(3*frame), skip):
        y_ = array[:, i:i+frame]
        y = array[:, i+frame:i+(2*frame)]
        y__ = array[:, i+(2*frame):i+(3*frame)]
        concat = np.concatenate((y_, y, y__), axis=0, casting="same_kind")
        data.append(concat)
        
    return np.array(data)

In [247]:
inp = encode(magnitude, 3, 1)
inp.shape

(12356, 3075, 3)

In [268]:
def decode(array, frame=3, skip=1):
    reconst = []
    l, m, n = array.shape
    flag = False
    #print(l, m, n)
    for i in array:
        
        y_ = i[:int(m/3)]
        y = i[int(m/3):int(2*(m/3))]
        y__ = i[int(2*(m/3)):m]

        if skip == 1:
            if flag:
                #print(y_.shape)
                t = np.array([y_[:, 2]]).T
                #print(t.shape)
                reconst = np.concatenate((reconst, t), axis=1, casting="same_kind")
            else:
                reconst = y_
                
        #print(reconst.shape)
        flag = True
        #print(c)
        
    if skip == 1:
        reconst = np.concatenate((reconst, y, y__), axis=1, casting="same_kind")

    return np.array(reconst)



reconst = decode(inp)

In [269]:
reconst.shape, magnitude.shape

((1025, 12364), (1025, 12365))

In [270]:
magnitude

array([[1.21750738e-02, 3.63594014e-03, 5.33618825e-03, ...,
        1.94633771e-02, 8.41832720e-03, 4.48640343e-03],
       [1.47315767e-02, 1.05061112e-02, 2.40587187e-03, ...,
        1.31324315e-02, 1.45874638e-02, 7.93578476e-03],
       [1.76880620e-02, 1.41496817e-02, 1.12638343e-02, ...,
        8.40157829e-03, 8.84553231e-03, 1.28661422e-02],
       ...,
       [3.02451172e-05, 1.57026971e-05, 6.27625019e-09, ...,
        4.37095782e-09, 1.67638882e-05, 6.62301973e-05],
       [3.01385462e-05, 1.56496753e-05, 1.06499725e-08, ...,
        3.48854279e-09, 1.67573526e-05, 6.62284219e-05],
       [3.01076107e-05, 1.56377064e-05, 1.70433356e-09, ...,
        5.39927481e-09, 1.67656544e-05, 6.62283564e-05]], dtype=float32)

In [271]:
reconst

array([[1.21750738e-02, 3.63594014e-03, 5.33618825e-03, ...,
        1.25449663e-02, 1.94633771e-02, 8.41832720e-03],
       [1.47315767e-02, 1.05061112e-02, 2.40587187e-03, ...,
        8.45798571e-03, 1.31324315e-02, 1.45874638e-02],
       [1.76880620e-02, 1.41496817e-02, 1.12638343e-02, ...,
        7.71837588e-03, 8.40157829e-03, 8.84553231e-03],
       ...,
       [3.02451172e-05, 1.57026971e-05, 6.27625019e-09, ...,
        4.62968597e-09, 4.37095782e-09, 1.67638882e-05],
       [3.01385462e-05, 1.56496753e-05, 1.06499725e-08, ...,
        1.77742276e-09, 3.48854279e-09, 1.67573526e-05],
       [3.01076107e-05, 1.56377064e-05, 1.70433356e-09, ...,
        3.96657196e-09, 5.39927481e-09, 1.67656544e-05]], dtype=float32)

In [278]:
inp[12355][3074]

array([3.9665720e-09, 5.3992748e-09, 1.6765654e-05], dtype=float32)

In [204]:
a = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]])
b = np.array([[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2]])
c = np.array([[3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3]])
concat = np.concatenate((a, b, c), axis=1, casting="same_kind")
concat.shape

(4, 9)

In [205]:
concat

array([[1, 1, 1, 2, 2, 2, 3, 3, 3],
       [1, 1, 1, 2, 2, 2, 3, 3, 3],
       [1, 1, 1, 2, 2, 2, 3, 3, 3],
       [1, 1, 1, 2, 2, 2, 3, 3, 3]])