In [4]:
from pathlib import Path

import audiofile
import librosa
import librosa.feature as LF
import numpy as np
import pandas as pd

# All input files are read from the `data` folder under the current working directory.
datadir = Path.cwd().joinpath('data')

In [7]:
from VoiceClassifier.common import AudioClip

# Base file name of the recording; the audio file is `<prefix>.m4a` inside `datadir`.
prefix = r"ichinose_tamaki_taidan"
# AudioClip is the project's own loader (VoiceClassifier.common).
clip = AudioClip(datadir.joinpath(f"{prefix}.m4a"))

In [44]:
# Difference between the first two rows of the audio data
# (presumably the left/right channels, laid out as (channels, samples) -- TODO confirm).
# A sum and mean close to zero mean the two rows carry nearly the same signal.
dChannel = clip.data[0] - clip.data[1]
(np.sum(dChannel), np.mean(dChannel))

(19.769806, 1.135981e-07)

In [32]:
def load_data(prefix: str, data_dir: "Path | None" = None) -> tuple[pd.DataFrame, dict[str, int]]:
    """Load the merged-subtitle table for a recording and integer-encode its speakers.

    Reads ``<data_dir>/<prefix>_merge-subs.csv``. The first two CSV columns are
    taken as the speaker name and the ASS subtitle index; only the columns
    ``Speaker``, ``ASS_index``, ``Start_samples`` and ``End_samples`` are kept.

    Parameters
    ----------
    prefix : str
        Base name of the recording; the CSV file name is derived from it.
    data_dir : Path or None, optional
        Folder containing the CSV. When omitted, the notebook-level ``datadir``
        is used, which is the behaviour existing callers rely on.

    Returns
    -------
    df_ts : pd.DataFrame
        One row per subtitle line, with ``Speaker`` replaced by its integer code.
    encoding : dict[str, int]
        Speaker name -> integer code, numbered in order of first appearance.
    """
    base_dir = datadir if data_dir is None else Path(data_dir)

    df_ts = (
        pd.read_csv(base_dir / f"{prefix}_merge-subs.csv", index_col=[0, 1])
        .rename_axis(index=["Speaker", "ASS_index"])
        .reset_index()
        .loc[:, ["Speaker", "ASS_index", "Start_samples", "End_samples"]]
    )

    encoding: dict[str, int] = {
        name: code for code, name in enumerate(df_ts["Speaker"].unique())
    }
    # `map` yields an integer column directly. `replace` on an object column
    # relies on implicit downcasting, which pandas has deprecated.
    df_ts["Speaker"] = df_ts["Speaker"].map(encoding)
    return df_ts, encoding

# Subtitle timing table and the speaker -> integer mapping for this recording.
df_ts, encoding = load_data(prefix=prefix)

print(encoding)
df_ts.head(5)

{'Ichinose': 0, 'Tamaki': 1}


Unnamed: 0,Speaker,ASS_index,Start_samples,End_samples
0,0,6,12522195,12572910
1,0,12,13121955,13847841
2,0,17,13948830,14142870
3,0,23,14737338,14926527
4,0,28,15316812,15636978


In [62]:
def get_ts(ind: int, df: "pd.DataFrame | None" = None) -> tuple[int, int]:
    """Return the ``(Start_samples, End_samples)`` pair for one subtitle line.

    Parameters
    ----------
    ind : int
        Value to look up in the ``ASS_index`` column.
    df : pd.DataFrame or None, optional
        Table with ``ASS_index``, ``Start_samples`` and ``End_samples`` columns.
        When omitted, the notebook-level ``df_ts`` is looked up at call time, so
        re-running the loading cell is picked up without redefining this function.

    Returns
    -------
    tuple[int, int]
        Start and end taken from the first matching row, as plain Python ints.

    Raises
    ------
    IndexError
        If no row has ``ASS_index == ind``.
    """
    if df is None:
        # Resolved on every call rather than frozen at definition time.
        df = df_ts

    matches = df.loc[df["ASS_index"] == ind, ["Start_samples", "End_samples"]]
    if matches.empty:
        raise IndexError(f"no row with ASS_index == {ind!r}")

    start, end = matches.iloc[0]
    return int(start), int(end)

def double_mean(X: np.ndarray) -> np.ndarray:
    """Average ``X`` over its first axis, then average that result over its axis 1.

    For a 3-D input of shape ``(a, b, c)`` the result has shape ``(b,)``.
    """
    first_axis_avg = np.mean(X, axis=0)
    return np.mean(first_axis_avg, axis=1)

In [43]:
# Audio segment for subtitle line 12, used as the sample input for the feature cells below.
test = clip.clip(*get_ts(ind=12))
test.shape

(2, 725886)

In [65]:
# 40 MFCCs for the test segment, reduced by double_mean to a single vector.
double_mean(
    LF.mfcc(y=test, sr=clip.rate, n_mfcc=40).T
).shape

(40,)

In [56]:
# Magnitude of the short-time Fourier transform of the test segment.
# This cell needs the top-level `librosa` name to be imported:
# `import librosa.feature as LF` binds only `LF`, not `librosa`.
stft = np.abs(librosa.stft(y=test))
stft.shape

(2, 1025, 1418)

In [70]:
# Chroma features computed from the magnitude spectrogram, reduced by double_mean.
double_mean(
    LF.chroma_stft(S=stft, sr=clip.rate).T
).shape

(12,)

In [67]:
# Mel spectrogram of the test segment, reduced by double_mean.
double_mean(
    LF.melspectrogram(y=test, sr=clip.rate).T
).shape

(128,)

In [69]:
# Spectral contrast of the test segment, reduced by double_mean.
double_mean(
    LF.spectral_contrast(y=test, sr=clip.rate).T
).shape

(7,)

In [72]:
from librosa.effects import harmonic 
# Tonnetz features computed on the harmonic component of the test segment,
# reduced by double_mean.
double_mean(
    LF.tonnetz(y=harmonic(test), sr=clip.rate).T
).shape

(6,)