In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# (Re)load the autoreload extension and enable mode 2, so imported modules are
# reloaded before each cell runs and source edits are picked up without a
# kernel restart.
%reload_ext autoreload
%autoreload 2

In [None]:
import IPython.display as ipd
import matplotlib.pyplot as plt

In [None]:
from sins.database.database import SINS, AudioReader
from sins.database.utils import prepare_sessions
from collections import defaultdict
from pprint import pprint

## Get database instance

In [None]:
# Create the SINS database object; the following cells query it for sessions,
# time ranges and segment datasets.
db = SINS()

## Annotations

In [None]:
# Display the raw session annotations held by the database.
db.sessions

In [None]:
# Sessions for the living room: absence is included, and neither other-room
# sessions nor ambiguities are discarded.
sessions = prepare_sessions(
    db.sessions,
    room='living',
    include_absence=True,
    discard_other_rooms=False,
    discard_ambiguities=False,
)
sessions

In [None]:
# Binary presence annotation for the living room: sessions from other rooms are
# discarded and every label other than "absence" is mapped to True.
presence = prepare_sessions(
    db.sessions,
    room='living',
    include_absence=True,
    discard_other_rooms=True,
    discard_ambiguities=False,
    label_map_fn=lambda label: label != "absence",
)
presence

#### Proposed train-/validate-/evaluate-sections

In [None]:
# Display the time ranges the database proposes for training, validation and
# evaluation, as one tuple.
db.train_ranges, db.validate_ranges, db.eval_ranges

## Get lazy_dataset providing fixed-length (except for the last) segments

In [None]:
# Segments from Node6 within the training ranges, capped at a length of 60.
# (presumably seconds — TODO confirm), annotated with the raw sessions under
# the "scene" key.
segment_dataset = db.get_segments(
    "Node6",
    max_segment_length=60.,
    time_ranges=db.train_ranges,
    annotations={"scene": db.sessions},
)

In [None]:
# Inspect two consecutive examples of the segment dataset.
segment_dataset[110], segment_dataset[111]

In [None]:
# Inspect three consecutive examples (presumably chosen around a boundary where
# a shorter final segment occurs — confirm against the cell output).
segment_dataset[991], segment_dataset[992], segment_dataset[993]

## Get lazy_dataset providing single session segments

In [None]:
# Segments from Node1 within the training ranges, capped at a length of 60.
# (presumably seconds — TODO confirm), built from the prepared living-room
# sessions passed via `sessions`.
session_dataset = db.get_segments(
    "Node1",
    max_segment_length=60.,
    time_ranges=db.train_ranges,
    sessions=sessions,
)

In [None]:
# Inspect two consecutive examples of the session-based dataset.
session_dataset[110], session_dataset[111]

In [None]:
# Inspect three consecutive examples (presumably chosen around a session
# boundary — confirm against the cell output).
session_dataset[1018], session_dataset[1019], session_dataset[1020]

## Get parallel data
If you call the ```get_segments``` the same segments will be returned for all datasets.

In [None]:
# Nodes registered for the living room. To use every node instead, take
# list(db.node_to_room.keys()).
nodes = db.room_to_nodes['living']
# Passing a list of nodes yields one segment dataset per node.
parallel_datasets = db.get_segments(
    nodes,
    max_segment_length=60.,
    time_ranges=db.train_ranges,
    sessions=sessions,
)
# Map a fresh AudioReader over each per-node dataset.
audio_datasets = [
    node_dataset.map(AudioReader()) for node_dataset in parallel_datasets
]

In [None]:
# Index of the example to listen to on every node's dataset.
example_idx = 1249
for ds in audio_datasets:
    # Play entry 0 of the example's audio data (presumably the first channel —
    # TODO confirm); 16000 is passed as the playback sampling rate.
    ipd.display(ipd.Audio(ds[example_idx]['audio_data'][0], rate=16000))

In [None]:
# Plot entry 0 of the selected example's audio data from the first dataset.
# The trailing semicolon keeps the list of Line2D objects returned by
# plt.plot out of the cell output, so only the figure is shown.
plt.plot(audio_datasets[0][example_idx]['audio_data'][0,:]);

## Feature Extraction

In [None]:
from sins.features.stft import STFT
from sins.features.mel_transform import MelTransform
from sins.features.normalize import Normalizer
import numpy as np

In [None]:
def plot_spec(x):
    """Draw ``x`` as an image on the current matplotlib axes.

    The input is transposed before plotting, so its first axis runs along the
    horizontal direction of the image. The image origin is placed at the
    bottom, the aspect ratio adapts to the axes and no interpolation smoothing
    is applied.

    Args:
        x: Array-like exposing ``.T``; presumably a 2-D (frames, bins)
            spectrogram — confirm with the callers.

    Returns:
        None. The figure is not shown here; the caller decides when to call
        ``plt.show()``.
    """
    transposed = x.T
    plt.imshow(
        transposed,
        origin="lower",
        aspect='auto',
        interpolation='nearest',
    )

In [None]:
# Chain an STFT and a mel transform onto every audio dataset; each dataset gets
# its own transform instances. The numeric arguments are presumably frame
# shift / FFT size for STFT and sample rate / FFT size / number of mel bands /
# lower frequency bound for MelTransform — TODO confirm against the classes.
mbe_datasets = [
    audio_ds.map(STFT(320, 1024)).map(MelTransform(16000, 1024, 80, 200))
    for audio_ds in audio_datasets
]

In [None]:
# Index of the example to visualize for every node's feature dataset.
idx = 2500
for ds in mbe_datasets:
    example = ds[idx]
    # Entry 0 along the first axis, restricted to the last 100 positions of the
    # second axis (presumably the last 100 frames of the first channel —
    # TODO confirm the axis layout).
    mbe = example["mel_transform"][0, -100:]
    plot_spec(mbe)
    # Show the figure now so that every dataset gets its own plot.
    plt.show()

In [None]:
# Fit one normalizer per node dataset and append it to that dataset's pipeline.
#
# Each normalizer is named after the node its dataset was requested for, rather
# than by counting up "Node1", "Node2", ...: positional numbering mislabels the
# normalizers whenever the room's node numbers are not contiguous.
# Assumes `nodes` holds node-name strings such as "Node1" and that the datasets
# are in the same order as `nodes` — confirm against `db.get_segments`.
assert len(nodes) == len(mbe_datasets), "expected exactly one dataset per node"
normalizers = [
    Normalizer("mel_transform", (0, 1), (0, 1, 2), name=node) for node in nodes
]
# NOTE: this loop replaces the entries of `mbe_datasets` with their normalized
# versions, so running the cell a second time would fit and apply another
# normalization on top. Rebuild `mbe_datasets` before re-running.
for i in range(len(mbe_datasets)):
    # Estimate the moments from the first 300 examples of a shuffled view of
    # the dataset, loaded through prefetch(8, 100).
    normalizers[i].initialize_moments(mbe_datasets[i].shuffle()[:300].prefetch(8, 100), verbose=True)
    mbe_datasets[i] = mbe_datasets[i].map(normalizers[i])