# Inference

In [None]:
from pyannote.database import get_protocol, FileFinder
# Load the debug speaker-diarization protocol; the "audio" entry of each file
# is filled in by a FileFinder preprocessor.
protocol = get_protocol(
    'Debug.SpeakerDiarization.Debug',
    preprocessors={"audio": FileFinder()},
)

## Train and apply a voice activity detection model

In [None]:
from pyannote.audio.tasks import VoiceActivityDetection
from pyannote.audio.models.segmentation.debug import SimpleSegmentationModel
import pytorch_lightning as pl

In [None]:
# Voice activity detection task built on the protocol loaded above.
vad = VoiceActivityDetection(
    protocol,
    duration=2.,
    batch_size=16,
    num_workers=4,
)

# Debug segmentation model attached to that task.
model = SimpleSegmentationModel(task=vad)

# Train for a single epoch; Lightning output is rooted at 'inference/vad'.
trainer = pl.Trainer(
    max_epochs=1,
    default_root_dir='inference/vad',
)
# Assign to `_` so the notebook does not display the return value of fit().
_ = trainer.fit(model)

In [None]:
from pyannote.audio import Inference
# Wrap the model trained above for inference, with step=0.1 and batches of 128.
# NOTE(review): `step` is presumably the hop between consecutive windows, in
# seconds -- confirm against the Inference documentation.
inference = Inference(model, step=0.1, batch_size=128)

In [None]:
# Inference on a whole file: take the first file yielded by the protocol's
# development set and pass it to `inference`.
dev_file = next(protocol.development())
scores = inference(dev_file)
# Bare expression so that the notebook displays the result.
scores

In [None]:
# Inference on an excerpt: `crop` restricts inference to Segment(10, 15) of
# the file (presumably seconds 10 to 15 -- confirm the unit).
from pyannote.core import Segment
scores = inference.crop(dev_file, Segment(10, 15))
# Bare expression so that the notebook displays the result.
scores

In [None]:
# Inference on an excerpt shorter than the sliding window: Segment(10, 11.5)
# spans 1.5, while the VAD task was created with duration=2.
# NOTE(review): assumes the inference window defaults to the task duration --
# confirm.
scores = inference.crop(dev_file, Segment(10, 11.5))
# Bare expression so that the notebook displays the result.
scores

In [None]:
# Inference on a whole chunk: build an Inference with window="whole" and apply
# it to Segment(10, 15) of the file.
# NOTE: this rebinds `inference`, replacing the instance created with step=0.1
# in an earlier cell.
inference = Inference(model, window="whole")
# The result is stored in `scores`; this cell does not display it.
scores = inference.crop(dev_file, Segment(10, 15))

## Load and apply a pretrained VAD model

In [None]:
from pathlib import Path

from pyannote.audio import Model

# The checkpoint written by the VAD training cell lives under
# 'inference/vad/lightning_logs/version_N/checkpoints/'. Both the `version_N`
# directory (incremented on every re-run) and the checkpoint file name
# (epoch/step counters) depend on the training run, so pick the most recently
# written checkpoint instead of hard-coding one specific path.
vad_checkpoints = Path('inference/vad').glob(
    'lightning_logs/version_*/checkpoints/*.ckpt'
)
latest_checkpoint = max(
    vad_checkpoints,
    key=lambda path: path.stat().st_mtime,
    default=None,
)
if latest_checkpoint is None:
    # Fail with an actionable message rather than an opaque loading error.
    raise FileNotFoundError(
        "no checkpoint found under 'inference/vad'; "
        "run the VAD training cell first"
    )

# Reload the model from disk (path passed as a string, as before) and rebuild
# the sliding-window inference with the same settings as the earlier VAD cell.
model = Model.from_pretrained(str(latest_checkpoint))
inference = Inference(model, step=0.1, batch_size=128)
scores = inference(dev_file)
# Bare expression so that the notebook displays the result.
scores

## Train and apply a speaker embedding model

In [None]:
from pyannote.audio.models.embedding.debug import SimpleEmbeddingModel
from pyannote.audio.tasks import SpeakerEmbedding

# Speaker embedding task built on the same protocol as the VAD task.
emb = SpeakerEmbedding(
    protocol,
    duration=2.,
    num_workers=4,
)

# Debug embedding model attached to that task. This rebinds `model` and
# `trainer`, replacing the VAD ones.
model = SimpleEmbeddingModel(task=emb)

# Train for a single epoch; Lightning output is rooted at 'inference/emb'.
trainer = pl.Trainer(
    max_epochs=1,
    default_root_dir='inference/emb',
)
# Assign to `_` so the notebook does not display the return value of fit().
_ = trainer.fit(model)

In [None]:
# Sliding-window inference over the whole development file, with duration=1.
# and step=0.5.
inference = Inference(
    model,
    duration=1.,
    step=0.5,
)
embeddings = inference(dev_file)

# Pull out the values and the sliding window attached to the result, then
# display the shape of the values and the window's start, duration and step.
data = embeddings.data
window = embeddings.sliding_window
(data.shape, window.start, window.duration, window.step)

In [None]:
# Sliding-window inference restricted to Segment(5, 12) of the file.
embeddings = inference.crop(
    dev_file,
    Segment(5, 12),
)

# Display the shape of the values and the window's start, duration and step.
data = embeddings.data
window = embeddings.sliding_window
(data.shape, window.start, window.duration, window.step)

In [None]:
# Sliding-window inference on an excerpt shorter than the sliding window:
# Segment(11.1, 12) spans 0.9, while `inference` was built with duration=1.
embeddings = inference.crop(
    dev_file,
    Segment(11.1, 12),
)

# Display the shape of the values and the window's start, duration and step.
data = embeddings.data
window = embeddings.sliding_window
(data.shape, window.start, window.duration, window.step)

In [None]:
# Inference on a whole chunk: build an Inference with window="whole" and apply
# it to Segment(5, 12) of the file.
# NOTE: this rebinds `inference`, replacing the sliding-window instance
# (duration=1., step=0.5) used by the previous cells.
inference = Inference(model, window="whole")
embeddings = inference.crop(dev_file, Segment(5, 12))

# Here `.shape` is read directly on the result, whereas the sliding-window
# cells go through `.data` first.
embeddings.shape

In [None]:
# Inference on a whole chunk shorter than the training duration:
# Segment(5, 5.2) spans 0.2, while the embedding task was created with
# duration=2.
# Relies on `inference` having been built with window="whole" in the previous
# cell, so cells must be run in order.
embeddings = inference.crop(dev_file, Segment(5, 5.2))

# Bare expression so that the notebook displays the shape of the result.
embeddings.shape

In [None]:
# Inference on several chunks at once: `crop` is given a list of segments
# instead of a single one.
embeddings = inference.crop(
    dev_file,
    [Segment(0, 4), Segment(5, 12)],
)

# Bare expression so that the notebook displays the shape of the result.
embeddings.shape