
```bibtex
@inproceedings{yang24c_interspeech,
  title     = {Frame-Wise Breath Detection with Self-Training: An Exploration of Enhancing Breath Naturalness in Text-to-Speech},
  author    = {Dong Yang and Tomoki Koriyama and Yuki Saito},
  year      = {2024},
  booktitle = {Interspeech 2024},
  pages     = {4928--4932},
  doi       = {10.21437/Interspeech.2024-168},
}
```

Code: [ydqmkkx/Respiro-en](https://github.com/ydqmkkx/Respiro-en)

In [None]:
import torch
from modules import DetectionNet, BreathDetector

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DetectionNet().to(device)

# The checkpoint path is relative to the notebook's working directory.
# map_location remaps the stored tensors onto `device`, so a checkpoint that was
# saved from a CUDA session can still be loaded on a CPU-only machine.
checkpoint = torch.load("respiro-en.pt", map_location=device)
model.load_state_dict(checkpoint["model"])  # weights are stored under the "model" key
model.eval()  # put the module in evaluation mode before it is used for detection

detector = BreathDetector(model)

In [None]:
# Audio file handed to the detector in the next cell. This is a machine-specific
# absolute path; point it at a local recording before running.
sample = "/home/joregan/hsi/audio/hsi_1_0515_209_001_inter.wav"

In [None]:
# Run the detector on the sample file. The following cell iterates `tree` in sorted
# order and reads `.begin` / `.end` from every item, so this is presumably an
# intervaltree.IntervalTree of detected breath spans (times in seconds) — TODO confirm
# against BreathDetector.
tree = detector(sample)

In [None]:
from intervaltree import Interval, IntervalTree
from praatio import textgrid
from praatio.utilities.constants import Interval

from pathlib import Path


def _breath_tier_entries(breaths, breath_label="b", silence_label="sil"):
    """Convert detected breath spans into contiguous ``(start, end, label)`` tuples.

    Parameters
    ----------
    breaths : iterable
        Items exposing ``begin`` and ``end`` attributes. They are sorted with
        ``sorted()`` before use, so they must be mutually comparable.
    breath_label : str
        Label given to every detected span.
    silence_label : str
        Label given to the filler inserted before a span that starts later than
        the end of the previous one (or later than 0.0 for the first span).

    Returns
    -------
    list of tuple
        ``(start, end, label)`` tuples in time order; empty when ``breaths`` is empty.

    Notes
    -----
    Nothing is appended after the final span, so the result ends where the last
    detected breath ends. Spans are assumed not to overlap — TODO confirm against
    the detector's output.
    """
    entries = []
    cursor = 0.0  # end time of the most recently emitted entry
    for breath in sorted(breaths):
        if breath.begin > cursor:
            entries.append((cursor, breath.begin, silence_label))
        entries.append((breath.begin, breath.end, breath_label))
        cursor = breath.end
    return entries


# `tier` keeps its original name in case later cells read it.
tier = _breath_tier_entries(tree)
if not tier:
    # Without at least one entry there are no tier bounds to derive; fail with a
    # clear message instead of an IndexError from tier[0].
    raise ValueError(f"No breath intervals detected in {sample}; nothing to write.")

tg = textgrid.Textgrid()
tier_intervals = [Interval(start, end, label) for start, end, label in tier]
# The tier spans from the first entry's start to the last entry's end.
breath_tier = textgrid.IntervalTier("respiro", tier_intervals, tier[0][0], tier[-1][1])
tg.addTier(breath_tier)

# Name the TextGrid after the analysed audio file so the output always matches `sample`.
output_path = Path("/tmp") / f"{Path(sample).stem}.TextGrid"
tg.save(str(output_path), format="long_textgrid", includeBlankSpaces=False)