Skip to content

Commit

Permalink
Rewrite text2wave in order to improve its performance
Browse files Browse the repository at this point in the history
- Replaces the quadratic-time combine_waves function
  with a linear-time implementation.

- text2wave now writes each utterance directly to the
  final file, reducing both disk I/O and memory usage
  • Loading branch information
zeehio committed Aug 15, 2020
1 parent 898f971 commit b61902a
Showing 1 changed file with 30 additions and 22 deletions.
52 changes: 30 additions & 22 deletions examples/text2wave.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,16 @@
(gc-status nil)

;;; Default argument values
;; Output file handle; main assigns it the result of (fopen outfile "wb").
(defvar fp nil)
;; Running total of samples written so far; each utterance's num_samples is
;; added to it, and main writes the final value into the file header.
(defvar totalnumsamples 0)
;; Name of the output file opened by main.
;; NOTE(review): "-" presumably stands for stdout -- confirm that the
;; fseek back to the header in main behaves correctly in that case.
(defvar outfile "-")
;; Wave file format, passed to wave.save.header and wave.save.data.fp.
(defvar output_type 'riff)
;; Target sample rate as a string; when non-nil it is converted with
;; parse-number and each utterance is resampled to it.
(defvar frequency nil) ;; default is no frequency modification
;; List of input files that main maps over.
;; NOTE(review): "-" presumably stands for stdin -- confirm.
(defvar text_files '("-"))
;; NOTE(review): presumably the text mode selected on the command line; its
;; use is not visible in this excerpt -- confirm against get_options/main.
(defvar mode nil)
;; Volume scale factor kept as a string; any value other than "1.0" is
;; converted with parse-number and applied via utt.wave.rescale.
(defvar volume "1.0")
;; List of temporary wave file names.
;; NOTE(review): only referenced by the temp-file based saving code visible
;; here; may be unused once utterances are streamed to fp -- confirm.
(defvar wavefiles nil)
;; Most recently saved utterance; main passes its wave to wave.save.header
;; when rewriting the header with the final sample count.
(defvar an_utt nil)

;;; Get options
(define (get_options)
Expand Down Expand Up @@ -127,39 +130,40 @@
(format stderr "%s: %s\n" "text2wave" message)
(text2wave_help))

(define (save_record_wave utt)

(define (save_record_wave_fp utt)
"Saves the waveform and records its so it can be joined into a
a single waveform at the end."
(let ((fn (make_tmp_filename)))
(utt.save.wave utt fn)
(set! wavefiles (cons fn wavefiles))
utt))

(define (combine_waves)
"Join all the waves together into the desired output file
and delete the intermediate ones."
(let ((wholeutt (utt.synth (Utterance Text ""))))
(mapcar
(lambda (d)
(utt.import.wave wholeutt d t)
(delete-file d))
(reverse wavefiles))
(if frequency
(utt.wave.resample wholeutt (parse-number frequency)))
(utt.wave.resample utt (parse-number frequency))
)
(if (eq? totalnumsamples 0)
(wave.save.header fp (utt.wave utt) output_type nil
(list (list "numsamples" 0)))
)
(set! totalnumsamples (+ totalnumsamples
(get_param 'num_samples (wave.info (utt.wave utt)) 0)
)
)
(if (not (equal? volume "1.0"))
(begin
(utt.wave.rescale wholeutt (parse-number volume))))
(utt.save.wave wholeutt outfile output_type)
))
(utt.wave.rescale utt (parse-number volume))
)
)
(wave.save.data.fp (utt.wave utt) fp output_type nil)
(set! an_utt utt)
)

;;;
;;; Redefine what happens to utterances during text to speech
;;;
(set! tts_hooks (list utt.synth save_record_wave))
(set! tts_hooks (list utt.synth save_record_wave_fp))

(define (main)
(get_options)

(set! fp (fopen outfile "wb"))

;; do the synthesis
(mapcar
(lambda (f)
Expand All @@ -168,8 +172,12 @@ and delete the intermediate ones."
(tts_file f (tts_find_text_mode f auto-text-mode-alist))))
text_files)

;; Now put the waveforms together at again
(combine_waves)
;; Now update the header
(fseek fp 0 0)
(wave.save.header fp (utt.wave an_utt) output_type nil
(list (list "numsamples" totalnumsamples))
)
(fclose fp)
)

;;; Do the work
Expand Down

0 comments on commit b61902a

Please sign in to comment.