In [2]:
pip install tacotron2

^C
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import subprocess
import sys
from tkinter import Tk, Text, Button, Label, Radiobutton, IntVar, Toplevel

import pyttsx3
from gtts import gTTS

class TTSApp:
    """Tkinter front end that speaks Sinhala text through gTTS or pyttsx3.

    The main window offers a choice between the online engine (gTTS) and the
    offline engine (pyttsx3). Pressing "Next" opens a stage window with a text
    box and a "Play" button; "Play" synthesizes the text to an audio file in
    the working directory and opens that file with the system default player.
    """

    # Values stored in the radio-button variable.
    ONLINE = 1
    OFFLINE = 2

    # Language code passed to gTTS and matched against pyttsx3 voices.
    LANGUAGE_CODE = "si"

    # Files read and written by the app, relative to the working directory.
    DATASET_PATH = "metadata.txt"
    GTTS_OUTPUT = "neural_network_output_gtts.mp3"
    # NOTE(review): pyttsx3 does not appear to encode MP3 itself; the data
    # written under this name may be in another container — confirm per driver.
    PYTTSX_OUTPUT = "neural_network_output_pyttsx.mp3"

    def __init__(self, root):
        """Build the engine-selection widgets on ``root`` and load the dataset.

        Args:
            root: The Tk root window that hosts the radio buttons and the
                "Next" button.
        """
        self.root = root
        root.title("Sinhala TTS App")

        self.translation_choice = IntVar()
        self.translation_choice.set(self.ONLINE)  # Default choice: Online Translation (gTTS)

        self.online_radio = Radiobutton(root, text="Online Translation (gTTS)", variable=self.translation_choice, value=self.ONLINE)
        self.online_radio.pack()

        self.offline_radio = Radiobutton(root, text="Offline Translation (pyttsx3)", variable=self.translation_choice, value=self.OFFLINE)
        self.offline_radio.pack()

        self.next_button = Button(root, text="Next", command=self.show_next_stage)
        self.next_button.pack()

        self.dataset_index = 0
        self.load_dataset()

    def load_dataset(self):
        """Read ``metadata.txt`` into ``self.dataset``, one stripped line per entry.

        A missing file is not fatal: the dataset is set to an empty list and a
        message is printed, so the window can still be used for typed text.
        """
        try:
            with open(self.DATASET_PATH, encoding="utf-8") as file:
                self.dataset = [line.strip() for line in file]
        except FileNotFoundError:
            print(self.DATASET_PATH + " not found; continuing with an empty dataset.")
            self.dataset = []

    def show_next_stage(self):
        """Open the stage window matching the currently selected radio button."""
        translation_choice = self.translation_choice.get()

        if translation_choice == self.ONLINE:
            self.show_online_translation_stage()
        elif translation_choice == self.OFFLINE:
            self.show_offline_translation_stage()

    def _build_stage(self, title, translation_choice):
        """Create a stage window with a text box and a "Play" button.

        The engine is bound to the window when it is created, so changing the
        radio buttons afterwards does not change which engine this window uses.
        """
        stage = Toplevel(self.root)
        stage.title(title)

        text_label = Label(stage, text="Sinhala Text:")
        text_label.pack()

        text_entry = Text(stage, height=5, width=40)
        text_entry.pack()

        # "end-1c" drops the trailing newline the Text widget always appends.
        play_button = Button(
            stage,
            text="Play",
            command=lambda: self.play_audio(text_entry.get("1.0", "end-1c"), translation_choice),
        )
        play_button.pack()
        return stage

    def show_online_translation_stage(self):
        """Open the text-entry window that plays through gTTS."""
        self._build_stage("Online Translation Stage", self.ONLINE)

    def show_offline_translation_stage(self):
        """Open the text-entry window that plays through pyttsx3."""
        self._build_stage("Offline Translation Stage", self.OFFLINE)

    def play_audio(self, input_text, translation_choice=None):
        """Synthesize ``input_text`` and open the resulting audio file.

        Args:
            input_text: Text to speak. Empty or whitespace-only text is
                reported and skipped.
            translation_choice: ``ONLINE`` or ``OFFLINE``. When omitted, the
                current radio-button selection is used.
        """
        if translation_choice is None:
            translation_choice = self.translation_choice.get()

        if not input_text or not input_text.strip():
            print("No text to speak.")
            return

        if translation_choice == self.ONLINE:
            self.neural_network_synthesis_gtts(input_text)
            self._open_audio_file(self.GTTS_OUTPUT)
        elif translation_choice == self.OFFLINE:
            # Only open the file when synthesis actually wrote it; otherwise a
            # stale or missing file would be handed to the player.
            if self.neural_network_synthesis_pyttsx(input_text):
                self._open_audio_file(self.PYTTSX_OUTPUT)

    @staticmethod
    def _open_audio_file(path):
        """Open ``path`` with the platform's default application."""
        try:
            if hasattr(os, "startfile"):
                # Only available on Windows.
                os.startfile(path)
            elif sys.platform == "darwin":
                subprocess.run(["open", path], check=False)
            else:
                subprocess.run(["xdg-open", path], check=False)
        except OSError as error:
            print("Could not open " + path + ": " + str(error))

    def rule_based_synthesis(self, text):
        """Return ``text`` converted to upper case."""
        return text.upper()

    def concatenate_synthesis(self, text):
        """Return ``text`` with every space character removed."""
        return text.replace(" ", "")

    def neural_network_synthesis_gtts(self, text):
        """Synthesize ``text`` with gTTS and save it to ``GTTS_OUTPUT``."""
        tts = gTTS(text=text, lang=self.LANGUAGE_CODE)
        tts.save(self.GTTS_OUTPUT)

    def load_tacotron_model(self):
        """Build a Tacotron-2 model from ``tacotron2_statedict.pt`` in eval mode.

        Returns:
            The model with its state dict loaded onto the CPU.
        """
        # Imported here so the GUI does not need torch unless Tacotron is used.
        import torch

        # NOTE(review): Tacotron2 is not imported anywhere in the visible
        # source, so this call raises NameError until an import is added —
        # confirm which package/module provides it and what arguments it needs.
        tacotron_model = Tacotron2()  # Initialize Tacotron-2 model
        tacotron_model.load_state_dict(torch.load("tacotron2_statedict.pt", map_location=torch.device('cpu')))
        tacotron_model.eval()
        return tacotron_model

    def tacotron_synthesis(self, text):
        """Run the Tacotron-2 model on ``text`` and print a completion message.

        The model outputs are unpacked but not returned or saved.
        """
        # Load Tacotron-2 model
        tacotron_model = self.load_tacotron_model()

        # Perform Tacotron-2 synthesis
        # NOTE(review): the raw string is passed straight to the model;
        # presumably it expects encoded input — confirm against the model's API.
        mel_outputs, mel_outputs_postnet, _, alignments = tacotron_model(text)

        print("Tacotron synthesis complete")

    @staticmethod
    def _is_sinhala_voice(voice):
        """Return True when a pyttsx3 voice looks like a Sinhala voice.

        Language entries may be ``str`` or ``bytes`` and may carry a region
        suffix or leading non-printable bytes, so each entry is reduced to its
        primary tag before comparing with ``LANGUAGE_CODE``. A voice whose name
        contains "sinhala" is accepted as well, for voices that list no
        languages.
        """
        for entry in getattr(voice, "languages", None) or []:
            if isinstance(entry, bytes):
                entry = entry.decode("utf-8", errors="ignore")
            # Keep only characters that can be part of a language tag.
            tag = "".join(ch for ch in str(entry) if ch.isalnum() or ch in "-_")
            primary = tag.replace("_", "-").split("-")[0].lower()
            if primary == TTSApp.LANGUAGE_CODE:
                return True

        name = getattr(voice, "name", None) or ""
        return "sinhala" in str(name).lower()

    def neural_network_synthesis_pyttsx(self, text):
        """Synthesize ``text`` with a Sinhala pyttsx3 voice into ``PYTTSX_OUTPUT``.

        Returns:
            True when a Sinhala voice was found and synthesis was run, False
            when no such voice is installed (a message is printed).
        """
        # Initialize the pyttsx3 engine
        engine = pyttsx3.init()

        # Set properties for customization
        engine.setProperty('rate', 150)  # Adjust the speech rate (words per minute)
        engine.setProperty('volume', 1.0)  # Set the volume (0.0 to 1.0)

        # Pick the first installed voice that advertises Sinhala.
        sinhala_voice = next(
            (voice for voice in engine.getProperty('voices') if self._is_sinhala_voice(voice)),
            None,
        )
        if sinhala_voice is None:
            print("Sinhala voice not found.")
            return False

        engine.setProperty('voice', sinhala_voice.id)

        # Queue the file output, then run the engine to perform synthesis.
        engine.save_to_file(text, self.PYTTSX_OUTPUT)
        engine.runAndWait()
        return True



if __name__ == "__main__":
    # Create the Tk root window, attach the app to it, then block in the
    # Tk event loop until the window is closed.
    root = Tk()
    app = TTSApp(root)  # the app keeps the root window as app.root
    app.root.mainloop()


Sinhala voice not found.
Sinhala voice not found.
