<a href="https://colab.research.google.com/github/Vishal8500/Text-to-Speech-DL-Project/blob/main/Text_to_Speech.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install librosa numpy pandas matplotlib tqdm



In [None]:
DATASET_PATH = "/content/drive/MyDrive/LJSpeech-1.1"

In [None]:
import os
import librosa
import librosa.display
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import json

# Set dataset paths
DATASET_PATH = "/content/drive/MyDrive/LJSpeech-1.1"  # Update this if needed
AUDIO_PATH = os.path.join(DATASET_PATH, "wavs")
METADATA_FILE = os.path.join(DATASET_PATH, "metadata.csv")
OUTPUT_PATH = "/content/processed_data"

# Create output directory
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Load metadata
df = pd.read_csv(METADATA_FILE, sep="|", header=None, names=["file_id", "transcript", "normalized_text"])

# Handle missing or non-string values
df["normalized_text"] = df["normalized_text"].fillna("").astype(str)

# Function to clean text
def clean_text(text):
    text = text.lower().strip()
    text = text.replace(",", "").replace(".", "").replace(";", "").replace("?", "")
    return text

# Apply text cleaning
df["clean_text"] = df["normalized_text"].apply(clean_text)

# Function to convert audio to Mel-spectrogram
def process_audio(file_id, sr=22050, n_mels=80):
    file_path = os.path.join(AUDIO_PATH, f"{file_id}.wav")
    y, _ = librosa.load(file_path, sr=sr)

    # Convert to Mel-spectrogram
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    return mel_spec_db

# Process all audio files and save
mel_data = {}

for idx, row in tqdm(df.iterrows(), total=len(df)):
    file_id = row["file_id"]
    mel_spec_db = process_audio(file_id)

    # Save spectrogram as numpy file
    np.save(os.path.join(OUTPUT_PATH, f"{file_id}.npy"), mel_spec_db)

    # Store metadata (text and spectrogram shape)
    mel_data[file_id] = {"text": row["clean_text"], "mel_shape": mel_spec_db.shape}

# Save metadata to JSON
with open(os.path.join(OUTPUT_PATH, "metadata.json"), "w") as f:
    json.dump(mel_data, f, indent=4)

print("✅ Preprocessing complete! Mel-spectrograms and metadata saved.")


100%|██████████| 13100/13100 [2:53:43<00:00,  1.26it/s]

✅ Preprocessing complete! Mel-spectrograms and metadata saved.





In [None]:
import shutil

# Zip the processed_data folder
shutil.make_archive('/content/processed_data', 'zip', '/content/processed_data')

# Download the zip file
from google.colab import files
files.download('/content/processed_data.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Install required libraries
!pip install tensorflow librosa matplotlib




In [None]:
# Example: TensorFlow Dataset loading (you can use PyTorch similarly)
import tensorflow as tf

def load_data(metadata_path, audio_path, batch_size=32):
    with open(metadata_path, "r") as f:
        metadata = json.load(f)

    text_data = [entry["text"] for entry in metadata.values()]
    mel_data = [np.load(os.path.join(audio_path, f"{file_id}.npy")) for file_id in metadata.keys()]

    # Convert to TensorFlow Dataset (you can also use PyTorch Dataset)
    dataset = tf.data.Dataset.from_tensor_slices((text_data, mel_data))
    dataset = dataset.batch(batch_size)
    return dataset


In [1]:
pip install torch torchvision numpy tensorboard scipy matplotlib librosa

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [8]:
!python --version


Python 3.11.11


In [1]:
!sudo apt-get update -y
!sudo apt-get install python3.7 python3.7-distutils -y


0% [Working]            Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
0% [Waiting for headers] [Connecting to cloud.r-project.org] [Connecting to r2u                                                                               Get:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
                                                                               Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
0% [Waiting for headers] [Connected to cloud.r-project.org (65.9.86.12)] [Conne                                                                               Get:4 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
0% [4 InRelease 114 kB/127 kB 90%] [Connected to cloud.r-project.org (65.9.86.10% [Connected to cloud.r-project.org (65.9.86.12)] [Connecting to r2u.stat.illi                                                                               Get:5 https://developer.download.nvidia.com/compute/cu

In [2]:
!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1
!sudo update-alternatives --config python3


There are 3 choices for the alternative python3 (providing /usr/bin/python3).

  Selection    Path                 Priority   Status
------------------------------------------------------------
* 0            /usr/bin/python3.11   2         auto mode
  1            /usr/bin/python3.10   1         manual mode
  2            /usr/bin/python3.11   2         manual mode
  3            /usr/bin/python3.7    1         manual mode

Press <enter> to keep the current choice[*], or type selection number: 3
update-alternatives: using /usr/bin/python3.7 to provide /usr/bin/python3 (python3) in manual mode


In [3]:
!python --version


Python 3.7.17


In [4]:
!git clone https://github.com/NVIDIA/tacotron2.git

Cloning into 'tacotron2'...
remote: Enumerating objects: 412, done.[K
remote: Total 412 (delta 0), reused 0 (delta 0), pack-reused 412 (from 1)[K
Receiving objects: 100% (412/412), 2.70 MiB | 10.76 MiB/s, done.
Resolving deltas: 100% (203/203), done.


In [5]:
%cd tacotron2

/content/tacotron2


In [7]:
!wget https://bootstrap.pypa.io/pip/3.7/get-pip.py
!python3.7 get-pip.py


--2025-03-25 08:54:00--  https://bootstrap.pypa.io/pip/3.7/get-pip.py
Resolving bootstrap.pypa.io (bootstrap.pypa.io)... 151.101.0.175, 151.101.64.175, 151.101.128.175, ...
Connecting to bootstrap.pypa.io (bootstrap.pypa.io)|151.101.0.175|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2636033 (2.5M) [text/x-python]
Saving to: ‘get-pip.py’


2025-03-25 08:54:00 (91.8 MB/s) - ‘get-pip.py’ saved [2636033/2636033]

Collecting pip<24.1
  Downloading pip-24.0-py3-none-any.whl.metadata (3.6 kB)
Collecting setuptools
  Downloading setuptools-68.0.0-py3-none-any.whl.metadata (6.4 kB)
Collecting wheel
  Downloading wheel-0.42.0-py3-none-any.whl.metadata (2.2 kB)
Downloading pip-24.0-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m46.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading setuptools-68.0.0-py3-none-any.whl (804 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m804.0/804.0 kB[0m [31m

In [8]:
!pip install -r requirements.txt

Collecting matplotlib==2.1.0 (from -r requirements.txt (line 1))
  Downloading matplotlib-2.1.0.tar.gz (35.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.7/35.7 MB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tensorflow==1.15.2 (from -r requirements.txt (line 2))
  Downloading tensorflow-1.15.2-cp37-cp37m-manylinux2010_x86_64.whl.metadata (2.7 kB)
Collecting numpy==1.13.3 (from -r requirements.txt (line 3))
  Downloading numpy-1.13.3.zip (5.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m64.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting inflect==0.2.5 (from -r requirements.txt (line 4))
  Downloading inflect-0.2.5-py2.py3-none-any.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.3/50.3 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting librosa==0.

In [10]:
!pip install numpy==1.18.5
!pip install matplotlib==3.2.2
!pip install tensorflow==1.15.2
!pip install inflect==0.2.5
!pip install librosa==0.6.0
!pip install scipy==1.0.0
!pip install Unidecode==1.0.22
!pip install pillow


Collecting matplotlib==3.2.2
  Downloading matplotlib-3.2.2-cp37-cp37m-manylinux1_x86_64.whl.metadata (5.2 kB)
Collecting cycler>=0.10 (from matplotlib==3.2.2)
  Using cached cycler-0.11.0-py3-none-any.whl.metadata (785 bytes)
Collecting kiwisolver>=1.0.1 (from matplotlib==3.2.2)
  Downloading kiwisolver-1.4.5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (6.4 kB)
Collecting python-dateutil>=2.1 (from matplotlib==3.2.2)
  Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting typing-extensions (from kiwisolver>=1.0.1->matplotlib==3.2.2)
  Downloading typing_extensions-4.7.1-py3-none-any.whl.metadata (3.1 kB)
Downloading matplotlib-3.2.2-cp37-cp37m-manylinux1_x86_64.whl (12.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m34.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cycler-0.11.0-py3-none-any.whl (6.4 kB)
Downloading kiwisolver-1.4.5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl

In [11]:
!apt-get install -y libsndfile1 ffmpeg
!pip install matplotlib numpy scipy librosa unidecode inflect
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libsndfile1 is already the newest version (1.0.31-2ubuntu0.2).
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 32 not upgraded.
Looking in indexes: https://download.pytorch.org/whl/cu118
[31mERROR: Could not find a version that satisfies the requirement torch (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for torch[0m[31m
[0m

In [12]:
pip install torch==1.9.0  # Replace with a compatible version


Collecting torch==1.9.0
  Downloading torch-1.9.0-cp37-cp37m-manylinux1_x86_64.whl.metadata (25 kB)
Downloading torch-1.9.0-cp37-cp37m-manylinux1_x86_64.whl (831.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m831.4/831.4 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch
Successfully installed torch-1.9.0


In [13]:
import zipfile
import os

# Path to the zip file
zip_file_path = '/content/drive/MyDrive/processed_data.zip'

# Directory where you want to unzip the file
unzip_dir = '/content/processed_data'

# Ensure the directory exists
os.makedirs(unzip_dir, exist_ok=True)

# Unzipping the file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(unzip_dir)

print(f"Files extracted to {unzip_dir}")


Files extracted to /content/processed_data


In [14]:
import os
import json
import random
import shutil

# Path to your JSON file
json_file = '/content/processed_data/metadata'
output_dir = '/content/metadata'  # Replace with your desired output directory

# Paths for train, valid, and transcripts
train_dir = os.path.join(output_dir, 'train')
valid_dir = os.path.join(output_dir, 'valid')
transcripts_dir = os.path.join(output_dir, 'transcripts')

# Create directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(valid_dir, exist_ok=True)
os.makedirs(transcripts_dir, exist_ok=True)

# Load JSON data
with open(json_file, 'r') as f:
    data = json.load(f)

# List to hold all filenames and transcriptions
all_data = []

# Prepare transcripts and create file list
for audio_id, details in data.items():
    mel_filename = f'{audio_id}.npy'
    transcription = details['text']

    # Save transcript to a file
    with open(os.path.join(transcripts_dir, f'{audio_id}.txt'), 'w') as f:
        f.write(transcription)

    # Add to the data list
    all_data.append(f'{mel_filename}|{transcription}')

# Shuffle the data
random.shuffle(all_data)

# Split data into 90% train and 10% valid
train_size = int(0.9 * len(all_data))
train_data = all_data[:train_size]
valid_data = all_data[train_size:]

# Save train and valid files
with open(os.path.join(train_dir, 'train.txt'), 'w') as f:
    f.write('\n'.join(train_data))

with open(os.path.join(valid_dir, 'valid.txt'), 'w') as f:
    f.write('\n'.join(valid_data))

print("✅ Train and validation files created successfully.")


FileNotFoundError: [Errno 2] No such file or directory: '/content/processed_data/metadata'