In [None]:
import numpy as np
import wave
from matplotlib import pyplot as plt
import os
from scipy import signal

def read_audio_from_wav(file_path):
    """Load a 16-bit PCM WAV file as a mono float32 signal.

    Parameters
    ----------
    file_path : str
        Path of the WAV file to read.

    Returns
    -------
    audio : numpy.ndarray
        One-dimensional float32 array. Samples are divided by the largest
        int16 value (32767), so they lie roughly in [-1, 1]. Files with more
        than one channel are averaged across channels into a single signal.
    sample_rate : int
        Sampling rate stored in the file header, in Hz.

    Raises
    ------
    ValueError
        If the file does not store 2-byte (16-bit) samples.
    """
    with wave.open(file_path, 'rb') as wav_file:
        sample_width = wav_file.getsampwidth()
        if sample_width != 2:
            # The raw bytes are decoded as int16 below; any other sample
            # width would be silently misinterpreted.
            raise ValueError(
                f"Only 16-bit PCM WAV files are supported, got {8 * sample_width}-bit samples"
            )
        num_channels = wav_file.getnchannels()
        sample_rate = wav_file.getframerate()
        audio_data = wav_file.readframes(wav_file.getnframes())

    audio = np.frombuffer(audio_data, dtype=np.int16)
    audio = audio.astype(np.float32) / np.iinfo(np.int16).max  # Convert to floating-point

    if num_channels > 1:
        # Samples are interleaved (ch0, ch1, ch0, ch1, ...): put one frame per
        # row and average the channels so callers always get a 1-D signal.
        audio = audio.reshape(-1, num_channels).mean(axis=1)

    return audio, sample_rate


def custom_spectrogram(audio, sample_rate, n_fft, hop_length, window, win_length):
    """Compute the complex short-time Fourier transform of a 1-D signal.

    The signal is cut into frames of ``n_fft`` samples taken every
    ``hop_length`` samples (no padding at the signal edges). Each frame is
    multiplied by the analysis window and transformed with a real FFT.

    Parameters
    ----------
    audio : array-like
        One-dimensional signal.
    sample_rate : int
        Not used by the computation; kept so existing callers keep working.
    n_fft : int
        Frame length and FFT size.
    hop_length : int
        Number of samples between the starts of consecutive frames.
    window : str
        Window name understood by ``scipy.signal.get_window``
        (e.g. ``'hann'``, ``'hamming'``, ``'blackman'``, ``'bartlett'``).
    win_length : int
        Length of the window. When it is shorter than ``n_fft`` the window is
        zero-padded on both sides so that it is centred in the frame.

    Returns
    -------
    numpy.ndarray
        complex64 array of shape ``(n_fft // 2 + 1, num_frames)``. A signal
        shorter than ``n_fft`` yields zero frames.

    Raises
    ------
    ValueError
        If ``hop_length`` is not positive or ``win_length`` is not in
        ``(0, n_fft]``.
    """
    if hop_length <= 0:
        raise ValueError("hop_length must be a positive integer")
    if not 0 < win_length <= n_fft:
        raise ValueError("win_length must satisfy 0 < win_length <= n_fft")

    audio = np.asarray(audio)

    # Floor division plus the clamp keeps the count at zero when the signal is
    # shorter than one frame, instead of producing a negative or bogus count.
    num_frames = max(0, 1 + (len(audio) - n_fft) // hop_length)

    # Create an empty array to store the spectrogram
    spectrogram = np.zeros((n_fft // 2 + 1, num_frames), dtype=np.complex64)

    # Build the window by name. numpy.fft has no window functions, so the
    # lookup is done through scipy.signal, which knows all the names used here.
    taper = signal.get_window(window, win_length)

    # Zero-pad the window (centred) up to the frame length so it can be
    # multiplied with an n_fft-sample frame even when win_length < n_fft.
    pad_left = (n_fft - win_length) // 2
    pad_right = n_fft - win_length - pad_left
    taper = np.pad(taper, (pad_left, pad_right))

    # Iterate over the frames
    for i in range(num_frames):
        start = i * hop_length
        frame = audio[start:start + n_fft] * taper

        # Real-input FFT keeps only the non-negative frequency bins
        spectrogram[:, i] = np.fft.rfft(frame, n=n_fft)

    return spectrogram


# Usage: change the file path to the file you want to analyse
file_path = 'audios/minor_1.wav'
audio, sample_rate = read_audio_from_wav(file_path)

# Parameter grid: one spectrogram is produced for every valid combination
n_fft_values = [512, 1024, 2048, 4096]
hop_length_values = [256, 512, 1024, 2048]
window_values = ['hann', 'hamming', 'blackman', 'bartlett']
win_length_values = [256, 512, 1024, 2048]


# Create the directory if it doesn't exist
save_dir = 'plots/original'
os.makedirs(save_dir, exist_ok=True)

# Lower bound applied to the power before taking the logarithm, so that
# bins with zero power map to -100 dB instead of -inf (and no divide-by-zero
# warning is raised).
power_floor = 1e-10

for n_fft in n_fft_values:
    for hop_length in hop_length_values:
        for window in window_values:
            for win_length in win_length_values:
                # Skip combinations where the window is longer than the FFT size
                if n_fft < win_length:
                    continue

                spectrogram = custom_spectrogram(audio, sample_rate, n_fft, hop_length, window, win_length)

                # Nothing to draw when the signal is too short for a single frame
                if spectrogram.shape[1] == 0:
                    continue

                power = np.abs(spectrogram) ** 2
                spectrogram_db = 10 * np.log10(np.maximum(power, power_floor))

                # Plot the spectrogram (rows are frequency bins, columns are frames)
                fig, ax = plt.subplots()
                image = ax.imshow(spectrogram_db, aspect='auto', origin='lower')
                ax.set_title(f'n_fft={n_fft}, hop={hop_length}, window={window}, win_length={win_length}')
                ax.set_xlabel('Frame index')
                ax.set_ylabel('Frequency bin')
                fig.colorbar(image, ax=ax, format='%+2.0f dB')

                # Save the plot with parameter values in the filename
                filename = f'n_fft_{n_fft}_hop_{hop_length}_window_{window}_win_{win_length}.png'
                save_path = os.path.join(save_dir, filename)
                fig.savefig(save_path)

                plt.show()
                # Release the figure explicitly; the loop creates a lot of them
                plt.close(fig)

In [12]:
%pip install gtts

Collecting gtts
  Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)
Collecting click<8.2,>=7.1 (from gtts)
  Using cached click-8.1.7-py3-none-any.whl (97 kB)
Installing collected packages: click, gtts
Successfully installed click-8.1.7 gtts-2.5.1
Note: you may need to restart the kernel to use updated packages.


In [16]:
from gtts import gTTS

# Define the text to convert to speech
text = "Hello, everyone, my name is Bikash, and here is the spectrogram for the minor 1 exam."

# Create the gTTS object; lang selects the language and tld the Google
# Translate domain that is used for the synthesis
tts = gTTS(text=text, lang='en', tld='com.au')

# Save the speech as an MP3 file (the path ends in .mp3; it is not a WAV file)
output_file = 'audios/tts_minor_1.mp3'
tts.save(output_file)


In [17]:
import subprocess

# Specify the input and output file paths
input_file = 'audios/tts_minor_1.mp3'
output_file = 'audios/tts_minor_1.wav'

# Run the ffmpeg command to convert the file.
# '-y' overwrites an existing output file, so re-running this cell does not
# stop at ffmpeg's "file already exists" check.
# check=True raises subprocess.CalledProcessError when ffmpeg exits with a
# non-zero status, instead of letting later cells read a missing or stale file.
subprocess.run(['ffmpeg', '-y', '-i', input_file, output_file], check=True)


ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

CompletedProcess(args=['ffmpeg', '-i', 'audios/tts_minor_1.mp3', 'audios/tts_minor_1.wav'], returncode=0)

In [None]:
# Usage: change the file path to the file you want to analyse
file_path = 'audios/tts_minor_1.wav'
audio, sample_rate = read_audio_from_wav(file_path)

# Parameter grid: one spectrogram is produced for every valid combination
n_fft_values = [512, 1024, 2048, 4096]
hop_length_values = [256, 512, 1024, 2048]
window_values = ['hann', 'hamming', 'blackman', 'bartlett']
win_length_values = [256, 512, 1024, 2048]


# Create the directory if it doesn't exist
save_dir = 'plots/tts'
os.makedirs(save_dir, exist_ok=True)

# Lower bound applied to the power before taking the logarithm, so that
# bins with zero power map to -100 dB instead of -inf (and no divide-by-zero
# warning is raised).
power_floor = 1e-10

for n_fft in n_fft_values:
    for hop_length in hop_length_values:
        for window in window_values:
            for win_length in win_length_values:
                # Skip combinations where the window is longer than the FFT size
                if n_fft < win_length:
                    continue

                spectrogram = custom_spectrogram(audio, sample_rate, n_fft, hop_length, window, win_length)

                # Nothing to draw when the signal is too short for a single frame
                if spectrogram.shape[1] == 0:
                    continue

                power = np.abs(spectrogram) ** 2
                spectrogram_db = 10 * np.log10(np.maximum(power, power_floor))

                # Plot the spectrogram (rows are frequency bins, columns are frames)
                fig, ax = plt.subplots()
                image = ax.imshow(spectrogram_db, aspect='auto', origin='lower')
                ax.set_title(f'n_fft={n_fft}, hop={hop_length}, window={window}, win_length={win_length}')
                ax.set_xlabel('Frame index')
                ax.set_ylabel('Frequency bin')
                fig.colorbar(image, ax=ax, format='%+2.0f dB')

                # Save the plot with parameter values in the filename
                filename = f'n_fft_{n_fft}_hop_{hop_length}_window_{window}_win_{win_length}.png'
                save_path = os.path.join(save_dir, filename)
                fig.savefig(save_path)

                plt.show()
                # Release the figure explicitly; the loop creates a lot of them
                plt.close(fig)
