In [9]:
import os
import torch
import json
import numpy as np
import librosa
from types import SimpleNamespace
from models import Generator
from scipy.io.wavfile import write

# -----------------------------
# 1️⃣ Set working directory
# -----------------------------
# Project folder that contains generator_v1.pt and config_v1.json; the
# relative paths used below are resolved against it.
os.chdir("/home/hemant/Desktop/my_project/hifi-gan")

# -----------------------------
# 2️⃣ Load HiFi-GAN config
# -----------------------------
# JSON is UTF-8 by specification; pass the encoding explicitly so parsing
# does not depend on the platform's locale default.
with open("config_v1.json", encoding="utf-8") as f:
    config_dict = json.load(f)
# Expose the config keys as attributes (config.n_fft, config.hop_size, ...).
config = SimpleNamespace(**config_dict)

# -----------------------------
# 3️⃣ Load Generator model
# -----------------------------
model = Generator(config)
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints obtained from a trusted source.
checkpoint = torch.load("generator_v1.pt", map_location="cpu")
# The generator weights are stored under the 'generator' key of the checkpoint.
model.load_state_dict(checkpoint['generator'])
# Switch to inference mode.
model.eval()

# -----------------------------
# 4️⃣ Function to convert audio to MEL spectrogram
# -----------------------------
def wav_to_mel(file_path, config):
    """Load an audio file and convert it to a log-mel spectrogram tensor.

    The features are computed the way the HiFi-GAN reference preprocessing
    does (magnitude spectrogram, explicit reflect padding with non-centered
    frames, natural-log compression with a 1e-5 floor) so that they are on
    the scale the pretrained generator was trained with.
    NOTE(review): verify against ``mel_spectrogram`` in the project's
    ``meldataset.py``.

    Args:
        file_path: Path of the audio file to read.
        config: Object exposing ``sampling_rate``, ``n_fft``, ``hop_size``,
            ``win_size``, ``num_mels``, ``fmin`` and ``fmax`` attributes.

    Returns:
        A float32 ``torch.Tensor`` of shape ``[1, num_mels, frames]``.

    Raises:
        ValueError: If the audio file contains no samples.
    """
    # Load audio, resampled to the sampling rate given in the config.
    wav, sr = librosa.load(file_path, sr=config.sampling_rate)
    if wav.size == 0:
        raise ValueError(f"Audio file contains no samples: {file_path}")

    # Peak-normalize to 0.99. All-zero (silent) audio is left untouched,
    # otherwise the division would be 0/0 and fill the signal with NaNs.
    peak = np.abs(wav).max()
    if peak > 0:
        wav = wav / peak * 0.99

    # Pad manually and frame with center=False instead of relying on
    # librosa's default centered framing.
    pad = (config.n_fft - config.hop_size) // 2
    wav = np.pad(wav, (pad, pad), mode="reflect")

    # power=1.0 yields a magnitude (not power) mel spectrogram.
    mel = librosa.feature.melspectrogram(
        y=wav,
        sr=sr,
        n_fft=config.n_fft,
        hop_length=config.hop_size,
        win_length=config.win_size,
        n_mels=config.num_mels,
        fmin=config.fmin,
        fmax=config.fmax,
        center=False,
        power=1.0,
    )

    # Dynamic-range compression: natural log with a floor so that silent
    # bins map to log(1e-5) instead of -inf.
    mel = np.log(np.clip(mel, 1e-5, None))

    mel = torch.from_numpy(mel).unsqueeze(0).float()  # [1, n_mels, time]
    return mel

# -----------------------------
# 5️⃣ Pass your audio file here
# -----------------------------
# Input audio, resolved relative to the current working directory.
input_wav = "scale.wav"
mel = wav_to_mel(input_wav, config)

# -----------------------------
# 6️⃣ Generate audio from MEL
# -----------------------------
# Inference only: disable gradient tracking, then drop the singleton
# dimensions and move the waveform to a NumPy array.
with torch.no_grad():
    audio_out = model(mel).squeeze().cpu().numpy()

# -----------------------------
# 7️⃣ Save generated audio
# -----------------------------
output_file = "output_from_scale.wav"
# Clip to [-1, 1] before scaling: any sample outside that range would
# otherwise wrap around when cast to int16 and produce loud clicks.
write(
    output_file,
    config.sampling_rate,
    (np.clip(audio_out, -1.0, 1.0) * 32767).astype(np.int16),
)

print(f"✅ Audio Generated Successfully: {output_file}")


✅ Audio Generated Successfully: output_from_scale.wav
