<a href="https://colab.research.google.com/github/dixy52-beep/COLAB_Custom_Translator/blob/main/Audio-Reconstruction-SuperResolution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the required libraries
!pip install librosa
!pip install soundfile
!pip install tensorflow

# Import the libraries
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
import tensorflow as tf
from tensorflow.keras import layers
import os

def load_and_preprocess_audio(audio_path, sr=16000):
    """Load an audio file and build a degraded copy of the same length.

    The clip is loaded with ``librosa.load`` at the requested rate, resampled
    down to a quarter of that rate, and resampled back up to the loading
    rate. The result is then trimmed or zero-padded to the length of the
    loaded clip so the two signals stay sample-aligned.

    Args:
        audio_path: Path of the audio file to load.
        sr: Sample rate passed to ``librosa.load``.

    Returns:
        A tuple ``(original_audio, low_quality_audio_upscaled, sr)``. Both
        arrays have the same number of samples along the last axis, and
        ``sr`` is the sample rate returned by ``librosa.load``.
    """
    original_audio, sr = librosa.load(audio_path, sr=sr)

    # Degrade the clip: go down to a quarter of the sample rate...
    low_sr = sr // 4
    low_quality_audio = librosa.resample(original_audio, orig_sr=sr, target_sr=low_sr)

    # ...and come back up to the original rate.
    low_quality_audio_upscaled = librosa.resample(low_quality_audio, orig_sr=low_sr, target_sr=sr)

    # The round trip may change the sample count by a few samples. Pin the
    # length to the original so input/target windows cut from the two
    # signals line up one-to-one.
    low_quality_audio_upscaled = librosa.util.fix_length(
        low_quality_audio_upscaled, size=original_audio.shape[-1]
    )

    return original_audio, low_quality_audio_upscaled, sr

def plot_audio_signals(original_audio, low_quality_audio_upscaled, reconstructed_audio, sr):
    """Show the three waveforms stacked vertically in a single figure.

    Args:
        original_audio: Signal drawn in the top panel.
        low_quality_audio_upscaled: Signal drawn in the middle panel.
        reconstructed_audio: Signal drawn in the bottom panel.
        sr: Sample rate forwarded to ``librosa.display.waveshow``.
    """
    panels = (
        ("Original Audio", original_audio),
        ("Low Quality Audio (Upscaled)", low_quality_audio_upscaled),
        ("Reconstructed Audio (Super Resolution)", reconstructed_audio),
    )

    plt.figure(figsize=(12, 8))

    # One subplot row per signal, in the order listed above.
    for row, (title, signal) in enumerate(panels, start=1):
        plt.subplot(3, 1, row)
        librosa.display.waveshow(signal, sr=sr)
        plt.title(title)

    plt.tight_layout()
    plt.show()

def build_audio_super_resolution_model(input_shape):
    """Build the 1-D convolutional model used for audio super-resolution.

    The network is three ``Conv1D`` layers with kernel size 9 and ``'same'``
    padding. The first two have 64 filters and are each followed by a ReLU
    activation; the last has a single filter and no activation.

    Args:
        input_shape: Shape of one input sample without the batch axis. The
            script in this file passes ``(window_size, 1)``.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    return tf.keras.Sequential([
        # Declare the input explicitly instead of passing ``input_shape`` to
        # the first layer, which current Keras versions warn about.
        tf.keras.Input(shape=input_shape),

        # First convolutional stage
        layers.Conv1D(64, kernel_size=9, padding='same'),
        layers.Activation('relu'),

        # Intermediate convolutional stage
        layers.Conv1D(64, kernel_size=9, padding='same'),
        layers.Activation('relu'),

        # Output stage: project back to a single channel
        layers.Conv1D(1, kernel_size=9, padding='same'),
    ])

def prepare_data(audio, window_size=8192):
    """Split a signal into consecutive, non-overlapping windows.

    Every complete window is kept, including the last one when the signal
    length is an exact multiple of ``window_size``. Trailing samples that do
    not fill a whole window are dropped.

    Args:
        audio: Array-like signal; windows are cut along its first axis.
        window_size: Number of samples per window. Must be positive.

    Returns:
        A new array of shape ``(num_windows, window_size, ...)``. When the
        signal is shorter than one window the result has zero rows but still
        carries the window axis, so later reshapes keep working.

    Raises:
        ValueError: If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")

    samples = np.asarray(audio)
    count = len(samples) // window_size

    # np.array copies, so the windows never alias the caller's buffer.
    usable = np.array(samples[:count * window_size])
    return usable.reshape((count, window_size) + samples.shape[1:])

def reconstruct_audio(model, low_quality_audio_upscaled, window_size=8192):
    """Run the model over a signal window by window and join the output.

    The signal is cut into consecutive, non-overlapping windows. Every
    complete window is processed, including the last one when the length is
    an exact multiple of ``window_size``; trailing samples that do not fill
    a whole window are dropped. All windows go to ``model.predict`` in one
    batched call rather than one call per window.

    Args:
        model: Object exposing ``predict``; it receives an array of shape
            ``(num_windows, window_size, 1)``.
        low_quality_audio_upscaled: 1-D signal to reconstruct.
        window_size: Number of samples per window. Must be positive.

    Returns:
        A 1-D array holding the flattened predictions in window order. It is
        empty when the signal is shorter than one window.

    Raises:
        ValueError: If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")

    signal = np.asarray(low_quality_audio_upscaled)
    count = len(signal) // window_size
    if count == 0:
        # Nothing to predict; skip the model call entirely.
        return np.array([], dtype=signal.dtype)

    # Shape (windows, samples, channels): batch axis first, channel axis last.
    batch = signal[:count * window_size].reshape(count, window_size, 1)
    predicted = model.predict(batch)

    # Flattening in row-major order concatenates the windows back to back.
    return np.asarray(predicted).reshape(-1)

# Load the clip and build its degraded counterpart
audio_path = 'Audio.wav'  # Replace with the path to your audio file
original_audio, low_quality_audio_upscaled, sr = load_and_preprocess_audio(audio_path)

# Cut both signals into training windows (degraded input -> original target)
x_train = prepare_data(low_quality_audio_upscaled)
y_train = prepare_data(original_audio)

# The two signals may differ in length by a few samples after the resampling
# round trip, which can give them different window counts. Keep only the
# windows present in both, and fail early with a clear message when the clip
# is too short to train on at all.
num_windows = min(len(x_train), len(y_train))
if num_windows == 0:
    raise ValueError(
        f"'{audio_path}' is too short to yield a single training window"
    )
x_train = x_train[:num_windows]
y_train = y_train[:num_windows]

# Add the channel axis expected by the convolutional network
x_train = np.expand_dims(x_train, axis=2)
y_train = np.expand_dims(y_train, axis=2)

# Build the model for one window of single-channel samples
input_shape = (x_train.shape[1], 1)
model = build_audio_super_resolution_model(input_shape)

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Train the model
history = model.fit(x_train, y_train, epochs=250, batch_size=16)

# Reconstruct the whole clip from the degraded signal
reconstructed_audio = reconstruct_audio(model, low_quality_audio_upscaled)

# Compare the waveforms; the reconstruction can be shorter than the inputs
# because incomplete trailing windows are dropped, so trim the others to match
plot_audio_signals(original_audio[:len(reconstructed_audio)], low_quality_audio_upscaled[:len(reconstructed_audio)], reconstructed_audio, sr)

# Save the resulting audio files
sf.write('original_audio.wav', original_audio, sr)
sf.write('low_quality_audio_upscaled.wav', low_quality_audio_upscaled, sr)
sf.write('reconstructed_audio_full.wav', reconstructed_audio, sr)

print("Audio salvato: 'original_audio.wav', 'low_quality_audio_upscaled.wav', 'reconstructed_audio_full.wav'")