# In [None]:  (Jupyter cell prompt left over from the notebook export)
import os
import numpy as np
import librosa
from tensorflow.keras import layers, models, initializers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, UpSampling1D, BatchNormalization, Activation, Concatenate
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import librosa.display
from io import BytesIO, StringIO
import base64
from flask import Flask, render_template, request, redirect, url_for

# Create the Flask application object; the @app.route decorators below register their views on it.
app = Flask(__name__)

# Load an audio file and slice it into paired, fixed-length windows ("VCKT dataset").
def load_and_preprocess_vckt_audio(file_path, target_sr=16000, window_size=2048, hop_size=None, upsampling_factor=4):
    """Load an audio file, peak-normalize it and cut it into window pairs.

    For every window start ``i`` (advancing by ``hop_size``) two views of the
    same signal are produced:

    * ``X`` -- ``window_size`` consecutive samples starting at ``i``;
    * ``y`` -- ``window_size`` samples starting at ``i``, taking every
      ``upsampling_factor``-th sample.

    Only starts for which *both* views are complete are emitted, so every row
    has exactly ``window_size`` samples and the results are regular arrays.

    Args:
        file_path: Path handed to ``load_vckt_audio``.
        target_sr: Kept for interface compatibility; not used by this function.
        window_size: Number of samples per window (must be >= 1).
        hop_size: Distance between consecutive window starts. Defaults to
            half a window (at least 1).
        upsampling_factor: Stride used to build ``y`` (must be >= 1).

    Returns:
        Tuple ``(X, y)``, both shaped ``(num_windows, window_size, 1)``.
        ``num_windows`` is 0 when the signal is too short for a single pair.

    Raises:
        ValueError: If ``window_size``, ``hop_size`` or ``upsampling_factor``
            is not positive.
    """
    if window_size < 1 or upsampling_factor < 1:
        raise ValueError("window_size and upsampling_factor must be positive")
    if hop_size is None:
        # Default: 50% overlap between consecutive windows.
        hop_size = max(1, window_size // 2)
    elif hop_size < 1:
        raise ValueError("hop_size must be positive")

    # NOTE(review): load_vckt_audio is not defined in the visible part of this
    # file; it is expected to return (samples, sample_rate) -- TODO confirm.
    audio, _ = load_vckt_audio(file_path)
    audio = np.asarray(audio)

    # Peak-normalize, but leave empty or all-zero signals untouched instead of
    # dividing by zero.
    peak = np.max(np.abs(audio)) if audio.size else 0
    if peak > 0:
        audio = audio / peak

    # The strided view reaches index i + (window_size - 1) * upsampling_factor,
    # which (for a factor >= 1) is never closer than the contiguous view's end.
    samples_needed = (window_size - 1) * upsampling_factor + 1
    starts = range(0, len(audio) - samples_needed + 1, hop_size)

    if not starts:
        empty = np.empty((0, window_size, 1), dtype=audio.dtype)
        return empty, empty.copy()

    X = np.stack([audio[i:i + window_size] for i in starts])
    y = np.stack([audio[i:i + window_size * upsampling_factor:upsampling_factor] for i in starts])

    # Append the channel axis expected by Conv1D models.
    return X[:, :, np.newaxis], y[:, :, np.newaxis]

# Build the model: a small 1-D encoder/decoder with one U-Net-style skip connection.
def build_advanced_model(input_shape):
    """Build the (uncompiled) 1-D convolutional encoder/decoder network.

    The network is assembled with the Keras functional API because the skip
    connection needs a layer with two inputs, which ``Sequential`` cannot
    express.

    Layout:
        input -> Conv(64, k=9) -> BN -> ReLU --+--> MaxPool(2) -> Conv(128, k=9)
                                               |      -> BN -> ReLU -> UpSample(2) --+
                                               +--> Conv(64, k=1) (skip) ------------+
        concat -> Conv(64, k=1) -> BN -> ReLU -> Conv(1, k=9, tanh)

    Args:
        input_shape: ``(timesteps, channels)`` of one input window, without
            the batch axis. A fixed ``timesteps`` must be even so the
            pooled-then-upsampled branch has the same length as the skip
            branch when they are concatenated.

    Returns:
        A Keras ``Model`` mapping ``(batch, timesteps, channels)`` to
        ``(batch, timesteps, 1)`` with values in ``[-1, 1]`` (tanh output).
    """
    def _init():
        # A fresh initializer per layer, so layers do not share one unseeded
        # initializer instance.
        return initializers.RandomNormal(stddev=0.02)

    inputs = layers.Input(shape=input_shape)

    # Encoder
    x = Conv1D(64, kernel_size=9, padding='same', kernel_initializer=_init(), use_bias=False)(inputs)
    x = BatchNormalization()(x)
    encoded = Activation('relu')(x)

    # Skip branch: 1x1 projection of the full-resolution encoder features.
    skip_connection = Conv1D(64, kernel_size=1, padding='same', kernel_initializer=_init(), use_bias=False)(encoded)

    # Down-sampled branch
    x = MaxPooling1D(pool_size=2)(encoded)
    x = Conv1D(128, kernel_size=9, padding='same', kernel_initializer=_init(), use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = UpSampling1D(size=2)(x)

    # Merge the two branches along the channel axis and fuse them.
    x = Concatenate()([x, skip_connection])
    x = Conv1D(64, kernel_size=1, padding='same', kernel_initializer=_init(), use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    outputs = Conv1D(1, kernel_size=9, padding='same', activation='tanh')(x)

    return models.Model(inputs=inputs, outputs=outputs)

# Run the model on a single audio window and return the predicted signal.
def generate_super_res_audio(model, input_audio):
    """Predict the model output for one audio window.

    Args:
        model: Object exposing ``predict(batch)``; it is called with an array
            of shape ``(1, samples, 1)``.
        input_audio: One window, either 1-D ``(samples,)`` or a single-channel
            column ``(samples, 1)`` (the layout produced by indexing one row
            of a ``(windows, samples, 1)`` array).

    Returns:
        The prediction with all length-1 axes removed (``np.squeeze``).

    Raises:
        ValueError: If ``input_audio`` is neither 1-D nor ``(samples, 1)``.
    """
    window = np.asarray(input_audio)
    if window.ndim == 2 and window.shape[1] == 1:
        # Drop the channel axis so it is not added a second time below.
        window = window[:, 0]
    if window.ndim != 1:
        raise ValueError(
            f"input_audio must have shape (samples,) or (samples, 1), got {window.shape}"
        )

    batch = window[np.newaxis, :, np.newaxis]
    return np.squeeze(model.predict(batch))

# Difference between two spectrograms ("spectrogram difference frames").
def calculate_spectrogram_difference_frames(original_spec, processed_spec):
    """Return ``original_spec - processed_spec``.

    The subtraction is delegated to the operands' ``-`` operator, so for
    NumPy arrays it is element-wise and follows the usual broadcasting rules.
    """
    difference = original_spec - processed_spec
    return difference

# Render a log-frequency spectrogram of a signal into an in-memory PNG.
def save_frequency_representation_images(audio, file_prefix, target_sr=16000):
    """Plot the dB-scaled STFT magnitude of ``audio`` and return it as a PNG buffer.

    Nothing is written to disk; the image only lives in the returned buffer.

    Args:
        audio: 1-D signal passed to ``librosa.stft``.
        file_prefix: Text used in the plot title.
        target_sr: Sample rate used to label the time/frequency axes.

    Returns:
        ``BytesIO`` holding the PNG image, positioned at the start so it can
        be consumed with either ``read()`` or ``getvalue()``.
    """
    fig = plt.figure(figsize=(10, 4))
    try:
        # stft() yields complex values; take the magnitude explicitly before
        # converting to decibels.
        magnitude = np.abs(librosa.stft(audio))
        spec = librosa.amplitude_to_db(magnitude, ref=np.max)
        librosa.display.specshow(spec, y_axis='log', x_axis='time', sr=target_sr)
        plt.title(f'{file_prefix} Frequency Representation')
        plt.colorbar(format='%+2.0f dB')
        plt.tight_layout()
        img_buffer = BytesIO()
        fig.savefig(img_buffer, format='png')
    finally:
        # Close (not just clear) the figure so repeated calls do not pile up
        # open figures in pyplot's registry.
        plt.close(fig)

    img_buffer.seek(0)
    return img_buffer

# Build the HTML comparison page ("html content").
def create_html_content(file_prefix, audio_files, low_res_audios, super_res_audios, cubic_baseline_audios, high_res_audios, low_res_imgs, super_res_imgs, cubic_baseline_imgs, high_res_imgs):
    """Return an HTML page comparing four audio variants and their spectrograms.

    One section is emitted per entry of ``audio_files``. Each section holds
    four ``<audio>`` players followed by four inline (base64) PNG images, in
    the order low resolution, super resolution, cubic baseline, high
    resolution.

    Args:
        file_prefix: Label shown in the title and headings. HTML-escaped.
        audio_files: Sequence whose length decides how many sections are
            rendered; the items themselves are not used.
        low_res_audios, super_res_audios, cubic_baseline_audios,
        high_res_audios: Per-section values for the ``src`` attribute of each
            player. HTML-escaped.
        low_res_imgs, super_res_imgs, cubic_baseline_imgs, high_res_imgs:
            Per-section objects exposing ``getvalue()`` that returns PNG bytes.

    Returns:
        The complete HTML document as a string.

    Raises:
        IndexError: If any of the per-section sequences is shorter than
            ``audio_files``.
    """
    # stdlib; imported locally so the block needs no new file-level import.
    from html import escape

    def _png_data_uri(img_buffer):
        # Inline the image so the page has no external image dependencies.
        return "data:image/png;base64," + base64.b64encode(img_buffer.getvalue()).decode('ascii')

    # The label can come from an uploaded file name, so never emit it raw.
    title = escape(str(file_prefix))

    parts = [f"""
    <html>
    <head>
      <title>{title} Audio and Spectrogram Comparison</title>
    </head>
    <body>
      <h2>{title} Audio and Spectrogram Comparison</h2>
    """]

    for i, _ in enumerate(audio_files):
        low_src = escape(str(low_res_audios[i]))
        super_src = escape(str(super_res_audios[i]))
        cubic_src = escape(str(cubic_baseline_audios[i]))
        high_src = escape(str(high_res_audios[i]))

        parts.append(f"""
          <h3>{title} - Audio File {i + 1}</h3>
          <audio controls>
            <source src="{low_src}" type="audio/wav">
            Your browser does not support the audio element.
          </audio>
          <audio controls>
            <source src="{super_src}" type="audio/wav">
            Your browser does not support the audio element.
          </audio>
          <audio controls>
            <source src="{cubic_src}" type="audio/wav">
            Your browser does not support the audio element.
          </audio>
          <audio controls>
            <source src="{high_src}" type="audio/wav">
            Your browser does not support the audio element.
          </audio>
          <br>
          <img src="{_png_data_uri(low_res_imgs[i])}" alt="Low Resolution">
          <img src="{_png_data_uri(super_res_imgs[i])}" alt="Super Resolution">
          <img src="{_png_data_uri(cubic_baseline_imgs[i])}" alt="Cubic Baseline">
          <img src="{_png_data_uri(high_res_imgs[i])}" alt="High Resolution">
        """)

    parts.append("""
    </body>
    </html>
    """)
    return "".join(parts)

# Route for the home page.
@app.route('/')
def home():
    """Render and return the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page

# Helper: encode a float signal as 16-bit PCM WAV, store it on disk and return the bytes.
def _write_wav_pcm16(path, samples, sample_rate):
    """Write ``samples`` to ``path`` as mono 16-bit PCM WAV and return the file bytes.

    Values are expected in ``[-1, 1]``; anything outside is clipped and
    non-finite values are replaced before scaling to the int16 range.
    """
    # stdlib; imported locally so the block needs no new file-level import.
    import wave

    signal = np.nan_to_num(np.asarray(samples, dtype=np.float64)).ravel()
    pcm = np.round(np.clip(signal, -1.0, 1.0) * 32767.0).astype('<i2')

    encoded = BytesIO()
    with wave.open(encoded, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # bytes per sample -> 16-bit
        wav_file.setframerate(int(sample_rate))
        wav_file.writeframes(pcm.tobytes())

    wav_bytes = encoded.getvalue()
    with open(path, 'wb') as out_file:
        out_file.write(wav_bytes)
    return wav_bytes


# Route handling an audio upload: process the first window and build the comparison page.
@app.route('/upload_vckt', methods=['POST'])
def upload_vckt_file():
    """Process an uploaded audio file and redirect to its comparison page.

    Steps: store the upload in ``vckt_uploads``, window it, run the
    pre-trained model on the first window, compute a resampled baseline,
    write the four signals as WAV files, render their spectrograms and save
    an HTML page that embeds all of them.

    Returns:
        A redirect to the home page when no usable file was submitted, a
        ``400`` response when the audio yields no window, otherwise a
        redirect to the ``uploaded_file`` view for the stored name.
    """
    upload_dir = 'vckt_uploads'

    uploaded = request.files.get('file')
    if uploaded is None or not uploaded.filename:
        # This view only accepts POST, so redirecting to request.url would
        # come back as a GET that the route rejects; send the user home.
        return redirect(url_for('home'))

    # The client controls the file name: keep only its last path component so
    # it cannot point outside the upload directory.
    safe_name = os.path.basename(uploaded.filename.replace('\\', '/'))
    if safe_name in ('', '.', '..'):
        return redirect(url_for('home'))

    # The directory is otherwise only created when the file runs as a script.
    os.makedirs(upload_dir, exist_ok=True)
    file_path = os.path.join(upload_dir, safe_name)
    uploaded.save(file_path)

    # Load and preprocess data from VCKT dataset
    X_low_res, _ = load_and_preprocess_vckt_audio(file_path, upsampling_factor=4)
    if len(X_low_res) == 0:
        return 'Uploaded audio is too short to process.', 400

    # First window as a plain 1-D signal.
    low_res_window = X_low_res[0, :, 0]

    # Build and compile the model, then load the pre-trained weights.
    model = build_advanced_model(X_low_res.shape[1:])
    model.compile(optimizer=Adam(learning_rate=0.0002, beta_1=0.5), loss='mae')
    model.load_weights('advanced_audio_super_resolution_model.h5')

    # Generating the --- "super-resolved audio"
    super_res_output = generate_super_res_audio(model, low_res_window)

    # Baseline: plain resampling from 16 kHz to 64 kHz.
    cubic_baseline = librosa.resample(low_res_window, orig_sr=16000, target_sr=64000, res_type='kaiser_best')

    # NOTE(review): the "high resolution" track is the same window as the
    # low-resolution one (as in the original code) -- confirm the intended
    # reference signal.
    high_res_output = low_res_window

    tracks = [
        ('low_resolution', low_res_window, 16000),
        ('super_resolution', super_res_output, 16000),
        ('cubic_baseline', cubic_baseline, 64000),
        ('high_resolution', high_res_output, 16000),
    ]

    audio_sources = []
    spectrograms = []
    for label, signal, sample_rate in tracks:
        track_path = f'{upload_dir}/{label}_{safe_name}'
        # Written with the stdlib wave module: librosa.output.write_wav is
        # not available in current librosa releases.
        wav_bytes = _write_wav_pcm16(track_path, signal, sample_rate)
        # No route serves the stored WAV files, so embed the audio in the
        # page the same way the spectrogram images are embedded.
        audio_sources.append('data:audio/wav;base64,' + base64.b64encode(wav_bytes).decode('ascii'))
        # Label the axes with the rate the track actually has.
        spectrograms.append(save_frequency_representation_images(signal, track_path, target_sr=sample_rate))

    # Create HTML --- "output"
    html_content = create_html_content(
        safe_name, [safe_name],
        [audio_sources[0]], [audio_sources[1]], [audio_sources[2]], [audio_sources[3]],
        [spectrograms[0]], [spectrograms[1]], [spectrograms[2]], [spectrograms[3]],
    )

    # Save the HTML content to a file in vckt_uploads folder
    html_filename = f'{upload_dir}/audio_and_spectrogram_comparison_{safe_name}.html'
    with open(html_filename, 'w', encoding='utf-8') as html_file:
        html_file.write(html_content)

    return redirect(url_for('uploaded_file', filename=safe_name))

# Route serving the comparison page generated for an uploaded file.
@app.route('/vckt_uploads/<filename>')
def uploaded_file(filename):
    """Return the stored comparison page for ``filename``.

    The page is written by the upload route into the ``vckt_uploads``
    directory, not into the templates folder, so it is sent as a static file
    instead of being rendered as a template. This also means upload-derived
    HTML is never evaluated as template code.

    Args:
        filename: Name of the uploaded file, taken from the URL.

    Returns:
        The HTML file as a response. ``send_from_directory`` answers with
        404 when the file is missing or the name would leave the directory.
    """
    # Same package the file already imports from; local to keep this edit
    # self-contained.
    from flask import send_from_directory

    html_filename = f'audio_and_spectrogram_comparison_{filename}.html'
    # Absolute path: the pages are written relative to the working directory,
    # so resolve the directory the same way here.
    return send_from_directory(os.path.abspath('vckt_uploads'), html_filename)

if __name__ == '__main__':
    # The upload route saves files into this relative directory, so make sure it exists before serving.
    os.makedirs('vckt_uploads', exist_ok=True)
    # NOTE(review): debug=True turns on Flask's debug mode; keep this for local development only.
    app.run(debug=True)