In [10]:
import numpy as np
import csv
from pydub import AudioSegment
from pydub.generators import Sine
import os
from PIL import Image
from tqdm import tqdm


def _decimate_to_fit(image_data, max_dimensions):
    """Keep every k-th row/column so the image fits inside ``max_dimensions``.

    Each axis is thinned independently and only as much as needed, so an axis
    that is already within its limit is left untouched.
    """
    max_width, max_height = max_dimensions
    height, width = image_data.shape[:2]
    # -(-a // b) is integer ceil(a / b); the max(1, ...) guards keep the
    # stride valid even for zero-sized axes or non-positive limits.
    row_step = max(1, -(-height // max(1, int(max_height))))
    col_step = max(1, -(-width // max(1, int(max_width))))
    # np.array() copies, so the result is an ordinary in-memory array even
    # when the input is a memory-mapped view.
    return np.array(image_data[::row_step, ::col_step])


def load_image(file_path, max_dimensions=(1920, 1080), max_file_size=500 * 1024 * 1024):
    """Load an image file and return its RGB pixel data.

    ``.npy`` files (extension matched case-insensitively) are read with NumPy;
    every other file is opened with Pillow and converted to RGB. A 4-D
    ``.npy`` array is treated as a batch: its first 100 images are stacked
    vertically into one tall image.

    Args:
        file_path: Path of the image or ``.npy`` file to load.
        max_dimensions: ``(max_width, max_height)`` limit applied ONLY when
            the file exceeds ``max_file_size`` and the user agrees to resize.
            In that case rows/columns are decimated until the image fits.
        max_file_size: Size in bytes above which the user is asked
            interactively whether to continue with a reduced image.

    Returns:
        An array of shape ``(height, width, 3)``.

    Raises:
        ValueError: If the user declines to resize an oversized file, the
            ``.npy`` file cannot be loaded for lack of memory, or the data is
            not a 3-channel image.
    """
    file_size = os.path.getsize(file_path)
    downsample = False
    if file_size > max_file_size:
        print(f"The file size is {file_size / (1024 ** 2):.2f} MB, which exceeds the maximum allowed size of {max_file_size / (1024 ** 2):.2f} MB.")
        resize = input("Do you want to resize or select a smaller portion of the file to reduce memory usage? (y/n): ").strip().lower()
        if resize != 'y':
            raise ValueError("File is too large to process. Exiting as per user choice.")
        downsample = True

    if file_path.lower().endswith('.npy'):
        try:
            # Memory-map oversized arrays so that only the pixels surviving
            # the decimation below have to be read into RAM.
            image_data = np.load(file_path, mmap_mode='r' if downsample else None)
        except MemoryError as exc:
            raise ValueError("Unable to load the .npy file due to memory constraints.") from exc
        print(f"Loaded .npy file with shape: {image_data.shape}")

        if image_data.ndim == 4:  # Batch of images
            print(f"Detected batch data with {image_data.shape[0]} images of shape {image_data.shape[1:]}.")
            subset_size = min(100, image_data.shape[0])
            print(f"Processing the first {subset_size} images in the batch.")
            # Stack the selected images top-to-bottom into one tall image.
            image_data = np.vstack(image_data[:subset_size])

        if image_data.ndim == 3 and image_data.shape[2] != 3:
            raise ValueError(f"Unsupported number of channels in image data: {image_data.shape[2]}")
    else:
        # The context manager closes the underlying file handle; the pixel
        # data is copied into the array before the block exits.
        with Image.open(file_path) as image:
            if image.mode != 'RGB':
                image = image.convert('RGB')
            image_data = np.array(image)

    if image_data.ndim != 3 or image_data.shape[2] != 3:
        raise ValueError(f"Invalid image shape: {image_data.shape}. Expected (height, width, 3).")

    if downsample:
        image_data = _decimate_to_fit(image_data, max_dimensions)

    return image_data


def pixel_to_sound_brightness(red, green, blue, duration_ms=0.1):
    """Return a sine tone whose pitch encodes the pixel's brightness.

    Brightness is the mean of the three channels. It is mapped linearly so
    that 0 gives 200 Hz and 255 gives 2000 Hz.

    Args:
        red, green, blue: Channel values; the mapping assumes the 0-255 range.
        duration_ms: Tone length in milliseconds (the unit pydub uses for
            durations). The default of 0.1 ms is only a few samples per pixel
            at pydub's default sample rate; pass a larger value if each pixel
            should be audible as a distinct pitch.

    Returns:
        A pydub ``AudioSegment`` holding the generated tone.
    """
    # int() presumably guards against overflow when the channels arrive as
    # small fixed-width NumPy integers (e.g. uint8).
    brightness = (int(red) + int(green) + int(blue)) / 3
    frequency = 200 + (brightness / 255) * (2000 - 200)
    return Sine(frequency).to_audio_segment(duration=duration_ms)


def pixel_to_sound_color(red, green, blue, duration_ms=0.1):
    """Return a sine tone whose pitch follows red and loudness follows green.

    Red is mapped linearly to 200-2000 Hz and green to a gain of -20 dB to
    0 dB. ``blue`` is accepted so all pixel mappers share the same leading
    arguments, but it does not influence the sound.

    Args:
        red, green, blue: Channel values; the mapping assumes the 0-255 range.
        duration_ms: Tone length in milliseconds (the unit pydub uses for
            durations). The default of 0.1 ms is only a few samples per pixel
            at pydub's default sample rate.

    Returns:
        A pydub ``AudioSegment`` holding the generated, gain-adjusted tone.
    """
    frequency = 200 + (red / 255) * (2000 - 200)
    volume = -20 + (green / 255) * 20  # gain in dB: 0 -> -20 dB, 255 -> 0 dB
    tone = Sine(frequency).to_audio_segment(duration=duration_ms)
    return tone.apply_gain(volume)


def pixel_to_sound_spatial(red, green, blue, x, width, duration_ms=0.1):
    """Return a stereo tone panned according to the pixel's column.

    Red is mapped linearly to 200-2000 Hz. The column index ``x`` is mapped
    to a pan position so the first column is fully left (-1) and the last
    column is fully right (+1). ``green`` and ``blue`` are accepted for
    signature parity with the other mappers but are not used.

    Args:
        red, green, blue: Channel values; the mapping assumes the 0-255 range.
        x: Zero-based column index of the pixel.
        width: Total number of columns in the image.
        duration_ms: Tone length in milliseconds (the unit pydub uses for
            durations). The default of 0.1 ms is only a few samples per pixel
            at pydub's default sample rate.

    Returns:
        A pydub ``AudioSegment`` holding the panned tone.
    """
    frequency = 200 + (red / 255) * (2000 - 200)
    sound = Sine(frequency).to_audio_segment(duration=duration_ms)
    if width > 1:
        # Columns run 0..width-1, so divide by width-1 for the last column
        # to land exactly on +1.
        pan = (x / (width - 1)) * 2 - 1
    else:
        # A single column has no left/right extent; keep it centred.
        pan = 0.0
    return sound.pan(pan)


def generate_sound(image_data, mode):
    """Generate sound from image data based on the selected mode.

    Pixels are visited row by row, left to right. Each pixel is turned into a
    short tone by the mapper for ``mode`` and the tones are concatenated in
    that order.

    Args:
        image_data: Array of shape ``(height, width, 3)``.
        mode: 1 for brightness-based pitch, 2 for colour-based pitch/volume,
            3 for spatial stereo mapping.

    Returns:
        A pydub ``AudioSegment`` with all tones back to back, or an empty
        segment when the image has no pixels.

    Raises:
        ValueError: If ``mode`` is not 1, 2, or 3.
    """
    print(f"Generating sound with image shape: {image_data.shape}")
    # Fail fast with a clear message instead of hitting an unbound ``sound``
    # variable on the first pixel.
    if mode not in (1, 2, 3):
        raise ValueError(f"Unsupported sonification mode: {mode!r}. Expected 1, 2, or 3.")

    height, width, _ = image_data.shape
    chunks = []          # raw audio bytes, one entry per pixel, in play order
    first_tone = None    # remembered only for its audio format

    for row in tqdm(range(height), desc="Processing rows"):
        for col in range(width):
            r, g, b = image_data[row, col]
            if mode == 1:
                sound = pixel_to_sound_brightness(r, g, b)
            elif mode == 2:
                sound = pixel_to_sound_color(r, g, b)
            else:
                sound = pixel_to_sound_spatial(r, g, b, col, width)
            if first_tone is None:
                first_tone = sound
            chunks.append(sound.raw_data)

    if first_tone is None:
        return AudioSegment.silent(duration=0)

    # Joining the bytes once avoids re-copying the ever-growing audio buffer
    # on every ``+=``. All tones come from the same mapper, so they are
    # assumed to share one sample width, frame rate, and channel count.
    return AudioSegment(
        data=b"".join(chunks),
        sample_width=first_tone.sample_width,
        frame_rate=first_tone.frame_rate,
        channels=first_tone.channels,
    )


def main():
    """Run the interactive image-sonification workflow.

    Prompts for an input image path and an output path, loads the image, asks
    for a sonification mode, generates the audio, and exports it as WAV.
    Problems with the inputs are reported with a message and end the run
    early instead of raising.
    """
    print("Welcome to the Image Sonification Tool!")
    image_path = input("Enter the path to your image file (.npy, .png, .jpg): ").strip()
    if not os.path.exists(image_path):
        print("Error: File not found.")
        return

    output_path = input("Enter the path to save the output sound file (e.g., output.wav): ").strip()
    # Catch a blank path now rather than after the (potentially long)
    # audio generation has already finished.
    if not output_path:
        print("Error: No output path provided.")
        return

    try:
        image_data = load_image(image_path)
        print(f"Loaded image with shape: {image_data.shape}")
    except (ValueError, OSError) as e:
        # OSError covers unreadable files and formats Pillow cannot identify.
        print(f"Error loading image: {e}")
        return

    print("Select a sonification mode:")
    print("1. Brightness-based pitch modulation")
    print("2. Color-based sound effects")
    print("3. Spatial stereo mapping")
    try:
        mode = int(input("Your choice: "))
    except ValueError:
        # Non-numeric input is handled like any other invalid choice.
        mode = None
    if mode not in [1, 2, 3]:
        print("Invalid choice. Exiting.")
        return

    print("Processing the image and generating the audio...")
    sound = generate_sound(image_data, mode)
    print("Audio generation complete!")

    print(f"Saving the sound file to {output_path}...")
    sound.export(output_path, format="wav")
    print(f"Sound file saved successfully to {output_path}")


# Start the interactive tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Welcome to the Image Sonification Tool!


Enter the path to your image file (.npy, .png, .jpg):  D:\PaAC\Astro Sonification\Assignment 5\high_res_galaxy_image.npy
Enter the path to save the output sound file (e.g., output.wav):  D:\PaAC\Astro Sonification\Assignment 5\0output.wav


Loaded .npy file with shape: (256, 256, 3)
Loaded image with shape: (256, 256, 3)
Select a sonification mode:
1. Brightness-based pitch modulation
2. Color-based sound effects
3. Spatial stereo mapping


Your choice:  1


Processing the image and generating the audio...
Generating sound with image shape: (256, 256, 3)


Processing rows: 100%|███████████████████████████████████████████████████████████████| 256/256 [00:02<00:00, 98.42it/s]

Audio generation complete!
Saving the sound file to D:\PaAC\Astro Sonification\Assignment 5\0output.wav...
Sound file saved successfully to D:\PaAC\Astro Sonification\Assignment 5\0output.wav



