<a href="https://colab.research.google.com/github/bnsreenu/python_for_microscopists/blob/master/sonification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

!apt-get install -y fluidsynth
!pip install pretty_midi pydub moviepy opencv-python-headless

In [None]:

import cv2
import numpy as np
import pretty_midi
from pydub import AudioSegment
from moviepy.editor import ImageSequenceClip, AudioFileClip
import subprocess, os

# Input image that will be turned into sound.
# NOTE(review): the /content/drive/... paths presumably require Google Drive to
# be mounted in Colab first - confirm before running.
image_path = "/content/drive/MyDrive/ColabNotebooks/data/img_to_music/nuclei_128x128.jpg"  # alternatives: tissue.jpg or nuclei.jpg

# SoundFonts (.sf2) used by fluidsynth to render each instrument, plus the
# tanpura recording used as the drone. Free SoundFonts can be found online.
sitar_sf2 = "/content/drive/MyDrive/ColabNotebooks/data/img_to_music/Realistic_Sitar_GM.sf2"
piano_sf2 = "/content/drive/MyDrive/ColabNotebooks/data/img_to_music/Piano2.sf2"
percussion_sf2 = "/content/drive/MyDrive/ColabNotebooks/data/img_to_music/African Percussion.sf2"
tanpura_path = "/content/drive/MyDrive/ColabNotebooks/data/img_to_music/tanpura_C.mp3"

# Options
grayscale_mode = False        # True: grayscale image, sitar only; False: colour image, one instrument per channel
note_duration = 0.5           # seconds per note
add_drone = True              # overlay the tanpura drone as background
generate_video = True        # True: also write a video with a moving cursor
fps_video = 15                # frames per second of the generated video
drone_volume_db = -20         # gain in dB added to the drone (negative = quieter; the tanpura was too loud otherwise)

# Output filenames (relative to the current working directory)
midi_file = "output.mid"
wav_file = "output.wav"  # not referenced by the code visible in this script
mp3_file = "sitar_with_drone.mp3"
video_file = "sonification_video.mp4"

# --- Load image ---
# cv2.imread does not raise on a missing/unreadable file; it returns None.
# Fail here with a clear message instead of a confusing error further down.
img = cv2.imread(image_path)  # OpenCV returns colour images in BGR order, not RGB
if img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# --- Map image channels to instruments ---
# `channels`, `soundfonts` and `midi_programs` share the same keys in the same
# order; the rendering step relies on that ordering.
if grayscale_mode:
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    channels = {'Sitar': img}
    soundfonts = {'Sitar': sitar_sf2}
    midi_programs = {'Sitar': 104}  # GM sitar
else:
    # BGR layout: index 0 = blue, 1 = green, 2 = red.
    # Change the assignment to suit the image being sonified.
    channels = {
        'Sitar': img[:, :, 1],       # green
        'Piano': img[:, :, 2],       # red
        'Percussion': img[:, :, 0],  # blue
    }
    soundfonts = {'Sitar': sitar_sf2, 'Piano': piano_sf2, 'Percussion': percussion_sf2}
    # MIDI program numbers per instrument. The author originally tried other
    # instruments as well (noted as 116=Tabla, 106=Harmonium); change accordingly.
    midi_programs = {'Sitar': 104, 'Piano': 0, 'Percussion': 10}

# Define a simple scale (C major pentatonic: C, D, E, G, A, continuing upwards)
scale = [60, 62, 64, 67, 69, 72, 74, 76]

# --- Create PrettyMIDI object and add notes for each channel ---
# Each channel becomes one instrument track: the channel is collapsed to a
# single row of values (one per image column), normalised to [0, 1], and each
# value picks a pitch from `scale`. Notes are played back to back.
midi = pretty_midi.PrettyMIDI()
top_index = len(scale) - 1

for instr_name, channel_data in channels.items():
    instrument = pretty_midi.Instrument(program=midi_programs[instr_name])

    # Downsample: average over rows so a 2D channel becomes a 1D line
    if channel_data.ndim == 2:
        line = np.mean(channel_data, axis=0)
    else:
        line = channel_data.flatten()
    line = np.asarray(line, dtype=float)

    # Normalise to [0, 1]. A perfectly flat channel has zero range; dividing
    # by it would produce NaN and crash on int(), so map it to the lowest note.
    low = line.min()
    span = line.max() - low
    if span > 0:
        line = (line - low) / span
    else:
        line = np.zeros_like(line)

    # Start/end times are derived from the note index rather than accumulated,
    # which avoids floating-point drift for durations such as 0.1 s.
    for i, val in enumerate(line):
        instrument.notes.append(
            pretty_midi.Note(
                velocity=80,
                pitch=scale[int(val * top_index)],
                start=i * note_duration,
                end=(i + 1) * note_duration,
            )
        )

    midi.instruments.append(instrument)

midi.write(midi_file)

# --- Render each channel separately with its own SoundFont ---
# Every instrument is written to its own temporary MIDI file, rendered to WAV
# by fluidsynth with the matching SoundFont, and overlaid onto `final_mix`.
final_mix = AudioSegment.silent(duration=0)  # start empty

# `soundfonts` has the same key order as `channels`, so position i in
# `midi.instruments` is the track for the i-th SoundFont.
for instr_index, (instr_name, sf2_path) in enumerate(soundfonts.items()):
    # Create a temporary MIDI with only this instrument
    temp_midi = pretty_midi.PrettyMIDI()
    temp_midi.instruments.append(midi.instruments[instr_index])
    temp_midi_file = f"temp_{instr_name}.mid"
    temp_midi.write(temp_midi_file)

    # Render with the corresponding SoundFont. Options come before the
    # SoundFont/MIDI arguments (the documented CLI form), and check=True makes
    # a fluidsynth failure raise here instead of surfacing later as a
    # missing or corrupt WAV file.
    temp_wav = f"temp_{instr_name}.wav"
    subprocess.run(
        ["fluidsynth", "-ni", "-F", temp_wav, "-r", "44100", sf2_path, temp_midi_file],
        check=True,
    )

    # Load audio and overlay
    track = AudioSegment.from_wav(temp_wav)

    # overlay() never extends the base segment, so pad the mix with silence
    # first whenever the new track is longer.
    missing_ms = len(track) - len(final_mix)
    if missing_ms > 0:
        final_mix = final_mix + AudioSegment.silent(duration=missing_ms)

    final_mix = final_mix.overlay(track)

# --- Optionally place the instrument mix on top of a looping drone ---
if not add_drone:
    mix = final_mix
else:
    # Load the drone and apply the gain offset (in dB) in one step
    drone = AudioSegment.from_file(tanpura_path) + drone_volume_db
    # Repeat the drone enough times to cover the mix, then cut to exact length
    repeats = len(final_mix) // len(drone) + 1
    drone_looped = (drone * repeats)[:len(final_mix)]
    mix = drone_looped.overlay(final_mix)

# Write the final audio to disk as MP3
mix.export(mp3_file, format="mp3")


# --- Generate video with moving cursor (optional) --- can take a long time for large images
# Each frame is the image with a red vertical line that sweeps left to right
# over the duration of the note sequence; the MP3 is attached as the soundtrack.
if generate_video:
    os.makedirs("frames", exist_ok=True)
    h, w = img.shape[:2]

    # One note is generated per image column (each channel is averaged over
    # its rows), so the sequence length is the image width. Using `w` avoids
    # depending on a loop variable left over from the MIDI-building loop.
    total_time = w * note_duration
    total_frames = int(total_time * fps_video)

    # A grayscale image needs three channels to draw a coloured cursor;
    # convert once here rather than once per frame.
    if img.ndim == 2:
        base_frame = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    else:
        base_frame = img

    frame_files = []
    for i in range(total_frames):
        frame = base_frame.copy()
        idx = int(i / total_frames * w)  # cursor column for this frame
        cv2.line(frame, (idx, 0), (idx, h), (0, 0, 255), 2)  # red vertical line (BGR)
        frame_path = f"frames/frame_{i:04d}.png"
        cv2.imwrite(frame_path, frame)
        frame_files.append(frame_path)

    clip = ImageSequenceClip(frame_files, fps=fps_video)
    audio = AudioFileClip(mp3_file)
    clip = clip.set_audio(audio)
    clip.write_videofile(video_file, codec="libx264", audio_codec="aac")
