<a href="https://colab.research.google.com/github/jkranyak/background_removed_image_tones_to_music_tone_UI/blob/main/Untitled8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install gradio numpy pandas Pillow scikit-learn rembg

Collecting gradio
  Downloading gradio-4.37.1-py3-none-any.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m
Collecting rembg
  Downloading rembg-2.0.57-py3-none-any.whl (33 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==1.0.2 (from gradio)
  Downloading gradio_client-1.0.2-py3-none-any.whl (318 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.2/318.2 kB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.

In [None]:
import gradio as gr
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.cluster import MiniBatchKMeans
import io
import logging
import traceback
import time
import random
from rembg import remove
from scipy import signal

# Configure the root logger once for the whole notebook: INFO and above,
# each record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def get_dominant_colors(image, n_colors=5):
    """Cluster an image's pixels and return the cluster centres as colors.

    Args:
        image: PIL image. Modes other than RGB/RGBA are converted to RGBA
            first so the pixel array always has a channel axis.
        n_colors: Number of clusters (and therefore rows) to produce.

    Returns:
        DataFrame with integer columns 'R', 'G', 'B', one row per cluster
        centre.
    """
    # Grayscale/palette images would yield a 2-D array and break the channel
    # indexing below, so normalise to a 3- or 4-channel mode up front.
    if image.mode not in ('RGB', 'RGBA'):
        image = image.convert('RGBA')

    image = image.resize((100, 100))  # Resize for faster processing
    image_array = np.array(image)

    pixels = image_array[:, :, :3].reshape(-1, 3)

    if image_array.shape[2] == 4:
        # Fully transparent pixels are invisible; clustering their RGB values
        # (typically black after masking) would skew the palette.
        alpha = image_array[:, :, 3].reshape(-1)
        opaque_pixels = pixels[alpha > 0]
        # Keep every pixel if too few opaque ones remain to seed each cluster.
        if len(opaque_pixels) >= n_colors:
            pixels = opaque_pixels

    kmeans = MiniBatchKMeans(n_clusters=n_colors, random_state=42, batch_size=100)
    kmeans.fit(pixels)

    colors = kmeans.cluster_centers_
    return pd.DataFrame(colors.astype(int), columns=['R', 'G', 'B'])

def generate_note(frequency, duration, sample_rate=44100, amplitude=0.5):
    """Synthesise a sine tone with a short linear fade-in and fade-out.

    Args:
        frequency: Tone frequency in Hz.
        duration: Note length in seconds.
        sample_rate: Samples per second.
        amplitude: Peak amplitude of the sine before the envelope is applied.

    Returns:
        1-D float array holding int(sample_rate * duration) samples.
    """
    n_samples = int(sample_rate * duration)
    t = np.linspace(0, duration, n_samples, False)
    note = amplitude * np.sin(2 * np.pi * frequency * t)

    # Apply a simple envelope: 10 ms linear ramps at both ends. Each ramp is
    # capped at half the note so that notes shorter than 20 ms do not ask
    # np.ones() for a negative-length sustain section.
    ramp_len = min(int(sample_rate * 0.01), n_samples // 2)
    ramp = np.linspace(0, 1, ramp_len)
    sustain = np.ones(n_samples - 2 * ramp_len)
    envelope = np.concatenate([ramp, sustain, ramp[::-1]])
    return note * envelope

def create_audio_from_colors(color_df, duration=10, bpm=120, is_background=False):
    """Render a randomised chord sequence whose pitches are tied to colors.

    Eight colors (the palette repeated cyclically if it is shorter) are each
    assigned one pitch from a pentatonic scale; on every beat two or three
    of them are picked at random and sounded together.

    Args:
        color_df: DataFrame whose rows are (R, G, B) colors.
        duration: Clip length in seconds.
        bpm: Tempo in beats per minute; one chord is placed per beat.
        is_background: When True, use the lower base pitch (220 instead of
            440), quieter notes (0.3 instead of 0.5) and a low-pass filter.

    Returns:
        Tuple ``(color_info, audio, sample_rate)``: ``color_info`` is a list
        of eight dicts with keys 'rgb' and 'frequency' (display strings),
        ``audio`` is an int16 array normalised to full scale (all zeros when
        nothing was rendered), and ``sample_rate`` is 44100.

    Raises:
        ValueError: If ``color_df`` has no rows.
        Exception: Any other failure is logged with its traceback and
            re-raised unchanged.
    """
    try:
        start_time = time.time()
        sample_rate = 44100
        colors = color_df.values.tolist()

        # An empty palette can never be padded up to eight entries; fail
        # loudly instead of looping forever.
        if not colors:
            raise ValueError("color_df must contain at least one color")

        # Ensure we have exactly 8 colors by repeating the palette cyclically
        # and keeping the first 8 entries.
        colors = (colors * (8 // len(colors) + 1))[:8]

        # Map colors to frequencies (pentatonic scale for better harmony)
        base_freq = 220 if is_background else 440  # A3 for background, A4 for foreground
        scale = [0, 2, 4, 7, 9]  # Pentatonic scale intervals, in semitones
        freqs = [base_freq * (2 ** (scale[i % 5] / 12)) * (2 ** (i // 5)) for i in range(8)]
        # Repeated colors share a single dict key, so the last frequency
        # zipped against a given color is the one it plays.
        color_to_freq = dict(zip(map(tuple, colors), freqs))

        beat_duration = 60 / bpm
        sequence_length = int(duration / beat_duration)
        total_samples = int(sample_rate * duration)
        full_audio = np.zeros(total_samples)

        note_amplitude = 0.3 if is_background else 0.5
        # Every note lasts exactly one beat, so each pitch only needs to be
        # synthesised once; build them lazily on first use.
        note_cache = {}

        for i in range(sequence_length):
            start_index = int(i * beat_duration * sample_rate)
            # Choose 2-3 colors for each beat
            beat_colors = random.sample(colors, random.randint(2, 3))
            for color in beat_colors:
                freq = color_to_freq[tuple(color)]
                if freq not in note_cache:
                    note_cache[freq] = generate_note(freq, beat_duration, sample_rate, note_amplitude)
                note = note_cache[freq]
                # Clip to the buffer so float rounding in the beat arithmetic
                # can never produce a slice/note length mismatch.
                end_index = min(start_index + len(note), total_samples)
                if end_index > start_index:
                    full_audio[start_index:end_index] += note[:end_index - start_index]

        # Apply low-pass filter for background (10th-order Butterworth with
        # the cutoff at half the Nyquist frequency, run forward and backward).
        if is_background:
            b, a = signal.butter(10, 0.5)
            full_audio = signal.filtfilt(b, a, full_audio)

        # Normalize audio; a silent buffer stays silent instead of becoming NaN
        peak = np.max(np.abs(full_audio)) if full_audio.size else 0.0
        if peak > 0:
            full_audio = full_audio / peak

        # Convert to int16
        full_audio = (full_audio * 32767).astype(np.int16)

        color_info = [
            {'rgb': f'rgb({r},{g},{b})', 'frequency': f'{color_to_freq[tuple([r,g,b])]:.2f} Hz'}
            for r, g, b in colors
        ]

        logging.info(f"{'Background' if is_background else 'Foreground'} audio generation completed in {time.time() - start_time:.2f} seconds")
        return color_info, full_audio, sample_rate
    except Exception as e:
        logging.error(f"Error in create_audio_from_colors: {str(e)}")
        logging.error(traceback.format_exc())
        raise

def separate_foreground_background(image):
    """Split an image into foreground and background RGBA layers.

    Args:
        image: PIL image; converted to RGBA if it is in another mode.

    Returns:
        Tuple ``(foreground, background)`` of PIL images. ``foreground`` is
        the array returned by rembg's ``remove``; ``background`` is the input
        with every pixel whose foreground alpha is non-zero set to
        transparent black.
    """
    # Convert image to RGBA if it's not already
    if image.mode != 'RGBA':
        image = image.convert('RGBA')

    # Remove background
    foreground = remove(np.array(image))

    # Any pixel the cut-out kept (alpha > 0) counts as foreground
    mask = foreground[:, :, 3] > 0

    # Create background by masking out the foreground. The image is always
    # RGBA at this point, so only a 4-channel fill is needed.
    background = np.array(image)
    background[mask] = [0, 0, 0, 0]

    return Image.fromarray(foreground), Image.fromarray(background)

def process_image(image, duration=10, bpm=120):
    """Turn an uploaded image into color swatches, a text summary and audio.

    The image is split into foreground and background, eight dominant colors
    are extracted from each layer, each palette is rendered to audio, and the
    two tracks are mixed 70/30 (foreground/background).

    Args:
        image: PIL image, or None when nothing was uploaded.
        duration: Clip length in seconds.
        bpm: Tempo in beats per minute.

    Returns:
        Tuple ``(swatches, color_text, (sample_rate, audio))``. ``swatches``
        is a 400x100 RGB image with the foreground palette on the top row and
        the background palette on the bottom row. On any failure the tuple is
        ``(None, "An error occurred: ...", None)`` rather than an exception.
    """
    # The UI passes None when the form is submitted without an upload; report
    # that directly instead of an opaque attribute error on None.
    if image is None:
        return None, "An error occurred: no image was provided", None

    try:
        start_time = time.time()
        logging.info("Starting image processing")

        # Separate foreground and background
        foreground, background = separate_foreground_background(image)

        # Process the foreground colors
        fg_color_df = get_dominant_colors(foreground, n_colors=8)
        fg_color_info, fg_audio_data, sample_rate = create_audio_from_colors(fg_color_df, duration=duration, bpm=bpm)

        # Process the background colors
        bg_color_df = get_dominant_colors(background, n_colors=8)
        bg_color_info, bg_audio_data, _ = create_audio_from_colors(bg_color_df, duration=duration, bpm=bpm, is_background=True)

        # Mix foreground and background audio, then rescale to full int16
        # range. A silent mix is left as zeros instead of dividing by zero.
        mixed_audio = fg_audio_data * 0.7 + bg_audio_data * 0.3
        peak = np.max(np.abs(mixed_audio))
        if peak > 0:
            mixed_audio = mixed_audio / peak * 32767
        mixed_audio = np.int16(mixed_audio)

        # Create color swatches: 50x50 tiles, foreground on row 0 and
        # background on row 1. The colors are parsed back out of the
        # 'rgb(r,g,b)' display strings.
        combined_swatches = Image.new('RGB', (400, 100))
        for row, palette in enumerate((fg_color_info, bg_color_info)):
            for col, color in enumerate(palette):
                r, g, b = map(int, color['rgb'][4:-1].split(','))
                combined_swatches.paste(Image.new('RGB', (50, 50), (r, g, b)), (col * 50, row * 50))

        # Create color information text
        color_text = "Foreground Colors:\n" + "\n".join([f"{color['rgb']} - {color['frequency']}" for color in fg_color_info])
        color_text += "\n\nBackground Colors:\n" + "\n".join([f"{color['rgb']} - {color['frequency']}" for color in bg_color_info])

        logging.info(f"Image processing completed in {time.time() - start_time:.2f} seconds")
        return combined_swatches, color_text, (sample_rate, mixed_audio)

    except Exception as e:
        # Top-level UI boundary: log the traceback and show the message to
        # the user rather than letting the request crash.
        logging.error(f"Error in process_image: {str(e)}")
        logging.error(traceback.format_exc())
        return None, f"An error occurred: {str(e)}", None

# Input widgets, listed in the positional order process_image receives them
image_input = gr.Image(type="pil", label="Upload Image")
duration_input = gr.Slider(minimum=5, maximum=30, value=10, step=1, label="Duration (seconds)")
tempo_input = gr.Slider(minimum=60, maximum=180, value=120, step=1, label="Tempo (BPM)")

# Output widgets, matching the three values process_image returns
swatch_output = gr.Image(type="pil", label="Dominant Colors (Foreground top, Background bottom)")
info_output = gr.Textbox(label="Color Information")
audio_output = gr.Audio(type="numpy", label="Palette Music")

# Wire the processing function to the widgets
iface = gr.Interface(
    fn=process_image,
    inputs=[image_input, duration_input, tempo_input],
    outputs=[swatch_output, info_output, audio_output],
    title="Advanced Image Color Palette to Music Converter with Foreground/Background Separation",
    description="Upload an image to extract its dominant colors from foreground and background, and hear them as a musical composition!",
)

# Start the app; debug=True keeps the cell running so errors and logs stay visible
iface.launch(debug=True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://ce6414756a88cf7c95.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


