In [None]:
from IPython import get_ipython
from IPython.display import display
# %%
!pip install librosa scikit-learn matplotlib numpy soundfile moviepy pillow



In [None]:
import numpy as np
import librosa
import soundfile as sf
from sklearn.ensemble import RandomForestClassifier
from moviepy.editor import VideoClip, AudioFileClip
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
import tempfile
import random

In [None]:
# Sampling rate used for synthesis, loading and feature extraction.
SAMPLE_RATE = 22050

# The twelve pitch classes (sharps only) in ascending semitone order from C.
NOTE_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

# Semitone offsets from the root that make up each triad quality.
TRIAD_INTERVALS = {
    "Major": (0, 4, 7),
    "Minor": (0, 3, 7),
}

# "<root> <quality>" -> [root, third, fifth] as note names.
# All twelve major triads come first, then all twelve minor triads.
CHORDS = {
    f"{root} {quality}": [NOTE_NAMES[(position + step) % 12] for step in steps]
    for quality, steps in TRIAD_INTERVALS.items()
    for position, root in enumerate(NOTE_NAMES)
}

# Frequency assigned to each pitch class (A = 440.00), all within one octave.
NOTE_FREQ = dict(zip(
    NOTE_NAMES,
    [261.63, 277.18, 293.66, 311.13, 329.63, 349.23,
     369.99, 392.00, 415.30, 440.00, 466.16, 493.88],
))


def synth_chord(notes, duration=1.0, sr=SAMPLE_RATE, noise_level=0.01, rng=None):
    """Synthesize a decaying, slightly noisy chord from pure sine tones.

    Each note contributes one sine wave at ``NOTE_FREQ[note]``; the waves are
    averaged, multiplied by a linear fade from 1.0 down to 0.1, and Gaussian
    noise scaled by ``noise_level`` is added.

    Args:
        notes: Non-empty sequence of note names that are keys of ``NOTE_FREQ``.
        duration: Clip length in seconds.
        sr: Sampling rate; the clip has ``int(sr * duration)`` samples.
        noise_level: Scale factor applied to the standard-normal noise.
        rng: Optional object with a ``standard_normal(n)`` method (for example
            ``np.random.default_rng(seed)``). When omitted, the global
            ``np.random`` state is used, exactly as before.

    Returns:
        A 1-D ``float32`` array holding the synthesized clip.

    Raises:
        ValueError: If ``notes`` is empty.
        KeyError: If a note name is not present in ``NOTE_FREQ``.
    """
    if len(notes) == 0:
        # Previously this surfaced as an opaque ZeroDivisionError.
        raise ValueError("synth_chord needs at least one note")

    t = np.linspace(0, duration, int(sr * duration), False)

    audio = np.zeros_like(t)
    for note in notes:
        audio += np.sin(2 * np.pi * NOTE_FREQ[note] * t)
    audio /= len(notes)

    # Linear decay so the tone is not perfectly stationary.
    audio *= np.linspace(1, 0.1, len(audio))

    # Add some random noise to make it more realistic.
    noise_source = np.random if rng is None else rng
    audio += noise_level * noise_source.standard_normal(len(audio))
    return audio.astype(np.float32)

# Build the synthetic training set: 50 clips of random length per chord, each
# reduced to a 36-value vector (time-averaged STFT, CQT and CENS chroma).
#
# Both RNGs are seeded because clip durations come from `random` and the
# synthesis noise comes from `np.random`; without this the classifier's fixed
# random_state does not make the trained model reproducible.
random.seed(42)
np.random.seed(42)

X, y = [], []
for chord_label, notes in CHORDS.items():
    for _ in range(50):  # samples per chord
        # Vary the duration so the model does not key on clip length.
        clip = synth_chord(notes, duration=random.uniform(0.5, 1.5))

        # The clip is already float32 at SAMPLE_RATE, so features are taken
        # straight from memory. The earlier write-to-WAV / reload round trip
        # left one tmp_*.wav per sample in the working directory.
        chroma_stft = librosa.feature.chroma_stft(y=clip, sr=SAMPLE_RATE)
        chroma_cqt = librosa.feature.chroma_cqt(y=clip, sr=SAMPLE_RATE)
        chroma_cens = librosa.feature.chroma_cens(y=clip, sr=SAMPLE_RATE)

        # Combine the three chroma variants, each averaged over time.
        X.append(np.concatenate((np.mean(chroma_stft, axis=1),
                                 np.mean(chroma_cqt, axis=1),
                                 np.mean(chroma_cens, axis=1))))
        y.append(chord_label)
X = np.array(X)
y = np.array(y)




























































































































































































































































































































































































































































































































































































































































































































































































































In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 100-tree random forest on the chroma feature matrix X and the chord
# labels y; random_state is fixed so the forest itself is deterministic.
clf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X, y)
print("ML chord classifier trained!")


ML chord classifier trained!


In [None]:
# -- Provide your audio file here:
audio_file = "scientists.wav"  # <---- CHANGE THIS

# Load the song and locate its beats.
y_song, sr = librosa.load(audio_file, sr=SAMPLE_RATE)
duration = librosa.get_duration(y=y_song, sr=sr)
tempo, beat_frames = librosa.beat.beat_track(y=y_song, sr=sr)
beat_times = librosa.frames_to_time(beat_frames, sr=sr)

# Chord prediction per beat: classify a window centred on each beat.
window_size = int(0.5 * sr)               # analysis window, in samples
half_window_s = window_size / (2 * sr)    # half of that window, in seconds
ml_chord_on_beats = []                    # [(chord_label, beat_time), ...]
for beat_time in beat_times:
    start_sample = int(max(0, sr * (beat_time - half_window_s)))
    end_sample = int(min(len(y_song), sr * (beat_time + half_window_s)))
    beat_audio = y_song[start_sample:end_sample]
    if len(beat_audio) == 0:
        ml_chord_on_beats.append(("Unknown", beat_time))
        continue
    # Feature extraction must match what the classifier was trained on:
    # time-averaged STFT, CQT and CENS chroma, concatenated in that order.
    chroma_stft_segment = librosa.feature.chroma_stft(y=beat_audio, sr=sr)
    chroma_cqt_segment = librosa.feature.chroma_cqt(y=beat_audio, sr=sr)
    chroma_cens_segment = librosa.feature.chroma_cens(y=beat_audio, sr=sr)
    # predict() expects a 2-D array, hence the reshape to a single row.
    feat = np.concatenate((np.mean(chroma_stft_segment, axis=1),
                           np.mean(chroma_cqt_segment, axis=1),
                           np.mean(chroma_cens_segment, axis=1))).reshape(1, -1)
    chord_label = clf.predict(feat)[0]
    ml_chord_on_beats.append((chord_label, beat_time))

# Turn beats into (start, end, chord) spans. Every beat lasts until the next
# one; the final beat lasts until the end of the song, so its chord is no
# longer dropped from the timeline.
span_ends = list(beat_times[1:]) + [duration]
chord_timeline = [
    (start_time, end_time, label)
    for (label, start_time), end_time in zip(ml_chord_on_beats, span_ends)
]

# Merge repeated chords for better visuals: consecutive spans with the same
# label collapse into one span that keeps the first start and the last end.
merged_chords = []
for span in chord_timeline:
    if merged_chords and merged_chords[-1][2] == span[2]:
        merged_chords[-1] = (merged_chords[-1][0], span[1], span[2])
    else:
        merged_chords.append(span)


















































































































































































































In [None]:
# Visual identity of every chord label: (RGB colour, shape name).
# Shape names are the ones draw_shape understands:
# 'circle', 'rect', 'triangle', 'hex' and 'star'.
chord_palette = {
    # Major chords – cheerful, warm, and vivid
    "C Major":   ((255, 255, 0),     'circle'),     # Bright Yellow – Happy, sunny resolution
    "C# Major":  ((255, 0, 255),     'triangle'),   # Magenta – vivid, expressive
    "D Major":   ((0, 255, 0),       'circle'),     # Vibrant Green – Triumphant, fresh
    "D# Major":  ((255, 153, 51),    'hex'),        # Warm Orange – bold, animated
    "E Major":   ((255, 51, 255),    'star'),       # Light Magenta – open and colorful
    "F Major":   ((255, 204, 0),     'rect'),       # Golden Yellow – bright, balanced
    "F# Major":  ((102, 255, 102),   'triangle'),   # Soft Mint – smooth and lush
    "G Major":   ((255, 85, 0),      'circle'),     # Orange-Red – lively, warm
    "G# Major":  ((255, 0, 102),     'star'),       # Hot Pink – playful, vivid
    "A Major":   ((255, 255, 255),   'rect'),       # White – radiant, uplifting (you may change this)
    "A# Major":  ((0, 255, 255),     'triangle'),   # Cyan – energetic and clear
    "B Major":   ((255, 0, 0),       'hex'),        # True Red – confident, intense

    # Minor chords – deep, melancholic, or rich
    "C Minor":   ((51, 0, 153),      'rect'),       # Indigo – calm, reflective
    "C# Minor":  ((75, 0, 130),      'hex'),        # Dark Violet – poetic, obscure
    "D Minor":   ((0, 102, 204),     'triangle'),   # Deep Blue – introspective
    "D# Minor":  ((102, 0, 204),     'star'),       # Midnight Purple – mysterious
    "E Minor":   ((153, 51, 255),    'triangle'),   # Rich Violet – somber but rich
    "F Minor":   ((0, 51, 153),      'hex'),        # Navy Blue – deep, serious
    "F# Minor":  ((0, 0, 204),       'star'),       # Cobalt – cold, immersive
    "G Minor":   ((30, 70, 180),     'rect'),       # Ocean Blue – fragile, thoughtful
    "G# Minor":  ((60, 60, 220),     'triangle'),   # Steel Blue – icy, controlled
    "A Minor":   ((0, 0, 153),       'triangle'),   # Deep Blue – sad or mellow
    # Was an exact copy of "D# Minor" (same colour and shape), which made the
    # two chords indistinguishable on screen; now an actual blue-violet.
    "A# Minor":  ((138, 43, 226),    'star'),       # Blue-Violet – dramatic, mysterious
    "B Minor":   ((85, 85, 255),     'hex'),        # Electric Indigo – nostalgic, dreamy

    "Unknown":   ((160, 160, 160),   'circle')      # Mid Gray – undefined
}

# Output frame size in pixels (width, height).
W, H = 720, 720


def draw_shape(draw, shape, color, size, center, t_frac):
    """Draw one filled, animated shape centred on ``center``.

    Args:
        draw: Drawing object exposing ``ellipse``, ``rectangle`` and
            ``polygon`` (a ``PIL.ImageDraw.ImageDraw`` in this notebook).
        shape: One of "circle", "rect", "triangle", "hex" or "star".
            Any other value draws nothing.
        color: Fill colour, passed through unchanged as ``fill``.
        size: Base size in pixels before the animation factor is applied.
        center: ``(x, y)`` position of the shape's centre.
        t_frac: Animation phase. Circle and rect go through one size cycle
            per unit of ``t_frac``, triangle and hex through two; the star
            keeps its size and rotates one full turn per unit.

    Returns:
        None. The shape is drawn onto ``draw``.
    """
    cx, cy = center
    phase = 2*np.pi*t_frac

    if shape == "circle":
        radius = int(size * (0.9 + 0.15*np.sin(phase)))
        bounds = [cx - radius, cy - radius, cx + radius, cy + radius]
        draw.ellipse(bounds, fill=color, outline=None)
        return

    if shape == "rect":
        half = int(size * (0.85 + 0.2*np.cos(phase)))
        draw.rectangle([cx - half, cy - half, cx + half, cy + half], fill=color)
        return

    if shape == "triangle":
        half = int(size * (0.85 + 0.2*np.sin(4*np.pi*t_frac)))
        corners = [(cx, cy - half), (cx - half, cy + half), (cx + half, cy + half)]
        draw.polygon(corners, fill=color)
        return

    if shape == "hex":
        radius = int(size * (0.85 + 0.15*np.cos(4*np.pi*t_frac)))
        # Seven evenly spaced angles over a full turn: the last corner
        # coincides with the first one.
        corners = [(cx + radius*np.cos(a), cy + radius*np.sin(a))
                   for a in np.linspace(0, 2*np.pi, 7)]
        draw.polygon(corners, fill=color)
        return

    if shape == "star":
        corners = []
        for k in range(10):
            # Even corners are the outer tips, odd corners the inner notches.
            reach = size if k % 2 == 0 else size//2
            theta = np.pi/5 * k + phase
            corners.append((cx + int(reach * np.sin(theta)),
                            cy - int(reach * np.cos(theta))))
        draw.polygon(corners, fill=color)
    # More shapes

_LABEL_FONT = None


def _get_label_font():
    """Return the chord-label font, loading it on first use only."""
    global _LABEL_FONT
    if _LABEL_FONT is None:
        try:
            _LABEL_FONT = ImageFont.truetype("DejaVuSans-Bold.ttf", 54)
        except (OSError, ImportError):
            # Font file not found, or FreeType support missing: fall back to
            # Pillow's built-in font instead of failing the whole render.
            _LABEL_FONT = ImageFont.load_default()
    return _LABEL_FONT


def make_frame(t):
    """Render the video frame for time ``t`` (seconds) as an RGB array.

    The frame shows the shape assigned to the chord of the most recent beat,
    coloured by blending that chord's colour into the next beat's colour as
    time moves through the beat, with the chord name drawn underneath.

    Reads the notebook globals ``ml_chord_on_beats``, ``chord_palette``,
    ``duration``, ``W`` and ``H``.

    Args:
        t: Time in seconds from the start of the clip.

    Returns:
        ``numpy`` array created from a ``W`` x ``H`` RGB image.
    """
    fallback_look = ((180, 180, 180), "circle")

    if ml_chord_on_beats:
        beat_starts = [bt for _, bt in ml_chord_on_beats]
        # Index of the last beat at or before t. Before the first beat this is
        # -1, which used to index the *last* beat and produce a huge negative
        # t_frac; clamp so the first beat's chord is shown instead.
        idx = max(int(np.searchsorted(beat_starts, t, side='right')) - 1, 0)
        next_idx = min(idx + 1, len(ml_chord_on_beats) - 1)
        chord, bt1 = ml_chord_on_beats[idx]
        next_chord = ml_chord_on_beats[next_idx][0]
        # The last beat has no successor, so it lasts until the end of the song.
        bt2 = ml_chord_on_beats[next_idx][1] if next_idx != idx else duration
    else:
        # No beats were detected: show the placeholder for the whole clip.
        chord = next_chord = "Unknown"
        bt1, bt2 = 0.0, duration

    color, shape = chord_palette.get(chord, fallback_look)
    next_color = chord_palette.get(next_chord, fallback_look)[0]

    # Progress through the current beat, kept inside [0, 1] so the blended
    # colour channels always stay between the two endpoint colours.
    t_frac = (t - bt1) / max(0.001, (bt2 - bt1))
    t_frac = min(1.0, max(0.0, t_frac))

    img = Image.new("RGB", (W, H), (30, 30, 30))
    draw = ImageDraw.Draw(img)
    # Artistic transitions
    curr_col = tuple(int((1-t_frac)*c1 + t_frac*c2) for c1, c2 in zip(color, next_color))
    draw_shape(draw, shape, curr_col, 170, (W//2, H//2), t_frac)
    draw.text((W//2-70, H//2+180), chord, font=_get_label_font(), fill=(255,255,255,220))
    return np.array(img)

# %%
# The video is rendered once, in the dedicated cell that follows this one.
# An identical copy of that render used to live here, which made a full
# "Run All" encode the same clip twice.

In [None]:
# Render the chord animation over the whole song and mux in the original audio.
video_duration = duration

# tempfile.mktemp() only invents a name, so another process can claim the path
# before it is written. NamedTemporaryFile creates the file atomically, and
# delete=False keeps it on disk for the encoder to overwrite.
with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
    outpath = tmp_file.name

video = VideoClip(make_frame, duration=video_duration)
audio = AudioFileClip(audio_file).subclip(0, video_duration)
video = video.set_audio(audio)
try:
    video.write_videofile(outpath, fps=12, codec="libx264", audio_codec="aac")
finally:
    # Release the audio reader even when encoding fails.
    audio.close()
    video.close()
print(f"Video written to {outpath}")
