<a href="https://colab.research.google.com/github/alighalebx/Guitar-Musical-Notes/blob/main/Audio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# try:
#     from google.colab import drive
#     drive.mount('/content/drive', force_remount=True)
#     COLAB = True
#     print("Note: using Google CoLab")
# except:
#     print("Note: not using Google CoLab")
#     COLAB = False

# Google Drive folder for audio files. It is only referenced by the commented-out
# AUDIO_FILE example in the loading cell, and is only reachable when the Drive
# mount above is re-enabled.
PATH = '/content/drive/MyDrive/projects/audio'

# NOTE(review): kaleido is presumably installed for plotly's static image export
# (the commented-out fig.write_image call) — confirm it is still needed.
!pip install -U kaleido

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
FPS = 30                     # analysis/video frames per second of audio
FFT_WINDOW_SECONDS = 0.25    # how many seconds of audio make up an FFT window

# Frequency range (Hz) shown on the spectrum plot's x axis - adjust as needed
FREQ_MIN = 80
FREQ_MAX = 1200

# Number of strongest notes reported for each frame
TOP_NOTES = 3

# Names of the notes (the six open strings of a standard-tuned guitar)
NOTE_NAMES = ["E2", "A2", "D3", "G3", "B3", "E4"]

# Lookup table of [note name, frequency in Hz], ordered from low to high.
# Frequencies are kept as strings; consumers convert them with float().
notesList = [
    # octave 2 (from the low E string upwards)
    ['E2', '82.41'], ['F2', '87.31'], ['F#2/Gb2', '92.5'], ['G2', '98'],
    ['G#2/Ab2', '103.83'], ['A2', '110'], ['A#2/Bb2', '116.54'], ['B2', '123.47'],
    # octave 3
    ['C3', '130.81'], ['C#3/Db3', '138.59'], ['D3', '146.83'], ['D#3/Eb3', '155.56'],
    ['E3', '164.81'], ['F3', '174.61'], ['F#3/Gb3', '185'], ['G3', '196'],
    ['G#3/Ab3', '207.65'], ['A3', '220'], ['A#3/Bb3', '233.08'], ['B3', '246.94'],
    # octave 4
    ['C4', '261.63'], ['C#4/Db4', '277.18'], ['D4', '293.66'], ['D#4/Eb4', '311.13'],
    ['E4', '329.63'], ['F4', '349.23'], ['F#4/Gb4', '369.99'], ['G4', '392'],
    ['G#4/Ab4', '415.3'], ['A4', '440'], ['A#4/Bb4', '466.16'], ['B4', '493.88'],
    # octave 5 (up to E5)
    ['C5', '523.25'], ['C#5/Db5', '554.37'], ['D5', '587.33'], ['D#5/Eb5', '622.25'],
    ['E5', '659.25'],
]

# Output size. Generally use SCALE for higher res, unless you need a non-standard aspect ratio.
RESOLUTION = (1920, 1080)
SCALE = 2  # 0.5=QHD(960x540), 1=HD(1920x1080), 2=4K(3840x2160)

Mounted at /content/drive
Note: using Google CoLab


In [None]:
import matplotlib.pyplot as plt
from scipy.fftpack import fft
from scipy.io import wavfile # get the api
import os

# Get a WAV file from GDrive, such as:
# AUDIO_FILE = os.path.join(PATH,'short_popcorn.wav')

# Or download my sample audio
# !wget https://github.com/jeffheaton/present/raw/master/youtube/video/sample_audio/piano_c_major_scale.wav
AUDIO_FILE = "/content/audio2.wav"

# fs is the sample rate (samples per second); data holds the samples.
fs, data = wavfile.read(AUDIO_FILE) # load the data

# Analyse the first channel only. A multi-channel file is laid out as
# (samples, channels), so data.T[0] is channel 0. A mono file is already 1-D;
# indexing it with data.T[0] would yield a single scalar sample and break
# len(audio) below, so it is used as-is.
audio = data.T[0] if data.ndim > 1 else data

FRAME_STEP = (fs / FPS) # audio samples per video frame
FFT_WINDOW_SIZE = int(fs * FFT_WINDOW_SECONDS) # samples per FFT window
AUDIO_LENGTH = len(audio)/fs # duration of the clip in seconds

In [None]:
import plotly.graph_objects as go

def plot_fft(p, xf, fs, notes, dimensions=(960,540)):
  """Build a plotly figure showing one frame's frequency spectrum.

  Args:
    p: y values of the spectrum trace (one per entry of ``xf``).
    xf: x values of the spectrum trace.
    fs: accepted for call compatibility; not used inside this function.
    notes: iterable of indexable entries ``(x, label, y)``. Each entry is
      drawn as a text annotation at ``(x + 10, y)``.
    dimensions: ``(width, height)`` passed to the figure layout.

  Returns:
    The assembled ``go.Figure``.
  """
  figure = go.Figure(
      layout=go.Layout(
          title="frequency spectrum",
          autosize=False,
          width=dimensions[0],
          height=dimensions[1],
          xaxis_title="Frequency (note)",
          yaxis_title="Magnitude",
          font={'size' : 24},
      ),
      # x axis is limited to the configured frequency range, y axis to [0, 1]
      layout_xaxis_range=[FREQ_MIN, FREQ_MAX],
      layout_yaxis_range=[0, 1],
  )

  spectrum_trace = go.Scatter(x=xf, y=p)
  figure.add_trace(spectrum_trace)

  # Label each detected note slightly to the right of its peak
  for entry in notes:
    label_x = entry[0] + 10
    label_y = entry[2]
    figure.add_annotation(
        x=label_x,
        y=label_y,
        text=entry[1],
        font={'size' : 48},
        showarrow=False,
    )

  return figure

def extract_sample(audio, frame_number):
  """Return the slice of audio that ends at the given frame.

  The slice ends at sample ``frame_number * FRAME_OFFSET`` and starts
  ``FFT_WINDOW_SIZE`` samples earlier. When that start lies before the
  beginning of the audio, the missing leading part is filled with zeros.

  Args:
    audio: 1-D sequence of samples supporting slicing.
    frame_number: index of the frame whose window is wanted.

  Returns:
    An all-zero float array for frame 0, a zero-padded array while the
    window still overlaps the start of the audio, otherwise a plain slice
    of ``audio``.
  """
  stop = frame_number * FRAME_OFFSET
  start = int(stop - FFT_WINDOW_SIZE)

  # Very beginning: no audio has been played yet, so the window is all zeros
  if stop == 0:
    return np.zeros((np.abs(start)), dtype=float)

  # Window reaches back before sample 0: pad the front with zeros
  if start < 0:
    leading_zeros = np.zeros((np.abs(start)), dtype=float)
    return np.concatenate([leading_zeros, audio[0:stop]])

  # Common case: the whole window lies inside the audio
  return audio[start:stop]

def find_top_notes(fft, num):
    """Pick the strongest distinct notes from one frame's spectrum.

    Bins are visited from the largest to the smallest value of ``fft.real``.
    Each bin's frequency is read from the module-level ``xf`` and mapped to a
    note with ``freq_to_number``; only the strongest bin of each note is kept.

    Args:
        fft: spectrum values, indexed the same way as ``xf``.
        num: maximum number of notes to return.

    Returns:
        A list of ``[frequency, note, magnitude]`` lists, strongest first.
        The list is empty when the largest value is below 0.001.
    """
    # Treat a (near-)silent frame as containing no notes at all
    if np.max(fft.real) < 0.001:
        return []

    # (bin index, magnitude) pairs, strongest first; the sort is stable
    ranked = list(enumerate(fft.real))
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    seen_notes = set()
    top = []
    for bin_index, magnitude in ranked:
        if len(top) >= num:
            break

        freq = xf[bin_index]
        note = freq_to_number(freq)
        if note in seen_notes:
            # A stronger bin already claimed this note
            continue

        seen_notes.add(note)
        top.append([freq, note, magnitude])

    return top

In [None]:
import numpy as np
import tqdm
import pandas as pd


# See https://newt.phys.unsw.edu.au/jw/notes.html
# Map a frequency (Hz) to the name of the nearest note in notesList

def freq_to_number(f):
    """Return the name of the note in ``notesList`` closest in frequency to ``f``.

    Despite the function's name, the result is the note's name string
    (for example ``'A2'``), not a number. On an exact tie the entry that
    appears first in ``notesList`` is returned.
    """
    def distance(entry):
        # entry is [note name, frequency as a string]
        return abs(float(entry[1]) - f)

    closest_entry = min(notesList, key=distance)
    return closest_entry[0]

# Map a note name from notesList back to its frequency in Hz
def number_to_freq(n):
    """Return the frequency, as a float, of the note named ``n`` in ``notesList``.

    Raises:
        IndexError: if no entry of ``notesList`` has the name ``n``.
    """
    matching = []
    for name, hz in notesList:
        if name == n:
            matching.append(hz)
    return float(matching[0])

# def freq_to_number(f): return 47 + 12 * np.log2(f / 82.41)  # Adjust the reference frequency (82.41 Hz is E2)

# # Adjust the function to convert note number to frequency
# def number_to_freq(n): return 82.41 * 2.0**((n - 47) / 12.0)  # Adjust the reference frequency

# Adjust the function to get the note name
# def note_name(n): return NOTE_NAMES[int(n) % 6] + str(int(n / 6))

# Hann window covering one FFT window (the end point is excluded)
window = (1 - np.cos(np.linspace(0, 2*np.pi, FFT_WINDOW_SIZE, endpoint=False))) * 0.5

# Frequency (Hz) of every rfft bin for this window size and sample rate
xf = np.fft.rfftfreq(FFT_WINDOW_SIZE, d=1/fs)

# Number of frames to analyse, and the sample distance between frame ends
FRAME_COUNT = int(AUDIO_LENGTH*FPS)
FRAME_OFFSET = int(len(audio)/FRAME_COUNT)

# One record per frame is appended here during the second pass
data_list = []

# Pass 1: scan every frame for the largest spectral magnitude, which the
# second pass uses to scale all spectra.
mx = 0
for frame_number in range(FRAME_COUNT):
  sample = extract_sample(audio, frame_number)

  # Magnitude spectrum of the windowed frame
  fft = np.abs(np.fft.rfft(sample * window)).real
  frame_peak = np.max(fft)
  mx = max(frame_peak, mx)

print(f"Max amplitude: {mx}")

# Pass 2: scale each frame's spectrum by the global maximum and record its
# strongest notes. The plotting / image-export lines below are disabled, so no
# animation frames are written; only the per-frame table is produced.

# A completely silent clip leaves mx at 0; dividing by it would fill every
# spectrum with NaN, so fall back to a divisor of 1 in that case.
peak = mx if mx > 0 else 1.0

for frame_number in tqdm.tqdm(range(FRAME_COUNT)):
  sample = extract_sample(audio, frame_number)

  fft = np.fft.rfft(sample * window)
  fft = np.abs(fft) / peak  # magnitudes relative to the loudest bin of the clip

  s = find_top_notes(fft,TOP_NOTES)
  data_list.append({
        'Frame Number': frame_number,
        'FFT Values': fft.real.tolist(),
        'Top Notes': s
    })

  # fig = plot_fft(fft.real,xf,fs,s,RESOLUTION)
  # fig.write_image(f"/content/frame{frame_number}.png")
df = pd.DataFrame(data_list)

# Save the DataFrame to an Excel file
# NOTE(review): each 'FFT Values' list ends up in a single Excel cell, and Excel
# caps cell text at 32,767 characters — long spectra may be truncated or
# rejected. Confirm the saved file is complete, or export to another format.
df.to_excel("/content/fft_data2.xlsx", index=False)

Max amplitude: 40283391.4071577


100%|██████████| 5232/5232 [19:45<00:00,  4.42it/s]


In [None]:
# !ffmpeg -y -r {FPS} -f image2 -s 1920x1080 -i frame%d.png -i {AUDIO_FILE} -c:v libx264 -pix_fmt yuv420p movie.mp4

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

In [None]:

# Ask Colab to send movie.mp4 to the browser as a download.
# NOTE(review): movie.mp4 is only created by the ffmpeg command in the previous
# cell, which is currently commented out (as are the lines that write the frame
# PNGs) — confirm the file exists before running this cell.
from google.colab import files
files.download('movie.mp4')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>