### Starting with PyAudio

In [1]:
import pyaudio
import os
import struct
import numpy as np
import matplotlib.pyplot as plt
import time
from tkinter import TclError

# use this backend to display in separate Tk window
%matplotlib tk

# constants
CHUNK = 1024 * 2             # samples per frame
FORMAT = pyaudio.paInt16     # audio format (bytes per sample?)
CHANNELS = 1                 # single channel for microphone
RATE = 44100                 # samples per second

In [14]:
chunk_size = 1024 # number of audio samples per frame
sample_rate = 44100  # sample rate in Hz

# initialize PyAudio
p = pyaudio.PyAudio()

# open audio stream
stream = p.open(format=pyaudio.paInt16,
                channels=1,
                rate=sample_rate,
                input=True,
                frames_per_buffer=chunk_size)

# create matplotlib figure and axis objects
fig, ax = plt.subplots()

# create a line object with random data
x = np.arange(0, 2 * chunk_size, 2)
line, = ax.plot(x, np.random.rand(chunk_size))

# set axis limits
ax.set_ylim(-32768, 32767)
ax.set_xlim(0, chunk_size)

# for measuring frame rate
frame_count = 0
start_time = time.time()

# continuously read audio data from the stream
while True:
    data = stream.read(chunk_size)
    # convert the data to a numpy array
    data_array = np.frombuffer(data, dtype=np.int16)
    # update the line data
    line.set_ydata(data_array)
    # draw the line on the plot
    try:
        print(data_array)
    except TclError:
        frame_rate = frame_count / (time.time() - start_time)
        
        print('stream stopped')
        print('average frame rate = {:.0f} FPS'.format(frame_rate))
        break
    

[-32768 -32768 -32768 ... -32768 -32768 -32768]
[-32768 -32768 -32768 ... -32768 -32768 -32768]
[-32768 -32768 -32768 ... -32768 -32768 -32768]
[-32768 -32768 -32768 ... -32768 -32768 -32768]
[-32768 -32768 -32768 ... -32768 -32768 -32768]
[-32768 -32768 -32768 ... -18419 -18381 -18367]
[-18341 -18351 -18309 ...   -908   -844   -799]
[ -834  -798  -861 ... 11385 11293 11308]
[11324 11285 11315 ... 18766 18750 18827]
[18750 18771 18847 ... 23012 23051 22968]
[23042 23009 22943 ... 24829 24823 24889]
[24790 24868 24807 ... 24506 24522 24494]
[24528 24566 24525 ... 23475 23433 23441]
[23463 23413 23412 ... 21134 21146 21146]
[21053 21098 21084 ... 18418 18390 18418]
[18439 18358 18480 ... 15423 15408 15390]
[15449 15359 15468 ... 12491 12515 12466]
[12471 12445 12446 ...  9492  9498  9501]
[9535 9543 9472 ... 6442 6423 6412]
[6368 6380 6304 ... 3752 3738 3740]
[3783 3769 3751 ... 1479 1419 1453]
[1442 1423 1493 ... -649 -704 -687]
[ -654  -677  -545 ... -2686 -2718 -2750]
[-2694 -2700 -26

KeyboardInterrupt: 

In [9]:
import pyaudio
import wave

# set the parameters
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
CHUNK = 1024
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

# initialize PyAudio
audio = pyaudio.PyAudio()

# start recording
stream = audio.open(format=FORMAT, channels=CHANNELS,
                rate=RATE, input=True,
                frames_per_buffer=CHUNK)
print("Recording...")

frames = []

for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)

print("Finished recording.")

# stop recording
stream.stop_stream()
stream.close()
audio.terminate()

# save the recording to a WAV file
waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()


Recording...
Finished recording.


### Music transcription

In [13]:
import pyaudio
import numpy as np
import librosa
from music21 import *
from madmom.features.onsets import CNNOnsetProcessor
from madmom.features.notes import NotePeakPickingProcessor

# set up the audio recording parameters
FORMAT = pyaudio.paFloat32
CHANNELS = 1
RATE = 44100
CHUNK_SIZE = 1024

# set up the onset and note processors
onset_processor = CNNOnsetProcessor()
note_processor = NotePeakPickingProcessor()

# initialize PyAudio
audio = pyaudio.PyAudio()

# start recording
stream = audio.open(format=FORMAT, channels=CHANNELS,
                rate=RATE, input=True,
                frames_per_buffer=CHUNK_SIZE)

print("Recording...")

while True:
    # read audio data from the stream
    data = stream.read(CHUNK_SIZE)

    # convert the data to a NumPy array
    samples = np.frombuffer(data, dtype=np.float32)

    # extract the onsets and notes from the audio
    onsets = onset_processor(samples)
    notes = note_processor(samples)

    # create a music21 Stream object from the notes
    stream = stream.Stream()
    for note in notes:
        pitch = note[0]
        duration = note[1] - note[0]
        quarterLength = librosa.samples_to_time(duration, sr=RATE)
        n = note.Note(pitch, quarterLength=quarterLength)
        stream.append(n)

    # display the musical notation
    stream.show('midi')



ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject