# Timing ```04_record_loops``` using ```timeit``` module
Firstly, lets initialize all the necessary variables, as that is not something I have to time:

In [59]:
from typing import Any
import numpy.typing as npt

import sounddevice as sd
import numpy as np
import soundfile as sf
import threading
import random
import timeit


SAMPLERATE = 44100  # [samples per second]
BLOCKSIZE = 1000  # [samples]
DTYPE = np.int16
STR_DTYPE = "int16"
CHANNELS = 2
LATENCY = 0

METRONOME_SAMPLE_PATH = "lib/samples/metronome.wav"

recording = False
stream_active = True

tracks: list[npt.NDArray[DTYPE]] = []
recorded_track: npt.NDArray[DTYPE] = np.empty(shape=(0, CHANNELS), dtype=DTYPE)
current_frame = 0
len_beat: int  # number of samples per beat

It is questionable whether some of these could be optimalised. E.g. using numpy array instead of list for ```tracks``` could be faster. However, different tracks need to differ in length and I do not know whether numpy allows this.

Next, lets analyze the functions in the program. There are three groups of functions:
- ```metronome_generator``` and ```initialize_metronome```, which execute before the body of the program,
- ```input_checker``` and ```post_production```, which are executed in the input thread,
-  ```main``` and ```callback```, which are executed in the thread which is processing audio.

As the first ones do not need to be timed, lets execute them now so the other ones can use the metronome sample.

In [60]:
def metronome_generator(bpm: int, path: str) -> npt.NDArray[DTYPE]:
    global len_beat

    sample: npt.NDArray[DTYPE]
    sample, fs = sf.read(file=path, dtype=STR_DTYPE)

    desired_len = int((60*fs)/bpm)
    len_beat = desired_len

    if len(sample) <= desired_len:
        # rounding desired_len introduces a distortion of bpm
        sample = np.concatenate(
            (sample, np.zeros(shape=(desired_len-len(sample), CHANNELS), dtype=DTYPE)))
    else:
        sample = sample[:desired_len]

    # adjust volume
    sample = (sample/4).astype(dtype=DTYPE)

    return sample


def initialize_metronome() -> None:
    global tracks
    # initialize metronome
    bpm = int(input("bpm: "))
    metronome = metronome_generator(bpm=bpm, path=METRONOME_SAMPLE_PATH)
    tracks.append(metronome)

The other two groups run in the same thread, for python does not allow true parallelism, so it is necessary to optimalise both of them. Lets start with the audio processing, as I suppose it may require more CPU power and I also have some ideas for optimalisation. 

In [61]:
def main() -> None:
    """ processes the audio """
    global stream_active

    def callback(indata: npt.NDArray[DTYPE], outdata: npt.NDArray[DTYPE],
                 frames: int, time: Any, status: sd.CallbackFlags) -> None:
        global current_frame
        global tracks
        global recorded_track

        if status:
            print(status)

        if recording:
            recorded_track = np.concatenate([recorded_track, indata])

        # mixer & slicer
        num_tracks = len(tracks)
        data = (indata/(num_tracks + 1)).astype(dtype=DTYPE)

        for track in tracks:
            # slice
            start = current_frame % len(track)
            end = (current_frame+frames) % len(track)
            if end < start:
                track_slice = np.concatenate(
                    (track[start:], track[:end]))
            else:
                track_slice = track[start:end]
            # mix
            track_slice = (track_slice/(num_tracks+1)).astype(dtype=DTYPE)

            data += track_slice

        outdata[:] = data
        current_frame += frames

    try:
        with sd.Stream(samplerate=SAMPLERATE, blocksize=BLOCKSIZE, dtype=STR_DTYPE,
                       channels=CHANNELS, callback=callback):
            while stream_active:
                pass
    finally:
        print("Good bye!")

Ideas:
- is using global variables slow?
- the for loop is certainly inefficient -> use numpy.mean() for the mixing
    - how to improve the cutting?
- is the handling of large integers in ```current_frame``` okay?
- is the while loop, which is keeping the stream active, draining CPU power? If yes, how to keep the stream alive without this drawback?

First of all, mixing and cutting. Or just mixing, as it is a smaller piece of code. Lets initialize some track data, so we can test different approaches:

In [73]:
LEN_TRACK = 100
NUM_TRACKS = 10
NUMBER = 100
REPEAT = 1000

def generate_random_track(length: int, channels: int = 2) -> npt.NDArray[DTYPE]:
    return np.array([[random.randint(-2**15, 2**15-1), random.randint(-2**15, 2**15-1)] for i in range(length)])

cut_tracks = np.array([generate_random_track(LEN_TRACK) for i in range(NUM_TRACKS)])


for_loop = """
data = cut_tracks[0]
for track_slice in cut_tracks:
    track_slice = (track_slice/(NUM_TRACKS+1)).astype(dtype=DTYPE)
    data += track_slice"""
for_loop_time = min(timeit.repeat(stmt=for_loop, number=NUMBER, repeat=REPEAT, globals=globals()))

np_mean = """
data = np.mean(a=cut_tracks, axis=0)
"""
mean_time = min(timeit.repeat(stmt=for_loop, number=NUMBER, repeat=REPEAT, globals=globals()))

Now we can comparte those two:

In [74]:
difference = for_loop_time - mean_time
if difference < 0:
    print(f"for loop was {difference*-1} second faster")
else:
    print(f"np.mean was {difference} second faster")

for loop was 0.002010975999837683 second faster
