# Audio Clustering

## Imports

In [None]:
import audio_analysis
import audio_model
import audio_synthesis
import audio_gui
import audio_control

import librosa
import numpy as np
import os, sys, time, subprocess, threading

import sounddevice as sd

import IPython
from IPython.display import display
import ipywidgets as widgets

from PyQt5 import QtWidgets
from PyQt5.QtCore import Qt
import pyqtgraph as pg
import pyqtgraph.opengl as gl
from pathlib import Path

## Settings

## Audio Settings

In [None]:
# Default audio settings; each can be edited through the widgets created below.
audio_file_path = "../../Data/Audio/Gutenberg/Night_and_Day_by_Virginia_Woolf_48khz.wav"
audio_sample_rate = 48000
audio_channel_count = 1
audio_range_sec = [10.0, 70.0]  # [start, end] of the part of the file that is used, in seconds
audio_excerpt_length = 100  # in milliseconds
audio_excerpt_offset = 90  # in milliseconds

audio_output_device = 2
audio_buffer_size = 512


def _audio_setting_widget(widget_type, value, description):
    """Create one settings widget whose description label is shown at full width."""
    return widget_type(value=value, description=description, style={'description_width': 'initial'})


audio_file_gui = _audio_setting_widget(widgets.Text, audio_file_path, "Audio File:")
audio_sample_rate_gui = _audio_setting_widget(widgets.IntText, audio_sample_rate, "Audio Sample Rate:")
audio_channel_count_gui = _audio_setting_widget(widgets.IntText, audio_channel_count, "Audio Channel Count:")
audio_range_sec_gui_1 = _audio_setting_widget(widgets.FloatText, audio_range_sec[0], "Audio Range Start (sec):")
audio_range_sec_gui_2 = _audio_setting_widget(widgets.FloatText, audio_range_sec[1], "Audio Range End (sec):")
audio_excerpt_length_gui = _audio_setting_widget(widgets.IntText, audio_excerpt_length, "Audio Excerpt Length (milisec):")
audio_excerpt_offset_gui = _audio_setting_widget(widgets.IntText, audio_excerpt_offset, "Audio Excerpt Offset (milisec):")
audio_output_device_gui = _audio_setting_widget(widgets.IntText, audio_output_device, "Audio Output Device:")
audio_buffer_size_gui = _audio_setting_widget(widgets.IntText, audio_buffer_size, "Audio Buffer Size:")

# File and excerpt related settings come first ...
for audio_setting_gui in (
    audio_file_gui,
    audio_sample_rate_gui,
    audio_channel_count_gui,
    audio_range_sec_gui_1,
    audio_range_sec_gui_2,
    audio_excerpt_length_gui,
    audio_excerpt_offset_gui,
):
    display(audio_setting_gui)

# ... then the device list is printed directly above the output device selector
print(sd.query_devices())

for audio_setting_gui in (audio_output_device_gui, audio_buffer_size_gui):
    display(audio_setting_gui)

In [None]:
# Copy the (possibly edited) widget values back into the audio settings variables.
audio_file_path = audio_file_gui.value
audio_sample_rate = audio_sample_rate_gui.value
audio_channel_count = audio_channel_count_gui.value
audio_range_sec[0] = audio_range_sec_gui_1.value
audio_range_sec[1] = audio_range_sec_gui_2.value
# Assign to the setting itself: rebinding the widget's name to its value would
# leave audio_excerpt_length at its default and destroy the widget reference.
audio_excerpt_length = audio_excerpt_length_gui.value
audio_excerpt_offset = audio_excerpt_offset_gui.value

audio_output_device = audio_output_device_gui.value
audio_buffer_size = audio_buffer_size_gui.value

## Cluster Settings

In [None]:
# Default clustering settings; both can be edited through the widgets below.
cluster_count = 20
cluster_random_state = 170

cluster_count_gui = widgets.IntText(
    value=cluster_count,
    description="Cluster Count:",
    style={'description_width': 'initial'},
)
cluster_random_state_gui = widgets.IntText(
    value=cluster_random_state,
    description="Cluster Random State:",
    style={'description_width': 'initial'},
)

for cluster_setting_gui in (cluster_count_gui, cluster_random_state_gui):
    display(cluster_setting_gui)

In [None]:
# Copy the (possibly edited) widget values back into the cluster settings variables.
cluster_count, cluster_random_state = (
    cluster_count_gui.value,
    cluster_random_state_gui.value,
)

## OSC Receive Settings

In [None]:
# Default OSC receive settings; both can be edited through the widgets below.
osc_receive_ip = "0.0.0.0"
osc_receive_port = 9002

osc_receive_ip_gui = widgets.Text(
    value=osc_receive_ip,
    description="OSC Receive IP:",
    style={'description_width': 'initial'},
)
osc_receive_port_gui = widgets.IntText(
    value=osc_receive_port,
    description="OSC Receive Port:",
    style={'description_width': 'initial'},
)

for osc_setting_gui in (osc_receive_ip_gui, osc_receive_port_gui):
    display(osc_setting_gui)

In [None]:
# Copy the (possibly edited) widget values back into the OSC receive settings variables.
osc_receive_ip, osc_receive_port = (
    osc_receive_ip_gui.value,
    osc_receive_port_gui.value,
)

## Load Audio

In [None]:
# Load the file at the configured sample rate; the rate returned by librosa is
# not needed because it was requested explicitly.
audio_waveform, _ = librosa.load(audio_file_path, sr=audio_sample_rate)

# Convert the selected range from seconds to sample indices and crop to it.
audio_range_start_sc = int(audio_range_sec[0] * audio_sample_rate)
audio_range_end_sc = int(audio_range_sec[1] * audio_sample_rate)
audio_waveform = audio_waveform[audio_range_start_sc:audio_range_end_sc]

# Number of samples left after cropping
audio_waveform_sc = len(audio_waveform)

## Create Audio Excerpts

In [None]:
# Cut the waveform into fixed-length, regularly spaced (possibly overlapping) excerpts.

# Excerpt length and hop between excerpt starts, converted from milliseconds to samples
audio_excerpt_length_sc = int(audio_excerpt_length / 1000 * audio_sample_rate)
audio_excerpt_offset_sc = int(audio_excerpt_offset / 1000 * audio_sample_rate)

# The "+ 1" makes the last valid start position inclusive, so an excerpt that ends
# exactly at the end of the waveform is kept instead of being silently dropped.
audio_excerpt_starts = range(0, audio_waveform_sc - audio_excerpt_length_sc + 1, audio_excerpt_offset_sc)

# Fail with a clear message instead of the generic np.stack error on an empty list
if len(audio_excerpt_starts) == 0:
    raise ValueError(
        f"audio waveform ({audio_waveform_sc} samples) is shorter than one "
        f"excerpt ({audio_excerpt_length_sc} samples)"
    )

# Result has one row per excerpt: (excerpt count, audio_excerpt_length_sc)
audio_excerpts = np.stack(
    [audio_waveform[sI:sI + audio_excerpt_length_sc] for sI in audio_excerpt_starts],
    axis=0,
)

## Calculate Audio Features

In [None]:
# Feature name -> feature values computed for all excerpts.
# The insertion order matters: later cells use the first key as the default feature.
# Entries that are commented out are features that are currently disabled.
audio_features = {
    "waveform": audio_excerpts,
    "root mean square": audio_analysis.rms(audio_excerpts),
    "chroma stft": audio_analysis.chroma_stft(audio_excerpts, audio_sample_rate),
    # "chroma cqt": audio_analysis.chroma_cqt(audio_excerpts, audio_sample_rate),
    # "chroma cens": audio_analysis.chroma_cens(audio_excerpts, audio_sample_rate),
    # "chroma vqt": audio_analysis.chroma_vqt(audio_excerpts, audio_sample_rate),
    "mel spectrogram": audio_analysis.mel_spectrogram(audio_excerpts, audio_sample_rate),
    "mfcc": audio_analysis.mfcc(audio_excerpts, audio_sample_rate),
    "spectral centroid": audio_analysis.spectral_centroid(audio_excerpts, audio_sample_rate),
    "spectral bandwidth": audio_analysis.spectral_bandwidth(audio_excerpts, audio_sample_rate),
    "spectral contrast": audio_analysis.spectral_contrast(audio_excerpts, audio_sample_rate),
    "spectral flatness": audio_analysis.spectral_flatness(audio_excerpts),
    "spectral rolloff": audio_analysis.spectral_rolloff(audio_excerpts, audio_sample_rate),
    # "tempo": audio_analysis.tempo(audio_excerpts, audio_sample_rate),
    "tempogram": audio_analysis.tempogram(audio_excerpts, audio_sample_rate),
    # "tempogram ratio": audio_analysis.tempogram_ratio(audio_excerpts, audio_sample_rate),
}
#audio_features["tempogram ratio"] = audio_analysis.tempogram_ratio(audio_excerpts, audio_sample_rate)

## Create Clustering Model

In [None]:
# Collect everything the clustering model is built from.
clustering_config = {
    "audio_excerpts": audio_excerpts,
    "audio_features": audio_features,
    "cluster_method": "kmeans",
    "cluster_count": cluster_count,
    "cluster_random_state": cluster_random_state,
}

# Store the configuration on the module and build the model from that same object.
audio_model.config = clustering_config
clustering = audio_model.createModel(clustering_config)

## Create Cluster Player

In [None]:
# Collect everything the cluster player is built from.
synthesis_config = {
    "model": clustering,
    "audio_excerpts": audio_excerpts,
    "audio_sample_rate": audio_sample_rate,
    "audio_excerpt_length": audio_excerpt_length,
    "audio_excerpt_offset": audio_excerpt_offset,
}

# Store the configuration on the module and build the player from that same object.
audio_synthesis.config = synthesis_config
synthesis = audio_synthesis.AudioSynthesis(synthesis_config)

# Initial selection: cluster 1 and the first computed audio feature
synthesis.setClusterLabel(1)
initial_audio_feature = list(audio_features)[0]
synthesis.selectAudioFeature(initial_audio_feature)

## Create OSC Control

In [None]:
# Fill in the entries of the module's existing config that this notebook provides.
for osc_config_key, osc_config_value in (
    ("synthesis", synthesis),
    ("model", clustering),
    ("ip", osc_receive_ip),
    ("port", osc_receive_port),
):
    audio_control.config[osc_config_key] = osc_config_value

osc_control = audio_control.AudioControl(audio_control.config)

## Start Real-Time Audio and OSC Control

In [None]:
def audio_callback(out_data, frame_count, time_info, status):
    """Callback of the audio output stream.

    Hands the output buffer, flattened to one dimension, to the synthesis
    object. frame_count, time_info and status are part of the callback
    signature but are not used here.
    """
    # NOTE(review): presumably synthesis.update writes the samples into this
    # array in place - confirm against audio_synthesis.
    flat_out_data = out_data.reshape(-1)
    synthesis.update(flat_out_data)

# Open the output stream with the configured device, format and buffer size.
# blocksize applies the "Audio Buffer Size" setting, which was otherwise collected
# but never used; it sets the number of frames handed to each callback invocation.
audio_stream = sd.OutputStream(
    samplerate=audio_sample_rate, blocksize=audio_buffer_size,
    device=audio_output_device, channels=audio_channel_count,
    callback=audio_callback)

# Start OSC control before audio, as in the original start-up order
osc_control.start()
audio_stream.start()

## Interactive Audio Control

In [None]:
# Current interactive selection; updated by the widget callbacks.
cluster_label = 1
cluster_feature = "waveform"

# Names of all computed audio features, in the order they were computed
audio_feature_names = list(audio_features)

cluster_label_gui = widgets.IntText(
    value=cluster_label,
    description="Cluster Label:",
    style={'description_width': 'initial'},
)
cluster_feature_gui = widgets.Dropdown(
    options=audio_feature_names,
    value=audio_feature_names[0],  # the first feature is preselected
    description='Audio Feature:',
    style={'description_width': 'initial'},
)

for cluster_control_gui in (cluster_label_gui, cluster_feature_gui):
    display(cluster_control_gui)

def on_cluster_label_change(value):
    """Store the newly entered cluster label and pass it on to the synthesis.

    value is the change notification of the observed widget; only its
    'new' entry is read.
    """
    global cluster_label
    new_cluster_label = value['new']
    cluster_label = new_cluster_label
    synthesis.setClusterLabel(new_cluster_label)

def on_cluster_feature_change(value):
    """Store the newly selected audio feature and pass it on to the synthesis.

    value is the change notification of the observed widget; only its
    'new' entry is read.
    """
    global cluster_feature
    new_cluster_feature = value['new']
    cluster_feature = new_cluster_feature
    synthesis.selectAudioFeature(new_cluster_feature)

# Call the matching handler whenever the 'value' of one of the widgets changes.
for observed_gui, change_handler in (
    (cluster_label_gui, on_cluster_label_change),
    (cluster_feature_gui, on_cluster_feature_change),
):
    observed_gui.observe(change_handler, names='value')

## Stop Real-Time Audio and OSC Control

In [None]:
# Stop audio output, then release the stream, then stop OSC control.
audio_stream.stop()
# stop() alone keeps the stream open; close() releases it. The start cell
# creates a new stream object, so a closed stream is never restarted.
audio_stream.close()
osc_control.stop()