## **Oracle: Predict your original project's success**

In [21]:
import tensorflow as tf 
import pyaudio
import matplotlib.pyplot as plt
import matplotlib
import IPython
import struct
import numpy as np
import librosa
import librosa.display


### Option 1: Real Time Audio Capture & Waveform Visualizer

In [22]:
# Select the Tk GUI backend for matplotlib figures.
matplotlib.use('TkAgg')

# Capture settings shared by the stream and the live plot.
FORMAT = pyaudio.paInt16  # 16-bit integer samples
CHANNELS = 1              # mono: a single microphone
RATE = 44_100             # sampling rate in Hz (44.1 kHz)
CHUNK = 4 * 1024          # 4096 samples per chunk

In [23]:
# Open a capture-only stream. The notebook only ever reads from this stream,
# so no playback side is requested; asking for one would open an output
# device that is never written to and fails on machines without one.
p = pyaudio.PyAudio()

stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,  # record from the default input device
    frames_per_buffer=CHUNK,
)

In [24]:
# create the plot
fig, ax = plt.subplots()
plt.ion()  # interactive mode on
x = np.arange(0, 2 * CHUNK)  # x values should match the length of data_int
line, = ax.plot(x, np.random.rand(2 * CHUNK), color='pink')

In [25]:
# Fix the view: amplitudes from -2000 to 2000 (a narrow window inside the
# int16 sample range) and one x position per sample of a 2 * CHUNK read.
ax.set_ylim(bottom=-2000, top=2000)
ax.set_xlim(left=0, right=2 * CHUNK)

(0.0, 8192.0)

In [26]:
# Title and axis captions for the live waveform figure.
ax.set_title(label="Real-Time Audio Waveform")
ax.set_xlabel(xlabel="Sample Index")
ax.set_ylabel(ylabel="Amplitude")

Text(0, 0.5, 'Amplitude')

In [27]:
# Live capture loop: read audio, push it into the existing line artist and
# redraw, until the user interrupts the kernel.
try:
    while True:
        # Read 2 * CHUNK frames so the sample count matches the plotted x range.
        # Drawing is slow compared with the audio rate, so tolerate dropped
        # input frames instead of raising on buffer overflow.
        data = stream.read(CHUNK * 2, exception_on_overflow=False)

        # Reinterpret the raw bytes as native-endian signed 16-bit samples.
        data_int = np.frombuffer(data, dtype=np.int16)

        # update the line plot with new data
        line.set_ydata(data_int)

        # draw and flush the plot
        fig.canvas.draw()
        fig.canvas.flush_events()
        plt.pause(0.01)  # small pause to give the GUI time to update
except KeyboardInterrupt:
    print("Stream stopped by user.")
finally:
    # Release the audio device on every exit path (interrupt or error),
    # otherwise the stream stays open until the kernel is restarted.
    stream.stop_stream()
    stream.close()
    p.terminate()

Stream stopped by user.


### Option 2: User-Uploaded Files

In [28]:
# use librosa for audio files, whereas pyaudio is used for real-time audio streams
# NOTE(review): absolute, machine-specific path; point this at your own file
# (ideally relative to a configurable data directory) before running elsewhere.
test_audio = 'C:\\Users\\adith\\OneDrive\\Desktop\\vs code xoxo\\personal\\Oracle\\oracle_venv\\fresh-clone\\california_world.mp3'

# Decode the file once, at its native sample rate (sr=None disables resampling).
audio_data, sample_rate = librosa.load(test_audio, sr=None)

# Analysis copy at 22050 Hz, the default rate of librosa.load. This should
# match what `librosa.load(test_audio)` returns, without decoding the file a
# second time. `y` and `sr` are the inputs to feature extraction.
sr = 22050
y = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=sr)

# Quick sanity check of what was loaded.
print(f'Sample Rate: {sample_rate}')
print(f'Audio Data: {audio_data}')

# Plot the native-rate waveform on its own axes; distinct names keep the
# real-time plot's `fig` / `ax` globals intact.
wave_fig, wave_ax = plt.subplots(figsize=(10, 4))
librosa.display.waveshow(audio_data, sr=sample_rate, color='pink', ax=wave_ax)
wave_ax.set_title('Audio Waveform')
wave_ax.set_xlabel('Time (s)')
wave_ax.set_ylabel('Amplitude')
plt.show()

Sample Rate: 44100
Audio Data: [ 0.000000e+00  0.000000e+00  0.000000e+00 ... -9.304419e-14 -9.690668e-14
 -8.942789e-14]


### Feature Extraction for the Uploaded Waveform

MFCCs (Mel-Frequency Cepstral Coefficients): 
- a compact representation of the short-term power spectrum of a sound 
- based on a linear cosine transform of the log power spectrum on a non-linear mel frequency scale
- commonly used in speech & audio processing tasks 
- used as input features for tasks such as recognizing emotion in a speaker's voice and classifying music into genres 

Chroma Features: 
- chroma features represent the 12 different pitch classes of a musical octave 
- largely robust to changes in timbre & instrumentation, so they capture harmonic content rather than how an instrument sounds 
- used in music retrieval tasks like chord recognition & key detection 
- a way to represent the tonal context of a musical audio signal in a condensed form 

Spectral Contrast: 
- measures the difference in amplitude between peaks and valleys in a sound spectrum 
- captures the relative distribution of energy across the frequency spectrum and is useful for distinguishing between different types of sounds such as speech and music

Tonnetz: 
- represents the harmonic & melodic components of music 
- captures relationships between pitches and is also useful for key detection and chord recognition

Mel-Scaled Spectrogram: 
- a spectrogram where the frequencies are scaled according to the mel scale, which approximates the human ear's perception of sound 
- useful for visualizing the frequencies within an audio signal in a way that lines up with the way our ears would hear it 

In [31]:
# Step 1: compute each descriptor from the loaded signal `y` at sample rate `sr`.
_signal = {"y": y, "sr": sr}

mfccs = librosa.feature.mfcc(n_mfcc=13, **_signal)
chroma = librosa.feature.chroma_stft(**_signal)
spectral_contrast = librosa.feature.spectral_contrast(**_signal)
tonnetz = librosa.feature.tonnetz(**_signal)
mel_spectrogram = librosa.feature.melspectrogram(**_signal)

NameError: name 'y' is not defined

In [32]:
# Step 2: summarise each feature matrix by averaging along axis 1
# (the frame axis in librosa's (n_features, n_frames) layout).
mfccs_mean = mfccs.mean(axis=1)
chroma_mean = chroma.mean(axis=1)
spectral_contrast_mean = spectral_contrast.mean(axis=1)
tonnetz_mean = tonnetz.mean(axis=1)

NameError: name 'mfccs' is not defined

In [33]:
# Step 3: join the per-feature means end to end into a single 1-D vector.
feature_vector = np.concatenate(
    (mfccs_mean, chroma_mean, spectral_contrast_mean, tonnetz_mean)
)

NameError: name 'mfccs_mean' is not defined