In [7]:
# Fetch the course repository; git places it in an 'ai_for_audio' folder under the current directory
!git clone https://github.com/barriosai/ai_for_audio.git

Cloning into 'ai_for_audio'...
remote: Enumerating objects: 25, done.[K
remote: Counting objects: 100% (25/25), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 25 (delta 9), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (25/25), 2.52 MiB | 8.21 MiB/s, done.
Resolving deltas: 100% (9/9), done.


In [8]:
import os

# Navigate to the repository directory.
# Guarded so that re-running this cell after the kernel has already moved into
# the clone does not fail while looking for a nested 'ai_for_audio' folder.
if os.path.basename(os.getcwd()) != 'ai_for_audio':
    os.chdir('ai_for_audio')

# List the contents to verify
os.listdir()


['LICENSE',
 'ai_for_audio_video_03_04.ipynb',
 'ai_for_audio_video_03_05.ipynb',
 '.gitignore',
 'Archive.zip',
 '[SHARED]_ai_for_aduio_03_07_mnist_image_classifier.ipynb',
 'README.md',
 '[SHARED]_03_06_ai_for_audio_Perceptron_to_MLP.ipynb',
 '.git']

In [9]:
import zipfile

# Unpack every member of the bundled archive into the 'chords_data' folder
with zipfile.ZipFile('Archive.zip', mode='r') as archive:
    archive.extractall(path='chords_data')


In [10]:
# Show the top-level entries produced by the extraction
os.listdir('chords_data')


['minor_chords', 'major_chords', '__MACOSX']

In [11]:
import shutil

# Delete the '__MACOSX' directory that came out of the archive, when present.
# os.path.isdir() is already False for a missing path, so one check is enough.
macosx_path = 'chords_data/__MACOSX'
if os.path.isdir(macosx_path):
    shutil.rmtree(macosx_path)

# Display the remaining entries to confirm the folder is gone
os.listdir('chords_data')


['minor_chords', 'major_chords']

In [12]:
import os

# Directories holding the major and minor chord recordings
folders = ['chords_data/major_chords', 'chords_data/minor_chords']

# Delete every entry whose name begins with '._' (presumably macOS metadata
# leftovers, given the '__MACOSX' folder shipped in the same archive).
for folder in folders:
    # Collect the matching names first, then remove them one by one
    hidden_names = [name for name in os.listdir(folder) if name.startswith('._')]
    for file_name in hidden_names:
        file_path = os.path.join(folder, file_name)
        os.remove(file_path)
        print(f"Removed {file_path}")


Removed chords_data/major_chords/._5_major_chord.wav
Removed chords_data/major_chords/._4_major_chord.wav
Removed chords_data/major_chords/._2_major_chord.wav
Removed chords_data/major_chords/._1_major_chord.wav
Removed chords_data/major_chords/._3_major_chord.wav
Removed chords_data/minor_chords/._4_minor_chord.wav
Removed chords_data/minor_chords/._2_minor_chord.wav
Removed chords_data/minor_chords/._3_minor_chord.wav
Removed chords_data/minor_chords/._5_minor_chord.wav
Removed chords_data/minor_chords/._1_minor_chord.wav


In [13]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os

# Function to generate spectrograms
def generate_spectrogram(file_path, output_path, duration=3):
    """Render the mel spectrogram of an audio file and save it as an image.

    Args:
        file_path: Path of the audio file to load with librosa.
        output_path: Destination handed to ``Figure.savefig``.
        duration: Number of seconds to load from the file. Defaults to 3,
            the value that was previously hard-coded.

    Returns:
        None. The only result is the image written to ``output_path``.
    """
    y, sr = librosa.load(file_path, duration=duration)
    S = librosa.feature.melspectrogram(y=y, sr=sr)
    # Convert power to decibels, measured relative to the loudest bin (ref=np.max)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Use an explicit figure/axes pair instead of implicit pyplot state, and
    # always close the figure so a failure while plotting or saving does not
    # leave open figures accumulating across the calling loop.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        img = librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=ax)
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set_title('Mel-frequency spectrogram')
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        plt.close(fig)

# Loop through both major and minor chord directories
for chord_type in ['major_chords', 'minor_chords']:
    folder_path = f'chords_data/{chord_type}'
    output_folder = f'chords_data/spectrograms/{chord_type}'
    os.makedirs(output_folder, exist_ok=True)

    for file_name in os.listdir(folder_path):
        # Split off the real extension instead of using str.replace(), which
        # would rewrite every '.wav' occurrence inside the name.
        stem, extension = os.path.splitext(file_name)
        # Accept '.wav' in any letter case, and skip '._*' entries so the cell
        # still works if the earlier cleanup cell was not run.
        if extension.lower() != '.wav' or file_name.startswith('._'):
            continue
        file_path = os.path.join(folder_path, file_name)
        output_path = os.path.join(output_folder, stem + '.png')
        generate_spectrogram(file_path, output_path)


In [14]:
# Check the generated spectrograms for the major chords
os.listdir('chords_data/spectrograms/major_chords')

['4_major_chord.png',
 '2_major_chord.png',
 '5_major_chord.png',
 '1_major_chord.png',
 '3_major_chord.png']

In [15]:
# Check the generated spectrograms for the minor chords
os.listdir('chords_data/spectrograms/minor_chords')

['5_minor_chord.png',
 '3_minor_chord.png',
 '1_minor_chord.png',
 '4_minor_chord.png',
 '2_minor_chord.png']

In [16]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# One generator configuration feeds both subsets: pixel values are scaled by
# 1/255 and a 0.2 fraction of the images is held out for validation.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Arguments common to the training and validation iterators
flow_options = dict(
    directory='chords_data/spectrograms',
    target_size=(128, 128),
    batch_size=16,
    class_mode='binary',
)

train_generator = datagen.flow_from_directory(subset='training', **flow_options)

validation_generator = datagen.flow_from_directory(subset='validation', **flow_options)


Found 8 images belonging to 2 classes.
Found 2 images belonging to 2 classes.


In [17]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Seed the random number generators so repeated runs of this cell start from
# the same state (weight initialisation, dropout, batch shuffling).
tf.keras.utils.set_random_seed(42)

# Build the model: two convolution + max-pooling stages, then a dense
# classifier head ending in a single sigmoid unit for the binary label.
model = Sequential([
    Input(shape=(128, 128, 3)),  # matches target_size=(128, 128) of the generators, 3 colour channels
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model.
# The step counts come from len(generator), the number of batches per pass
# rounded up. The previous `samples // batch_size` floors to 0 whenever a
# subset is smaller than one batch (8 // 16 and 2 // 16 with this data) and
# drops the final partial batch otherwise.
model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    epochs=10
)


Epoch 1/10


  self._warn_if_super_not_called()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.2500 - loss: 0.7621 - val_accuracy: 0.5000 - val_loss: 0.8307
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 394ms/step - accuracy: 0.6250 - loss: 0.6294 - val_accuracy: 0.5000 - val_loss: 0.7418
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 406ms/step - accuracy: 0.6250 - loss: 1.2021 - val_accuracy: 0.5000 - val_loss: 1.8806
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 865ms/step - accuracy: 0.5000 - loss: 1.2329 - val_accuracy: 0.5000 - val_loss: 1.3600
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 682ms/step - accuracy: 0.6250 - loss: 1.9684 - val_accuracy: 0.5000 - val_loss: 0.8156
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.6250 - loss: 0.9829 - val_accuracy: 0.5000 - val_loss: 0.7201
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x79bc59ae0970>

* **Simplify the Layers:** Start with a Conv2D layer using 16 filters. This keeps the model light, without overwhelming it with too much complexity right out of the gate.

* **Pooling for Simplicity:** Use MaxPooling2D next to reduce the spatial dimensions. This is like condensing your information, making it easier to process while still keeping the essential details.

* **Flatten the Output:** Flatten the data to transition from the convolutional layers to the fully connected layers. Think of it as taking all those features and laying them out in a straight line, ready for the final processing.

* **Dense Layer for Decision-Making:** Use a Dense layer with 64 neurons to make the key decisions. This is where the model starts to figure out whether it's looking at a major or minor chord.

* **Dropout for Regularization:** Add a Dropout layer at 0.3 to prevent overfitting. It's like introducing a bit of randomness, ensuring the model doesn't get too comfortable with any one pattern.

* **Binary Classification:** Finish with a Dense layer with a sigmoid activation function. This keeps it simple—just a clear yes or no, major or minor.