In [1]:
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras import layers, models
import os

In [2]:
def parse_note_from_filename(filename):
    parts = filename.split('_')
    
    if len(parts) > 0:
        return parts[0]
    else:
        return 'unknown'

In [3]:
folder_path = 'piano_triads'

# Set a fixed length for all audio samples (you can adjust this as needed)
fixed_length = 44100  # for example, 1 second at 44.1 kHz

audio_samples = []
musical_notes = []

for filename in os.listdir(folder_path):
    if filename.endswith('.wav'):
        file_path = os.path.join(folder_path, filename)

        # loading audio samples
        audio, sr = librosa.load(file_path, sr=None)
        
        # Ensure that all audio samples have the same length
        if len(audio) < fixed_length:
            audio = np.pad(audio, (0, fixed_length - len(audio)))
        elif len(audio) > fixed_length:
            audio = audio[:fixed_length]
        
        note = parse_note_from_filename(filename)
        
        audio_samples.append(audio)
        musical_notes.append(note)

X = np.array(audio_samples)
y = np.array(musical_notes)

In [4]:
def extract_features(audio_samples, sample_rate=44100):
    features = []
    for audio in audio_samples:
        # Applying FFT
        fft_result = np.fft.fft(audio)
        magnitudes = np.abs(fft_result)
        
        features.append(magnitudes)
    return features

In [5]:
# features = extract_features(audio_samples)

# mean_magnitudes = [np.mean(feature) for feature in features]

# import matplotlib.pyplot as plt
# plt.figure(figsize=(10, 6))
# plt.plot(mean_magnitudes)
# plt.xlabel('Sample Index')
# plt.ylabel('Mean Magnitude')
# plt.title('Mean Magnitude of FFT Coefficients for Audio Samples')
# plt.grid()
# plt.show()


In [6]:
# import matplotlib.pyplot as plt

# num_samples_to_visualize = 50

# for i in range(num_samples_to_visualize):
#     plt.figure(figsize=(7, 4))
#     plt.specgram(X[i], Fs=sr, cmap='inferno')

#     filename = os.listdir(folder_path)[i]
    
#     plt.title(f'Spectrogram of {filename}')
#     plt.xlabel('Time (s)')
#     plt.ylabel('Frequency (Hz)')
#     plt.colorbar(format='%+2.0f dB')
#     plt.tight_layout()
#     plt.show()


In [7]:
# #amplitude vs frequency

# import matplotlib.pyplot as plt
# import numpy as np

# num_samples_to_visualize = 50

# for i in range(num_samples_to_visualize):
#     plt.figure(figsize=(7, 4))
    
#     fft_result = np.fft.fft(X[i])
    
#     frequencies = np.fft.fftfreq(len(fft_result), 1/sr)
#     amplitudes = np.abs(fft_result)

#     plt.plot(frequencies[:len(frequencies)//2], amplitudes[:len(amplitudes)//2])  
#     filename = os.listdir(folder_path)[i]
    
#     plt.title(f'Amplitude vs. Frequency of {filename}')
#     plt.xlabel('Frequency (Hz)')
#     plt.ylabel('Amplitude')
#     plt.grid()
#     plt.tight_layout()
#     plt.show()


In [8]:
# # waveform plot
# import librosa.display

# num_samples_to_visualize = 40

# for i in range(num_samples_to_visualize):
#     plt.figure(figsize=(7, 4))
#     plt.plot(np.arange(len(X[i])) / sr, X[i])
#     filename = os.listdir(folder_path)[i]

#     plt.title(f'Waveform of Audio Sample {filename}')
#     plt.xlabel('Time (s)')
#     plt.ylabel('Amplitude')
#     plt.tight_layout()
#     plt.show()

In [9]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import LabelEncoder


In [10]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Encode the labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)


In [11]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Define musical note names
NOTE_NAMES = ["C", "Cs", "D", "Eb", "E", "F", "Fs", "G", "Gs", "A", "Bb", "B"]

from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping

# Define a learning rate schedule function
def lr_schedule(epoch):
    initial_learning_rate = 0.001  # Set your initial learning rate here
    decay = 0.95  # Set the decay rate
    if epoch > 5:  # Adjust this condition based on when you want to start reducing the learning rate
        return initial_learning_rate * (decay ** (epoch - 5))
    else:
        return initial_learning_rate

# Create a learning rate scheduler callback
lr_scheduler = LearningRateScheduler(lr_schedule)

# Create a neural network model
# Define the model
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(len(np.unique(y_train_encoded)), activation='softmax')  # Output layer with softmax for classification
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [12]:
# Train your model using X_train_augmented
# early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(X_train, y_train_encoded, epochs=50, batch_size=32,
                    validation_data=(X_test, y_test_encoded),
                    callbacks=[lr_scheduler])# Train your model using X_train_augmented

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [13]:
# Evaluate the model using encoded labels
test_loss, test_accuracy = model.evaluate(X_test, y_test_encoded)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


Test Loss: 5.7840, Test Accuracy: 0.3333


In [14]:
# Make predictions
predictions = model.predict(X_test)



In [15]:
# !pip install matplotlib scipy plotly


In [16]:
import numpy as np
import matplotlib.pyplot as plt

def plot_fft(p, xf, fs, notes):
    plt.figure(figsize=(12, 6))
    plt.title("Frequency Spectrum")
    plt.xlabel("Frequency (Hz)")
    plt.ylabel("Magnitude")
    plt.plot(xf, p)

    for note in notes:
        plt.annotate(note[1], (note[0] + 10, note[2]), fontsize=12, ha='center', va='bottom')

    plt.grid()
    plt.show()

def extract_sample(audio, frame_number, frame_offset):
    end = frame_number * frame_offset
    begin = int(end - FFT_WINDOW_SIZE)

    if end == 0:
        return np.zeros((np.abs(begin)), dtype=float)
    elif begin < 0:
        return np.concatenate([np.zeros((np.abs(begin)), dtype=float), audio[0:end]])
    else:
        return audio[begin:end]

def find_top_notes(fft, num, xf):
    if np.max(fft.real) < 0.001:
        return []

    lst = [x for x in enumerate(fft.real)]
    lst = sorted(lst, key=lambda x: x[1], reverse=True)

    idx = 0
    found = []
    found_note = set()
    while (idx < len(lst)) and (len(found) < num):
        f = xf[lst[idx][0]]
        y = lst[idx][1]
        n = freq_to_number(f)
        n0 = int(round(n))
        name = note_name(n0)

        if name not in found_note:
            found_note.add(name)
            s = [f, note_name(n0), y]
            found.append(s)
        idx += 1

    return found

def freq_to_number(f): return 69 + 12 * np.log2(f / 440.0)
def note_name(n): return NOTE_NAMES[n % 12]


In [17]:
# import numpy as np
# import matplotlib.pyplot as plt

# def plot_fft(magnitudes, sample_rate, notes, xlim=None):
#     fft_values = np.fft.fftfreq(len(magnitudes), 1.0 / sample_rate)
# #
#     plt.figure(figsize=(12, 6))
#     plt.title("Frequency Spectrum")
#     plt.xlabel("Frequency (Hz)")
#     plt.ylabel("Magnitude")
#     plt.plot(fft_values, magnitudes)
    
#     if xlim:
#         plt.xlim(xlim)
    
#     for note in notes:
#         plt.annotate(note[1], (note[0] + 10, note[2]), fontsize=12, ha='center', va='bottom')
    
#     plt.grid()
#     plt.show()

# # Example: Replace this with your actual audio_samples and sample_rate
# # Generate random audio data for demonstration purposes
# sample_rate = 44100
# duration = 5  # 5 seconds of audio
# num_samples = int(sample_rate * duration)
# audio_samples = np.random.randn(num_samples)

# # Extract features and notes (replace with your actual audio and notes)
# features = extract_features([audio_samples])
# num_notes = 5  # Adjust the number of top notes you want to display

# for feature in features:
#     notes = find_top_notes(feature, num_notes, fft_values)  # Replace with your notes
#     plot_fft(feature, sample_rate, notes, xlim=(0, 1000))  # Adjust frequency range if needed


In [18]:
model.save("trial1")

INFO:tensorflow:Assets written to: trial1\assets


INFO:tensorflow:Assets written to: trial1\assets


In [20]:
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import LabelEncoder

# Define musical note names
NOTE_NAMES = ["C", "Cs", "D", "Eb", "E", "F", "Fs", "G", "Gs", "A", "Bb", "B"]

# Load your trained model
model = tf.keras.models.load_model('trial1')  # Replace 'your_model_path' with the actual path to your trained model

def extract_features(audio):
    # Applying FFT
    fft_result = np.fft.fft(audio)
    magnitudes = np.abs(fft_result)
    return magnitudes

def predict_note(audio_file_path, model):
    # Load and process the audio file
    audio, _ = librosa.load(audio_file_path, sr=None)
    features = extract_features(audio)
    
    # Make sure the features have the same shape as the model's input shape
    if len(features) < X_train.shape[1]:
        features = np.pad(features, (0, X_train.shape[1] - len(features)))
    elif len(features) > X_train.shape[1]:
        features = features[:X_train.shape[1]]
    
    features = np.array([features])  # Reshape for model input
    
    # Make a prediction using the trained model
    prediction = model.predict(features)
    
    # Convert prediction to musical note
    predicted_note_index = np.argmax(prediction)
    predicted_note = NOTE_NAMES[predicted_note_index]
    
    return predicted_note

# Example usage:
audio_file_path = 'piano_triads/C_maj_4_0.wav'  # Replace 'your_audio_file_path.wav' with the path to the user's input audio file
predicted_note = predict_note(audio_file_path, model)
print(f"Predicted Note: {predicted_note}")


Predicted Note: Fs


##### 