In [4]:
import numpy as np
import pandas as pd
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [5]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Preprocessing

In [6]:
data_path = '/kaggle/input/guitar-chords/Guitar_Only'
chords = ['a', 'am', 'bm', 'c', 'd', 'dm', 'e', 'em', 'f', 'g']

# Function to extract HPCP features
def extract_hpcp(file_path):
    y, sr = librosa.load(file_path)
    hpcp = librosa.feature.chroma_stft(y=y, sr=sr)
                                         
    return np.mean(hpcp, axis = 1)

# Preprocess dataset
data = []
labels = []

for chord in chords:
    chord_path = os.path.join(data_path, chord)
    for file in os.listdir(chord_path):
        if file.endswith('.wav'):
            file_path = os.path.join(chord_path, file)
            y, sr = librosa.load(file_path)
            break_pt = round(len(y) * (np.random.randint(1,4) / 4))
            y1 = librosa.feature.chroma_stft(y=y[:break_pt])
            y2 = librosa.feature.chroma_stft(y=y[break_pt:])            
            data.append(np.mean(y1, axis = 1))
            labels.append(chord)
            data.append(np.mean(y2, axis = 1))
            labels.append(chord)

data = np.array(data)
labels = np.array(labels)

# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split data
X_train, X_test, y_train, y_test = train_test_split(data, labels_encoded, test_size=0.2, random_state=42)

sc=StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)

# Neural Network

In [7]:
!pip install tensorflow

Collecting keras<2.16,>=2.15.0 (from tensorflow)
  Downloading keras-2.15.0-py3-none-any.whl.metadata (2.4 kB)
Downloading keras-2.15.0-py3-none-any.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: keras
  Attempting uninstall: keras
    Found existing installation: keras 3.4.1
    Uninstalling keras-3.4.1:
      Successfully uninstalled keras-3.4.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.[0m[31m
[0mSuccessfully installed keras-2.15.0


In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [17]:
model = Sequential()
model.add(Dense(128, input_shape=(data.shape[1],), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(chords), activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=50, batch_size=32)

test_loss, test_acc = model.evaluate(X_test, y_test)

Epoch 1/50


ValueError: Arguments `target` and `output` must have the same rank (ndim). Received: target.shape=(32,), output.shape=(32, 10)

# Model Building

# SVM

In [None]:
import pickle
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Train SVM model
model = SVC(kernel='linear', C=100, probability = True)
model.fit(X_train, y_train)

# Save the trained model
with open('chord_svm_model.pkl', 'wb') as f:
    pickle.dump(model, f)

# Classification report
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print("Classification report:")
print(report)

# Grid Search

In [None]:
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [None]:
parameters = {'kernel': ['linear', 'rbf'], 'C':[0.1,1,10,100]}
svc = SVC()
clf = GridSearchCV(svc, parameters)
clf.fit(X_train, y_train)

In [None]:
y_pred = clf.predict(X_test)

In [None]:
clf.get_params()

In [None]:
report = classification_report(y_test, y_pred)
print(report)

# Random Forest

In [23]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

rf_classifier = RandomForestClassifier(n_estimators=1000, random_state=2802)
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
report = classification_report(y_test, y_pred)
print("Classification report:")
print(report)

Accuracy: 0.94
Classification report:
              precision    recall  f1-score   support

           0       0.97      0.93      0.95        98
           1       0.92      0.96      0.94        73
           2       1.00      0.97      0.98        88
           3       0.96      0.98      0.97        89
           4       0.84      0.89      0.86        74
           5       0.90      0.85      0.88        82
           6       0.93      0.95      0.94        78
           7       0.92      0.91      0.91        64
           8       0.96      0.94      0.95        81
           9       0.95      0.97      0.96        73

    accuracy                           0.94       800
   macro avg       0.93      0.93      0.93       800
weighted avg       0.94      0.94      0.94       800



# Prediction on Actual Songs

In [None]:
import numpy as np
import librosa
import pickle
import warnings
warnings.filterwarnings('ignore')

def predict_chords(audio_file, model, label_encoder, segment_length=0.5):
    y, sr = librosa.load(audio_file)
    duration = librosa.get_duration(y=y, sr=sr)
    print(duration)
    chords_pred = []
    
    # Convert segment_length to frames
    segment_length_frames = int(segment_length * sr)
    
    # Loop through the audio in segments
    for start in np.arange(0, duration, segment_length):
        end = min(start + segment_length, duration)
        
        # Convert start and end times to frame indices
        start_frame = int(start * sr)
        end_frame = int(end * sr)
        
        segment = y[start_frame:end_frame]

        # Extract features
        chroma = librosa.feature.chroma_stft(y=segment, sr=sr)
        features = np.mean(chroma, axis=1).reshape(1,-1)
        
        # Get prediction probabilities
        probas = model.predict_proba(features)[0]
        
        # Get all chords with probability above the threshold
        best_idx = np.argmax(probas)
        best_chord = label_encoder.inverse_transform([best_idx])[0]
        chords_pred.append(best_chord)
    
    return chords_pred

# Load the model and label encoder
with open('chord_svm_model.pkl', 'rb') as f:
    model = pickle.load(f)

# Predict chords
audio_file = '/kaggle/input/about-a-girl/about a girl.wav'
predicted_chords = predict_chords(audio_file, model, label_encoder, segment_length=0.5)

print(f'Predicted chords: {predicted_chords}')


In [None]:
unique_progression = []
for i in predicted_chords:
    if len(unique_progression) == 0:
        unique_progression.append(i)
    else:
        if unique_progression[-1] == str(i):
            continue
        else:
            unique_progression.append(i)
            
print(unique_progression)

In [None]:
import matplotlib.pyplot as plt
# # Visualization of a single audio file for example
# #example_file = os.path.join(base_dir, '/kaggle/input/guitar-chords-v3/Training', '/kaggle/input/guitar-chords-v3/Training/Bb', 'Bb_AcusticPlug12_4.wav')  # Replace with an actual example file path
# example_file = os.path.join('/kaggle/input/test111', '/kaggle/input/test111/coniferous-forest-142569.wav')  # Replace with an actual example file path

# Load the example audio file
# y, sr = librosa.load(example_file, sr=44100, mono=True)

# Normalize the audio signal
y = y / np.max(np.abs(y))

# Extract features
stft = librosa.stft(y)
stft_db = librosa.amplitude_to_db(np.abs(stft), ref=np.max)
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
chroma = librosa.feature.chroma_stft(y=y, sr=sr)

# Plot waveform
plt.figure(figsize=(20, 10))
plt.subplot(3, 1, 1)
librosa.display.waveshow(y, sr=sr)
plt.title('Waveform')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')

# Plot spectrogram
plt.subplot(3, 1, 2)
librosa.display.specshow(stft_db, sr=sr, x_axis='time', y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Spectrogram')

# Plot MFCCs
plt.subplot(3, 1, 3)
librosa.display.specshow(mfccs, sr=sr, x_axis='time')
plt.colorbar()
plt.title('MFCCs')

plt.tight_layout()
plt.show()

# Plot Chroma features
plt.figure(figsize=(10, 4))
librosa.display.specshow(chroma, sr=sr, x_axis='time', y_axis='chroma')
plt.colorbar()
plt.title('Chroma Features')
plt.tight_layout()
plt.show()


# Space removal in betn and at the end

In [None]:
import librosa

In [None]:
y, sr = librosa.load('/kaggle/input/guitar-chords-v3/Test/C/C_AcusticPlug28_2.wav')

In [None]:
librosa.display.waveshow(y, sr=sr)

In [None]:
def space_removal(y):
    y = list(y)
    for i,j in enumerate(y):
        if abs(j) >= 0.2:
            y1 = [k for k in y[:i] if abs(k) >= 0.01] + y[i:]
            break
    for i,j in enumerate(y1[::-1]):
        if abs(j) >= 0.2:
            y2 = [k for k in y1[::-1][:i] if abs(k) >= 0.01] + y1[::-1][i:]
            break
    return np.array(y2[::-1])

In [None]:
y_new = space_removal(y)
librosa.display.waveshow(y_new, sr=sr)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
y, sr = librosa.load('/kaggle/input/about-a-girl/about a girl.wav')

# Normalize the audio signal
y = y / np.max(np.abs(y))

# Extract features
stft = librosa.stft(y)
stft_db = librosa.amplitude_to_db(np.abs(stft), ref=np.max)
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
chroma = librosa.feature.chroma_stft(y=y, sr=sr)

# Plot waveform
plt.figure(figsize=(20, 10))
plt.subplot(3, 1, 1)
librosa.display.waveshow(y, sr=sr)
plt.title('Waveform')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')

# Plot spectrogram
plt.subplot(3, 1, 2)
librosa.display.specshow(stft_db, sr=sr, x_axis='time', y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Spectrogram')

plt.tight_layout()
plt.show()

# Plot Chroma features
plt.figure(figsize=(10, 4))
librosa.display.specshow(chroma, sr=sr, x_axis='time', y_axis='chroma')
plt.colorbar()
plt.title('Chroma Features')
plt.tight_layout()
plt.show()