In [None]:
import os
import numpy as np
import librosa
import soundfile as sf
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix
)

# === Step 1: Data Collection & Preprocessing ===
# Root folder of the dataset. The loading loop further down treats every
# sub-directory of this folder as one instrument class and uses the
# sub-directory name as the class label.
# NOTE(review): machine-specific absolute Windows path — adjust before
# running on another machine.
dataset_path = r"C:\Users\DIYA DINEEP\Downloads\music_instr_dataset"
# Upper bound on how many .wav files are read from each instrument folder.
max_files_per_class = 70  # Limit the number of files per instrument

# === Step 2: Feature Extraction ===
# Feature Extraction Function
def extract_audio_features(file_path, target_sr=16000):
    """Build a fixed-length feature vector describing one audio file.

    The file is read with ``soundfile``, mixed down to mono, peak-normalised
    and resampled to ``target_sr`` when its native rate differs. The returned
    vector is the concatenation, in this order, of:

    1. 50 MFCC coefficients, each averaged over time;
    2. 20 values summarising the time-averaged STFT magnitude spectrum;
    3. the mean spectral centroid;
    4. the mean zero-crossing rate;
    5. the time-averaged chroma vector (zeros if chroma extraction fails).

    Args:
        file_path: Path of the audio file to analyse.
        target_sr: Sample rate (Hz) the signal is resampled to before any
            feature is computed.

    Returns:
        A 1-D ``numpy`` array of features (84 values, assuming librosa's
        default of 12 chroma bins), or ``None`` if the file could not be
        read or processed. Failures are reported with ``print`` rather
        than raised, so one bad file does not abort a dataset scan.
    """
    try:
        # Load audio
        signal, sr = sf.read(file_path)

        # Convert stereo (or any multi-channel layout) to mono
        if signal.ndim > 1:
            signal = np.mean(signal, axis=1)

        # Peak-normalise; the epsilon avoids dividing by zero on silence
        signal = signal / (np.max(np.abs(signal)) + 1e-8)

        # Resample if needed
        if sr != target_sr:
            signal = librosa.resample(signal, orig_sr=sr, target_sr=target_sr)
            sr = target_sr

        # === Extract Features ===
        # 1. MFCC, averaged over frames
        mfcc = np.mean(librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=50), axis=1)

        # 2. Spectrogram summary
        spec = np.abs(librosa.stft(signal, n_fft=256, hop_length=128, window='hamming'))
        spec_mean = np.mean(spec, axis=1)
        # spec_mean[i::20] picks every 20th bin starting at bin i, so each of
        # the 20 values averages interleaved bins, not one contiguous band.
        # Kept as-is so the feature layout stays unchanged.
        downsampled_spec = [np.mean(spec_mean[i::20]) for i in range(20)]

        # 3. Spectral Centroid
        spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=signal, sr=sr))

        # 4. Zero-Crossing Rate
        zero_crossing = np.mean(librosa.feature.zero_crossing_rate(signal))

        # 5. Chroma Features (best effort: fall back to zeros on failure).
        # Catch Exception rather than using a bare except so Ctrl-C and
        # interpreter shutdown are not swallowed, and report the fallback.
        try:
            chroma = np.mean(librosa.feature.chroma_stft(y=signal, sr=sr), axis=1)
        except Exception as chroma_error:
            print(f"Chroma extraction failed for {file_path}: {chroma_error}; using zeros")
            chroma = np.zeros(12)

        # Combine all features
        features = np.concatenate([mfcc, downsampled_spec, [spectral_centroid], [zero_crossing], chroma])
        return features

    except Exception as e:
        # Top-level boundary for a single file: report and signal failure
        # to the caller with None.
        print(f"Error processing {file_path}: {e}")
        return None

# Load Data
# Each sub-directory of dataset_path is one instrument class; its name is
# used as the label for every file extracted from it.
features_list = []
labels_list = []

# os.listdir() returns entries in arbitrary order, so both listings are
# sorted: this makes the subset kept by the max_files_per_class cut-off
# (and therefore the whole experiment) reproducible across runs/machines.
for instrument_name in sorted(os.listdir(dataset_path)):
    instrument_folder = os.path.join(dataset_path, instrument_name)

    if not os.path.isdir(instrument_folder):
        continue

    # Case-insensitive match so files named *.WAV are not silently skipped.
    audio_files = sorted(
        f for f in os.listdir(instrument_folder) if f.lower().endswith('.wav')
    )

    for audio_file in audio_files[:max_files_per_class]:
        file_path = os.path.join(instrument_folder, audio_file)
        features = extract_audio_features(file_path)

        # extract_audio_features returns None for unreadable files; skip them.
        if features is not None:
            features_list.append(features)
            labels_list.append(instrument_name)

# Fail early with a clear message instead of a confusing shape error later.
if not features_list:
    raise RuntimeError(f"No usable .wav files found under {dataset_path}")

# Convert to numpy arrays
X = np.array(features_list)
y = np.array(labels_list)

# === Step 3: Data Preparation (Train-Test Split) ===
# Split first, then fit the scaler on the training partition only. Fitting
# it on the full data set would leak test-set mean/variance into training
# and make the reported test scores optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=1
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for any later cell that expects the fully scaled feature matrix;
# it uses the statistics learned from the training partition.
X_scaled = scaler.transform(X)

# === Step 4: Train the SVM Model ===
# RBF-kernel support vector classifier; fit() returns the estimator itself,
# so construction and training are chained into one expression.
model = SVC(C=1, gamma='scale', kernel='rbf').fit(X_train, y_train)

# === Step 5: Prediction & Evaluation ===
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy: {accuracy * 100:.2f}%\n")

# Macro-averaged scores (every class weighted equally), as percentages.
precision = precision_score(y_test, y_pred, average='macro') * 100
recall = recall_score(y_test, y_pred, average='macro') * 100
f1 = f1_score(y_test, y_pred, average='macro') * 100

print(f"Overall Precision: {precision:.2f}%")
print(f"Overall Recall:    {recall:.2f}%")
print(f"Overall F1 Score:  {f1:.2f}%\n")

# Classification Report
report_dict = classification_report(y_test, y_pred, output_dict=True)
print("Classification Report (in %):")
print("{:<20s} {:>10s} {:>10s} {:>10s} {:>10s}".format("Label", "Precision", "Recall", "F1-Score", "Support"))
print("-" * 60)

# Per-class rows come first (in the order the report lists them), followed
# by the macro and weighted averages. "accuracy" is a bare float in the
# report dictionary, not a metrics row, so it is excluded.
summary_rows = ["macro avg", "weighted avg"]
class_rows = [name for name in report_dict if name not in ["accuracy"] + summary_rows]
row_format = "{:<20s} {:10.2f} {:10.2f} {:10.2f} {:10d}"

# Row-local names are used here so the overall `precision`, `recall` and
# `f1` values computed above are not overwritten by the last table row.
for row_name in class_rows + summary_rows:
    row = report_dict[row_name]
    print(row_format.format(
        row_name,
        row['precision'] * 100,
        row['recall'] * 100,
        row['f1-score'] * 100,
        int(row['support']),
    ))

# Confusion Matrix Plot
# The sorted unique class names fix the row/column order of the matrix and
# double as tick labels on both axes.
labels = np.unique(y)
cm = confusion_matrix(y_test, y_pred, labels=labels)

plt.figure(figsize=(8, 6))
heatmap_options = {
    'annot': True,
    'fmt': 'd',
    'cmap': 'Blues',
    'xticklabels': labels,
    'yticklabels': labels,
    'cbar_kws': {'label': 'Number of Predictions'},
    'linewidths': 0.25,
    'linecolor': 'black',
}
sns.heatmap(cm, **heatmap_options)

# Titles and axis cosmetics
plt.title("Confusion Matrix - Instrument Recognition", fontsize=14, weight='bold', pad=20)
plt.xlabel("Predicted Label", fontsize=14)
plt.ylabel("True Label", fontsize=14)
plt.xticks(rotation=45, ha='right', fontsize=12)
plt.yticks(fontsize=12)
plt.tight_layout()
plt.show()

# === Step 6: Plot Accuracy vs C Value ===
# Sweep the SVM regularisation strength and record train/test accuracy.
# NOTE(review): the test split is used for this sweep, so the curve is
# diagnostic only; choosing C from it would bias the reported test score.
C_values = np.logspace(-2, 2, 10)  # C from 0.01 to 100
train_accuracies = []
test_accuracies = []

for C in C_values:
    # Use a separate name so the evaluated `model` from the training step
    # is not replaced by the last model of the sweep (C=100).
    sweep_model = SVC(kernel='rbf', C=C, gamma='scale')
    sweep_model.fit(X_train, y_train)

    train_accuracies.append(sweep_model.score(X_train, y_train))
    test_accuracies.append(sweep_model.score(X_test, y_test))

plt.figure(figsize=(6,6))
plt.plot(C_values, train_accuracies, label='Train Accuracy', marker='o')
plt.plot(C_values, test_accuracies, label='Test Accuracy', marker='s')
plt.xscale('log')
plt.xlabel('C Value (log scale)', fontsize=14)
plt.ylabel('Accuracy', fontsize=14)
plt.title('Accuracy vs C Value (SVM)', fontsize=16, weight='bold')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()




  return pitch_tuning(



Accuracy: 95.11%

Overall Precision: 95.37%
Overall Recall:    95.11%
Overall F1 Score:  95.13%

Classification Report (in %):
Label                 Precision     Recall   F1-Score    Support
------------------------------------------------------------
banjo                     84.62      78.57      81.48         14
bass                     100.00      92.86      96.30         14
bassoon                  100.00     100.00     100.00         14
cello                    100.00      85.71      92.31         14
clarinet                  93.33     100.00      96.55         14
contrabassoon            100.00      92.86      96.30         14
cor anglaies             100.00     100.00     100.00         14
double bass              100.00      92.86      96.30         14
flute                     93.33     100.00      96.55         14
french horn              100.00     100.00     100.00         14
guitar                    75.00      85.71      80.00         14
mandoline                 85.71