In [None]:
# Import necessary libraries
import librosa
import numpy as np

In [None]:
# Load one example clip with Librosa. `audio` (the decoded samples) and `sr`
# (the sampling rate) are reused by every feature-extraction cell below.
file_path = "/content/blues.00000.wav"
audio, sr = librosa.load(file_path)

Chroma features capture the harmonic and melodic characteristics of music while being robust to changes in timbre and instrumentation.

In [None]:
# Extract Chroma Short-Time Fourier Transform (STFT) features from the loaded clip.
# The bare `chroma_stft` on the last line makes the notebook display the array.
chroma_stft = librosa.feature.chroma_stft(y=audio, sr=sr)
chroma_stft

array([[0.33112544, 0.23492372, 0.8714165 , ..., 0.15691066, 0.87786204,
        1.        ],
       [0.39392966, 0.29400367, 0.6882651 , ..., 0.13222207, 0.68507576,
        0.7281689 ],
       [0.42932406, 0.41955495, 0.51602733, ..., 0.25044858, 0.56496435,
        0.40339905],
       ...,
       [1.        , 1.        , 1.        , ..., 0.2619948 , 0.95514834,
        0.86573505],
       [0.42033246, 0.36797962, 0.6217583 , ..., 1.        , 1.        ,
        0.9858056 ],
       [0.30348283, 0.19700052, 0.64925486, ..., 0.6287608 , 0.71722466,
        0.81626564]], dtype=float32)

The spectral centroid represents the center of mass of the spectrum and indicates where the "center of gravity" of the signal's frequency distribution is located.

In [None]:
# Extract the Spectral Centroid feature from the clip loaded above
# (stacked with the other spectral descriptors in a later cell)
spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)

 Spectral bandwidth describes the width of the spectral band and provides information about the spread of frequencies in the signal.

In [None]:
# Extract the Spectral Bandwidth feature from the clip loaded above
# (stacked with the other spectral descriptors in a later cell)
spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr)

Spectral rolloff represents the frequency below which a certain percentage of the total spectral energy is contained.

In [None]:
# Extract the Spectral Rolloff feature from the clip loaded above
# (stacked with the other spectral descriptors in a later cell)
spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)

MFCCs are coefficients representing the short-term power spectrum of a sound, modeled after the human auditory system's response to different frequencies.

In [None]:
# Extract Mel-frequency cepstral coefficients (MFCCs); n_mfcc=13 requests 13 coefficients.
# Note: despite its name, `mel_freq` holds the MFCC matrix.
mel_freq = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)


In [None]:
# Stack the three spectral descriptors and the MFCC matrix row-wise into a
# single 2-D array (note: chroma_stft is not part of this stack)
features = np.concatenate(
    (spectral_centroid, spectral_bandwidth, spectral_rolloff, mel_freq),
    axis=0,
)


In [None]:
# Average every row of the stacked array along the time axis (axis 1),
# leaving one mean value per feature row
mean_features = features.mean(axis=1)

In [None]:
# Print the shape of each extracted feature rather than the raw arrays:
# the full arrays hold one value per frame and would flood the cell output.
# Axis 1 is time (it is the axis averaged to obtain mean_features).
print("Spectral Centroid:", spectral_centroid.shape)
print("Spectral Bandwidth:", spectral_bandwidth.shape)
print("Spectral Rolloff:", spectral_rolloff.shape)
print("MFCCs:", mel_freq.shape)
print("Mean Features:", mean_features.shape)

Spectral Centroid: (1, 1293)
Spectral Bandwidth: (1, 1293)
Spectral Rolloff: (1, 1293)
MFCCs: (13, 1293)
Mean Features: (16,)


In [None]:
import librosa
import numpy as np
import pandas as pd

# Function to extract features from an audio file
def extract_features(file_path):
    """Summarise one audio file as a flat list of 19 scalar descriptors.

    Nine signals are computed: chroma STFT, RMS, spectral centroid, spectral
    bandwidth, spectral rolloff, zero-crossing rate, the harmonic and
    percussive components from HPSS, and a 20-coefficient MFCC matrix.
    Each one is collapsed to a single mean and a single variance taken over
    the whole array. The MFCC matrix therefore contributes one mean and one
    variance in total, not one pair per coefficient.

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        list: the 9 means, then the 9 variances (same signal order as above),
        then the estimated tempo as a plain ``float``.
    """
    # Load the audio file using Librosa
    audio, sr = librosa.load(file_path)

    # Frame-wise descriptors
    chroma_stft = librosa.feature.chroma_stft(y=audio, sr=sr)
    rms = librosa.feature.rms(y=audio)
    spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y=audio)

    # Harmonic / percussive source separation; the second output is kept under
    # the name `perceptr`
    harmony, perceptr = librosa.effects.hpss(audio)

    # Tempo estimate. Newer librosa releases return it as a 1-element array
    # instead of a scalar, so normalise to a plain float to keep the returned
    # list made of scalars only.
    tempo, _ = librosa.beat.beat_track(y=audio, sr=sr)
    tempo = float(np.atleast_1d(tempo)[0])

    # Mel-frequency cepstral coefficients (MFCCs)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20)

    # Build the signal list once so the mean and variance passes cannot drift apart
    signals = [
        chroma_stft,
        rms,
        spectral_centroid,
        spectral_bandwidth,
        spectral_rolloff,
        zero_crossing_rate,
        harmony,
        perceptr,
        mfccs,
    ]
    mean_features = [np.mean(signal) for signal in signals]
    var_features = [np.var(signal) for signal in signals]

    # Return the extracted features
    return mean_features + var_features + [tempo]

# Example usage: run the extractor on one clip
file_path = "/content/blues.00000.wav"
features = extract_features(file_path)

# Show the returned list (means first, then variances, then tempo)
print("Extracted Features:", features)


Extracted Features: [0.3501285, 0.13018432, 1784.1226412753101, 2002.4124072733878, 3805.7230301080335, 0.08304482066898686, -4.906634e-05, -1.0618483e-05, 1.9366392, 0.08877166, 0.00282838, 129745.48441929805, 85834.41040579003, 901252.916706492, 0.0007669456545940504, 0.008172463, 0.0057044374, 1767.5433, 123.046875]


In [None]:
import librosa
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib

# Function to extract features from an audio file
def extract_features(file_path):
    """Compute the 58-value feature row the trained pipeline expects for one audio file.

    The previous body ignored ``file_path`` and returned random numbers, so
    every prediction made from it was meaningless. This version derives the
    values from the audio itself. It shadows the list-returning
    ``extract_features`` defined in an earlier cell of this notebook.

    Assumed column layout (NOTE(review): confirm against ``X.columns`` after
    dropping 'label' and 'filename' from features_30_sec.csv):
        length (number of samples),
        mean/var pairs for chroma_stft, rms, spectral_centroid,
        spectral_bandwidth, rolloff, zero_crossing_rate, harmony, perceptr,
        tempo,
        mean/var pairs for each of the 20 MFCCs.

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        numpy.ndarray: float array of shape (1, 58), ready for ``model.predict``.

    Raises:
        ValueError: if the assembled row does not contain exactly 58 values.
    """
    num_features = 58  # Assuming the same number of features used during training

    audio, sr = librosa.load(file_path)

    # Harmonic / percussive separation; both outputs are summarised below
    harmony, perceptr = librosa.effects.hpss(audio)

    # Newer librosa releases return tempo as a 1-element array; normalise to float
    tempo, _ = librosa.beat.beat_track(y=audio, sr=sr)
    tempo = float(np.atleast_1d(tempo)[0])

    # Signals that each contribute one (mean, var) pair, in the assumed column order
    paired_signals = [
        librosa.feature.chroma_stft(y=audio, sr=sr),
        librosa.feature.rms(y=audio),
        librosa.feature.spectral_centroid(y=audio, sr=sr),
        librosa.feature.spectral_bandwidth(y=audio, sr=sr),
        librosa.feature.spectral_rolloff(y=audio, sr=sr),
        librosa.feature.zero_crossing_rate(y=audio),
        harmony,
        perceptr,
    ]
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20)

    row = [float(len(audio))]
    for signal in paired_signals:
        row.extend([np.mean(signal), np.var(signal)])
    row.append(tempo)
    # One (mean, var) pair per MFCC coefficient, i.e. per row of the matrix
    for coefficient in mfccs:
        row.extend([np.mean(coefficient), np.var(coefficient)])

    features = np.asarray(row, dtype=float).reshape(1, -1)
    if features.shape[1] != num_features:
        raise ValueError(
            f"Expected {num_features} features, got {features.shape[1]}"
        )
    return features


# Load the pre-computed feature table from the CSV file
# (change the path below if your dataset lives elsewhere)
try:
    data = pd.read_csv('/content/features_30_sec.csv')
except FileNotFoundError as err:
    # Re-raise rather than calling exit(): inside a notebook exit() shuts the
    # kernel down and throws away every variable, whereas an exception just
    # stops this cell with a readable message.
    raise FileNotFoundError(
        "Dataset file not found: /content/features_30_sec.csv"
    ) from err

# Normalise the label text: trim surrounding whitespace and drop embedded newlines.
# regex=False makes the literal replacement explicit and avoids the pandas
# FutureWarning about single-character patterns.
data['label'] = data['label'].str.strip().str.replace('\n', '', regex=False)

# Extract features and labels
X = data.drop(columns=['label', 'filename'])  # Exclude the label and filename columns
y = data['label']

# Encode the string labels as integers; the fitted encoder is kept so that
# predictions can be decoded back to genre names later
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the dataset into training and testing sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a pipeline for preprocessing and modeling, so the scaler fitted on the
# training data is applied automatically at prediction time
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Scale features
    ('clf', RandomForestClassifier(random_state=42))  # Random Forest Classifier
])

# Train the model
pipeline.fit(X_train, y_train)

# Evaluate the model on the test set
predictions = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Test accuracy: {accuracy * 100:.2f}%")

# Save the trained model and preprocessing steps
joblib.dump(pipeline, 'trained_model.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')

# Function to predict the genre of an audio file
def predict_genre(file_path, model, label_encoder):
    """Return the decoded genre predicted for a single audio file.

    Args:
        file_path: Path handed to ``extract_features`` to build the feature row.
        model: Fitted estimator exposing ``predict``.
        label_encoder: Fitted encoder exposing ``inverse_transform``.

    Returns:
        The first (and only) decoded label for the file.
    """
    feature_row = extract_features(file_path)
    encoded_prediction = model.predict(feature_row)
    # inverse_transform yields one decoded label per prediction; take the first
    return label_encoder.inverse_transform(encoded_prediction)[0]

# Example usage: reload the persisted artefacts and classify one clip.
# (joblib.load unpickles its input, so only load files you trust; these two
# were written by this notebook just above.)
model = joblib.load('trained_model.pkl')
label_encoder = joblib.load('label_encoder.pkl')
file_path = "/content/classical.00000.wav"  # Replace with the path to your audio file
predicted_genre = predict_genre(file_path, model, label_encoder)
print("Predicted Genre:", predicted_genre)


Test accuracy: 75.50%
Predicted Genre: classical


