In [1]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

# Feature extraction
def extract_features_from_file(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    return np.mean(mfcc.T, axis=0)  # Mean across time axis

# Load data from folder
def load_data_from_folders(base_folder, labels):
    data = []
    target = []
    for label in labels:
        folder = os.path.join(base_folder, label)
        for filename in os.listdir(folder):
            if filename.endswith(".wav"):
                file_path = os.path.join(folder, filename)
                features = extract_features_from_file(file_path)
                data.append(features)
                target.append(label)
    return np.array(data), np.array(target)

# Set paths and labels
base_folder = '/Users/suarezcruzrosy/Downloads/animals'
labels = ['dog', 'cat', 'bird']

# Load data
X, y = load_data_from_folders(base_folder, labels)

# Encode labels to integers
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# KNN with GridSearchCV
param_grid = {
    'n_neighbors': [3, 5, 7, 10],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best Parameters
print("Best Parameters:", grid_search.best_params_)

# Model Evaluation
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy}")


Best Parameters: {'metric': 'euclidean', 'n_neighbors': 10, 'weights': 'distance'}
Test Accuracy: 0.7622950819672131


In [1]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder

# Feature extraction
def extract_features_from_file(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    return np.mean(mfcc.T, axis=0)  # Mean across time axis

# Load data from folder
def load_data_from_folders(base_folder, labels):
    data = []
    target = []
    for label in labels:
        folder = os.path.join(base_folder, label)
        for filename in os.listdir(folder):
            if filename.endswith(".wav"):
                file_path = os.path.join(folder, filename)
                features = extract_features_from_file(file_path)
                data.append(features)
                target.append(label)
    return np.array(data), np.array(target)

# Set paths and labels
base_folder = '/Users/suarezcruzrosy/Downloads/animals'
labels = ['dog', 'cat', 'bird']

# Load data
X, y = load_data_from_folders(base_folder, labels)

# Encode labels to integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# Logistic Regression with GridSearchCV
param_grid = {
    'C': [0.1, 1, 10],  # Regularization strength
    'solver': ['lbfgs', 'liblinear'],  # Solvers for optimization
    'max_iter': [100, 200, 300]  # Maximum iterations for convergence
}
grid_search = GridSearchCV(LogisticRegression(multi_class='ovr'), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best Parameters
print("Best Parameters:", grid_search.best_params_)

# Model Evaluation
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy}")




Best Parameters: {'C': 10, 'max_iter': 100, 'solver': 'lbfgs'}
Test Accuracy: 0.8114754098360656




In [4]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

# Feature extraction
def extract_features_from_file(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    return np.mean(mfcc.T, axis=0)  # Mean across time axis

# Load data from folder
def load_data_from_folders(base_folder, labels):
    data = []
    target = []
    for label in labels:
        folder = os.path.join(base_folder, label)
        for filename in os.listdir(folder):
            if filename.endswith(".wav"):
                file_path = os.path.join(folder, filename)
                features = extract_features_from_file(file_path)
                data.append(features)
                target.append(label)
    return np.array(data), np.array(target)

# Set paths and labels
base_folder = '/Users/suarezcruzrosy/Downloads/animals'
labels = ['dog', 'cat', 'bird']

# Load data
X, y = load_data_from_folders(base_folder, labels)

# Encode labels to integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# Random Forest with GridSearchCV
param_grid = {
    'n_estimators': [100, 200, 300],  # Number of trees
    'max_depth': [None, 10, 20],  # Maximum depth of trees
    'min_samples_split': [2, 5, 10],  # Minimum samples required to split an internal node
    'min_samples_leaf': [1, 2, 4],  # Minimum samples required to be at a leaf node
    'bootstrap': [True, False]  # Whether bootstrap samples are used when building trees
}

# Initialize Random Forest classifier
rf_clf = RandomForestClassifier(random_state=42)

grid_search = GridSearchCV(rf_clf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best Parameters
print("Best Parameters:", grid_search.best_params_)

# Model Evaluation
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy}")


Best Parameters: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 200}
Test Accuracy: 0.8360655737704918


In [5]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb

# Feature extraction
def extract_features_from_file(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    return np.mean(mfcc.T, axis=0)  # Mean across time axis

# Load data from folder
def load_data_from_folders(base_folder, labels):
    data = []
    target = []
    for label in labels:
        folder = os.path.join(base_folder, label)
        for filename in os.listdir(folder):
            if filename.endswith(".wav"):
                file_path = os.path.join(folder, filename)
                features = extract_features_from_file(file_path)
                data.append(features)
                target.append(label)
    return np.array(data), np.array(target)

# Set paths and labels
base_folder = '/Users/suarezcruzrosy/Downloads/animals'
labels = ['dog', 'cat', 'bird']

# Load data
X, y = load_data_from_folders(base_folder, labels)

# Encode labels to integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# XGBoost with GridSearchCV
param_grid = {
    'max_depth': [3, 6, 10],  # Maximum depth of trees
    'learning_rate': [0.01, 0.1, 0.3],  # Learning rate
    'n_estimators': [100, 200, 300],  # Number of trees
    'subsample': [0.8, 1.0],  # Fraction of samples to use for each tree
    'colsample_bytree': [0.8, 1.0]  # Fraction of features to use for each tree
}

# Initialize XGBoost classifier without 'use_label_encoder'
xgb_clf = xgb.XGBClassifier(eval_metric='mlogloss')

grid_search = GridSearchCV(xgb_clf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best Parameters
print("Best Parameters:", grid_search.best_params_)

# Model Evaluation
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy}")


Best Parameters: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 10, 'n_estimators': 100, 'subsample': 0.8}
Test Accuracy: 0.8278688524590164
