<a href="https://colab.research.google.com/github/aditimirji/AIML-Project-Series/blob/main/ML_Orange.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import os
import zipfile
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Step 1: Extract ZIP File
def extract_zip(zip_file_path, extract_folder):
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_folder)

# Step 2: Load Audio Files and Assign Labels (including subdirectories)
def load_audio_files(folder):
    labels = []
    features = []
    print(f"Searching for audio files in: {folder}")

    # Walk through all subdirectories to find audio files
    for root, _, files in os.walk(folder):
        for file in files:
            if file.endswith('.wav'):
                # Determine the label based on the subdirectory name
                if 'dog' in root.lower():
                    label = 'dog'
                elif 'cat' in root.lower():
                    label = 'cat'
                elif 'bird' in root.lower():
                    label = 'bird'
                else:
                    continue  # Skip files that don't match the expected labels
                labels.append(label)

                # Load audio file and extract features using librosa
                file_path = os.path.join(root, file)
                y, sr = librosa.load(file_path, sr=None)

                # Extract MFCC features
                mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
                mfcc_mean = np.mean(mfcc.T, axis=0)  # Average the MFCCs across time
                features.append(mfcc_mean)

    return np.array(features), np.array(labels)

# Step 3: Preprocess Data (Encode labels)
def preprocess_labels(labels):
    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform(labels)  # Convert to numeric labels
    return encoded_labels, label_encoder

# Step 4: Train Classifier
def train_classifier(X_train, y_train):
    classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    classifier.fit(X_train, y_train)
    return classifier

# Step 5: Evaluate Model
def evaluate_model(classifier, X_test, y_test):
    y_pred = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Main Program
if __name__ == "__main__":
    # Define paths
    zip_file_path = 'archive(1).zip'  # Replace with your zip file path
    extract_folder = 'extracted_audio'  # Folder to extract the files to
    animal_folder = os.path.join(extract_folder, 'Animals')  # Corrected folder name

    # Step 1: Extract the zip file
    extract_zip(zip_file_path, extract_folder)

    # Check contents of the extracted folder
    if os.path.exists(animal_folder):
        print(f"Subfolders in {animal_folder}: {os.listdir(animal_folder)}")
    else:
        print(f"Folder {animal_folder} does not exist!")

    # Step 2: Load audio files and extract features from 'Animals' subfolder
    X, y = load_audio_files(animal_folder)
    print(f"Loaded {len(X)} samples.")

    if len(X) == 0:
        print("No audio files found. Please check the folder and file paths.")
    else:
        # Step 3: Preprocess labels
        y, label_encoder = preprocess_labels(y)

        # Step 4: Split the data into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Step 5: Train the classifier
        classifier = train_classifier(X_train, y_train)

        # Step 6: Evaluate the model
        accuracy = evaluate_model(classifier, X_test, y_test)

        # Print results
        print(f"Training Accuracy: {accuracy * 100:.2f}%")





Subfolders in extracted_audio/Animals: ['bird', 'cat', 'dog']
Searching for audio files in: extracted_audio/Animals
Loaded 610 samples.
Training Accuracy: 81.15%
