In [1]:
import librosa
import numpy as np

import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score



## Step 1: Extract Audio Features
### You'll first need to extract relevant features from the audio. A common choice is Mel-frequency cepstral coefficients (MFCCs).

In [2]:
def extract_features(audio_path):
    """Summarise an audio file as one vector of averaged MFCCs.

    Parameters
    ----------
    audio_path
        Path of the file handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Mean of each of the 13 requested MFCC rows, taken along axis 1.
    """
    # sr=None asks librosa to keep the file's own sampling rate instead of resampling.
    signal, sample_rate = librosa.load(audio_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    # Averaging over axis 1 (presumably the frame/time axis -- confirm against
    # the librosa docs) yields a fixed-size vector regardless of clip length.
    return coefficients.mean(axis=1)

## Step 2: Prepare Dataset
### You'll need a labeled dataset with examples of bat hits and non-hits.

In [3]:
def prepare_dataset(directory_path, feature_extractor=None):
    """Build a feature matrix and label vector from the ``.mp4`` files in a directory.

    Parameters
    ----------
    directory_path
        Directory whose immediate entries are scanned (no recursion).
    feature_extractor
        Optional callable taking a file path and returning a feature vector.
        When omitted, the notebook's ``extract_features`` is used.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where row ``i`` of ``X`` holds the features of the i-th
        ``.mp4`` file in sorted name order and ``y[i]`` is 1 if that file name
        contains ``"hit"``, otherwise 0. Both arrays are empty when the
        directory holds no ``.mp4`` files.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` if the directory cannot be read.
    """
    # Resolve the default at call time so the notebook-level extractor is only
    # looked up when the caller did not supply a replacement.
    if feature_extractor is None:
        feature_extractor = extract_features

    X, y = [], []

    # os.listdir yields entries in arbitrary order; sorting makes the row order
    # (and therefore any seeded train/test split downstream) reproducible.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith(".mp4"):
            continue

        audio_path = os.path.join(directory_path, filename)
        features = feature_extractor(audio_path)

        X.append(features)
        # Labels come from a case-sensitive substring match on the file name.
        y.append(1 if "hit" in filename else 0)

    return np.array(X), np.array(y)

## Step 3: Train a Classifier
### Here, we'll use a simple logistic regression classifier for demonstration purposes. You might need a more sophisticated model depending on your dataset.

In [4]:
# Directory holding the labelled clips, resolved against the current working directory.
directory_path = "../Media"
X, y = prepare_dataset(directory_path)

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# repeatable for a given input order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Logistic regression with scikit-learn's default settings; fit() returns the
# estimator itself, so construction and training can be chained.
classifier = LogisticRegression().fit(X_train, y_train)

# Evaluate the model on the held-out split.
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

SystemError: initialization of _internal failed without raising an exception