In [1]:
import librosa
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import os

from pydub import AudioSegment

from glob import glob



In [2]:
def convert_mp4_to_wav(mp4_file, wav_file):
    """Decode the audio of ``mp4_file`` and write it to ``wav_file`` as WAV.

    Args:
        mp4_file: Path of the source media file, handed to
            ``AudioSegment.from_file``.
        wav_file: Destination path; the file is written with ``format="wav"``.

    Note:
        NOTE(review): pydub presumably relies on external ffmpeg/ffprobe
        binaries to decode the input -- confirm they are installed and on PATH
        before running this.
    """
    AudioSegment.from_file(mp4_file).export(wav_file, format="wav")

In [3]:
def extract_features(audio_file, duration=3, sr=22050):
    """Compute a per-frame feature matrix for the start of an audio file.

    MFCC, chroma and spectral-contrast features are stacked along the feature
    axis and transposed, so each row of the result describes one analysis
    frame.

    Args:
        audio_file: Path of the audio file to load.
        duration: Maximum number of seconds to read from the file.
        sr: Target sample rate passed to ``librosa.load``. ``None`` is also
            accepted, in which case the rate reported by the loader is used
            for the feature computations.

    Returns:
        2-D ``numpy`` array, the transpose of the stacked feature matrices
        (presumably ``(n_frames, n_features)`` -- librosa feature arrays are
        assumed to be ``(n_coefficients, n_frames)``; TODO confirm).
    """
    # Keep the sample rate reported by the loader instead of discarding it:
    # it equals ``sr`` whenever a numeric rate was requested, and is the only
    # valid value when the caller asked for the file's native rate (sr=None).
    y, sr = librosa.load(audio_file, duration=duration, sr=sr)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    # Stack along the feature axis, then transpose so rows are frames.
    features = np.vstack([mfccs, chroma, spectral_contrast])
    return features.T

In [4]:
def train_model(pos_samples, neg_samples):
    """Fit a random-forest classifier that separates positive from negative rows.

    Rows of ``pos_samples`` are labelled 1 and rows of ``neg_samples`` are
    labelled 0. The combined data is split 80/20 into train and test parts
    (fixed ``random_state=42``), a 100-tree ``RandomForestClassifier`` is
    fitted on the training part, and its accuracy on the held-out part is
    printed.

    Args:
        pos_samples: Feature rows of the positive class.
        neg_samples: Feature rows of the negative class.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    features = np.vstack([pos_samples, neg_samples])
    labels = np.hstack([np.ones(len(pos_samples)), np.zeros(len(neg_samples))])

    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Random forest is the chosen model here; any estimator with fit/predict
    # could be substituted.
    forest = RandomForestClassifier(n_estimators=100, random_state=42).fit(train_X, train_y)

    # Report accuracy on the held-out split.
    accuracy = accuracy_score(test_y, forest.predict(test_X))
    print(f"Model Accuracy: {accuracy * 100:.2f}%")

    return forest

In [5]:
def _convert_all_to_wav(mp4_files):
    """Convert every MP4 in ``mp4_files`` to a sibling ``.wav`` and return the WAV paths."""
    wav_files = []
    for mp4_file in mp4_files:
        wav_file = os.path.splitext(mp4_file)[0] + ".wav"
        convert_mp4_to_wav(mp4_file, wav_file)
        wav_files.append(wav_file)
    return wav_files


def main():
    """Train the snick detector on the sample clips and classify one new clip.

    Steps:
        1. Collect the positive and negative MP4 samples and convert each to WAV.
        2. Extract per-frame features from every WAV and train the classifier.
        3. Convert and featurise the clip to be processed, classify each of its
           frames, and report a snick when a strict majority of frames are
           predicted positive.

    Raises:
        ValueError: If either sample directory contains no ``.mp4`` files.
    """
    # Example paths, replace with your dataset paths.
    # Sorted so that row order (and therefore the seeded train/test split) does
    # not depend on the filesystem's directory listing order.
    positive_samples_mp4 = sorted(glob('../Media/positive_samples/*.mp4'))
    negative_samples_mp4 = sorted(glob('../Media/negative_samples/*.mp4'))
    if not positive_samples_mp4 or not negative_samples_mp4:
        # Without this guard np.vstack([]) fails later with an opaque message.
        raise ValueError(
            "No .mp4 samples found; expected files in "
            "'../Media/positive_samples/' and '../Media/negative_samples/'."
        )

    # Convert MP4 to WAV
    positive_samples_wav = _convert_all_to_wav(positive_samples_mp4)
    negative_samples_wav = _convert_all_to_wav(negative_samples_mp4)

    # Extract features from positive and negative samples; each file
    # contributes one row per frame.
    pos_features = np.vstack([extract_features(file) for file in positive_samples_wav])
    neg_features = np.vstack([extract_features(file) for file in negative_samples_wav])

    # Train the model
    model = train_model(pos_features, neg_features)

    # Example usage of the model on a new audio file
    new_audio_file_mp4 = "../Media/videos_to_be_processed/Shot1.mp4"
    new_audio_file_wav = os.path.splitext(new_audio_file_mp4)[0] + ".wav"
    convert_mp4_to_wav(new_audio_file_mp4, new_audio_file_wav)
    new_features = extract_features(new_audio_file_wav)

    # The model was trained on one row per frame, so it must be queried the
    # same way. Flattening the whole clip into a single row (reshape(1, -1))
    # changes the number of columns and makes predict() reject the input.
    frame_predictions = model.predict(new_features)
    # Clip-level decision: strict majority vote over the per-frame labels.
    prediction = 1 if np.mean(frame_predictions) > 0.5 else 0

    if prediction == 1:
        print("Snick detected!")
    else:
        print("No snick detected.")

In [6]:
# Entry point: run the full train-and-predict pipeline only when this code is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()



FileNotFoundError: [Errno 2] No such file or directory: 'ffprobe'