# Processing User Sample Input

In [13]:
import numpy as np
import librosa
import soundfile as sf
import pandas as pd
from pathlib import Path
import os
from scipy.signal import butter, filtfilt
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
warnings.filterwarnings('ignore')

In [11]:
def preprocess_audio(y, sr):
    """
    Lightly clean a recording: high-pass, normalize, trim, spectral gate, normalize.

    Args:
        y: Audio signal
        sr: Sample rate
    Returns:
        Cleaned audio signal
    """
    # Stage 1: 4th-order Butterworth high-pass at 60 Hz. The cutoff is given
    # to butter() as a fraction of the Nyquist frequency (sr / 2), and
    # filtfilt runs the filter forwards and backwards over the signal.
    hp_b, hp_a = butter(4, 60 / (sr / 2), btype='high', analog=False)
    signal = librosa.util.normalize(filtfilt(hp_b, hp_a, y))

    # Stage 2: trim leading/trailing audio using a 30 dB threshold
    # (adjust top_db based on your needs); keep only the trimmed signal.
    trimmed, _ = librosa.effects.trim(
        signal, top_db=30, frame_length=2048, hop_length=512
    )

    # Stage 3: subtle spectral gating. The noise floor is the mean over
    # frequency bins of each bin's minimum magnitude across frames; twice that
    # value is subtracted from every magnitude and negatives are clipped to 0.
    spectrum = librosa.stft(trimmed)
    magnitude, phase = np.abs(spectrum), np.angle(spectrum)
    gate = 2 * np.mean(np.min(magnitude, axis=1))
    gated = np.maximum(magnitude - gate, 0)

    # Stage 4: rebuild the waveform with the original phase and normalize.
    return librosa.util.normalize(librosa.istft(gated * np.exp(1j * phase)))

def segment_user_input(input_file, segments_output_dir, csv_output_dir, drum_type, segment_duration=0.5):
    """
    Segment a user's drum recording based on onset detection.

    The recording is cleaned with preprocess_audio, the cleaned audio is saved
    next to the input as "<stem>_cleaned.wav", and one fixed-length clip is
    written per detected onset. Clips that run past the end of the audio are
    zero-padded to the full segment length.

    Args:
        input_file: Path to the input wav file (snare.wav or kick.wav)
        segments_output_dir: Directory to save the segmented audio files
            (clips go into a "<drum_type>_segments" sub-directory)
        csv_output_dir: Directory to save the segment info CSV
        drum_type: Type of drum ('sd' for snare, 'kd' for kick)
        segment_duration: Duration of each segment in seconds

    Returns:
        DataFrame with columns segment_path, instrument_label, onset_time and
        original_wav. onset_time is measured on the cleaned audio. The frame
        always has these columns, even when no onset is detected, so the CSV
        stays readable by pd.read_csv.
    """
    # Create output directories
    segments_dir = Path(segments_output_dir) / f"{drum_type}_segments"
    segments_dir.mkdir(parents=True, exist_ok=True)

    csv_dir = Path(csv_output_dir)
    csv_dir.mkdir(parents=True, exist_ok=True)

    # Load audio file, keeping the original sample rate
    y, sr = librosa.load(input_file, sr=None)

    # Clean up the audio
    print("Cleaning up audio...")
    y_cleaned = preprocess_audio(y, sr)

    # Save preprocessed full audio for reference
    preprocessed_path = Path(input_file).parent / f"{Path(input_file).stem}_cleaned.wav"
    sf.write(str(preprocessed_path), y_cleaned, sr)
    print(f"Saved cleaned audio to: {preprocessed_path}")

    # Detect onsets on cleaned audio. The same hop length is used for the
    # detection and for the frame -> time conversion below.
    hop_length = 512
    onset_frames = librosa.onset.onset_detect(
        y=y_cleaned,
        sr=sr,
        units='frames',
        hop_length=hop_length,
        backtrack=True,
        pre_max=20,
        post_max=20,
        pre_avg=100,
        post_avg=100,
        delta=0.2,
        wait=30
    )
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=hop_length)

    samples_per_segment = int(segment_duration * sr)
    total_samples = len(y_cleaned)
    segment_info = []

    # Process each onset
    for i, onset_time in enumerate(onset_times):
        start_sample = int(onset_time * sr)
        end_sample = min(start_sample + samples_per_segment, total_samples)

        # Nothing to cut (onset at/after the end, or a zero-length segment)
        if end_sample <= start_sample:
            continue

        segment = y_cleaned[start_sample:end_sample]

        # Zero-pad clips truncated by the end of the recording
        if len(segment) < samples_per_segment:
            segment = np.pad(
                segment,
                (0, samples_per_segment - len(segment)),
                mode='constant'
            )

        segment_path = segments_dir / f"user_{drum_type}_{i:04d}.wav"
        sf.write(str(segment_path), segment, sr)

        segment_info.append({
            'segment_path': str(segment_path),
            'instrument_label': drum_type,
            'onset_time': onset_time,
            'original_wav': str(input_file)
        })

    # Explicit columns: with zero segments a bare DataFrame would be written
    # as a header-less CSV that pd.read_csv cannot parse.
    segment_df = pd.DataFrame(
        segment_info,
        columns=['segment_path', 'instrument_label', 'onset_time', 'original_wav']
    )
    csv_path = csv_dir / f"{drum_type}_segment_info.csv"
    segment_df.to_csv(csv_path, index=False)

    print(f"\nSegmentation Summary for {drum_type}:")
    print(f"Total segments extracted: {len(segment_df)}")
    print(f"Segments saved in: {segments_dir}")
    print(f"CSV saved as: {csv_path}")

    return segment_df

In [2]:
# Segment the user's snare recording into per-onset clips and write
# ../csv_info/sd_segment_info.csv describing them.
snare_df = segment_user_input(
    drum_type="sd",
    input_file="../audio/sample1/vox/snare.wav",
    csv_output_dir="../csv_info",
    segments_output_dir="../audio/sample1/segments",
)

Cleaning up audio...
Saved cleaned audio to: ../audio/sample1/vox/snare_cleaned.wav

Segmentation Summary for sd:
Total segments extracted: 8
Segments saved in: ../audio/sample1/segments/sd_segments
CSV saved as: ../csv_info/sd_segment_info.csv


In [3]:
# Segment the user's kick recording into per-onset clips and write
# ../csv_info/kd_segment_info.csv describing them.
kick_df = segment_user_input(
    drum_type="kd",
    input_file="../audio/sample1/vox/kick.wav",
    csv_output_dir="../csv_info",
    segments_output_dir="../audio/sample1/segments",
)

Cleaning up audio...
Saved cleaned audio to: ../audio/sample1/vox/kick_cleaned.wav

Segmentation Summary for kd:
Total segments extracted: 7
Segments saved in: ../audio/sample1/segments/kd_segments
CSV saved as: ../csv_info/kd_segment_info.csv


In [12]:
def extract_features_from_user_input(segment_info_path, segments_dir, n_mfcc=14, features_output_dir=None, labels_output_dir=None):
    """
    Extract MFCC means, MFCC-delta means and envelope descriptors per segment,
    then keep a fixed subset of 12 of those features.

    The full per-segment vector is laid out as:
        [0, n_mfcc)              MFCC means
        [n_mfcc, 2*n_mfcc)       MFCC delta means
        2*n_mfcc + 0             pre_max_deriv
        2*n_mfcc + 1             post_max_deriv
        2*n_mfcc + 2             flatness
        2*n_mfcc + 3             temporal_centroid_ratio

    Args:
        segment_info_path: Path to the CSV containing segment information
            (must have 'segment_path' and 'instrument_label' columns)
        segments_dir: Directory containing the audio segments (only used in
            the progress message; files are read from the CSV's segment_path)
        n_mfcc: Number of MFCC coefficients (default 14); must be >= 10
            because mfcc_10 is one of the selected features
        features_output_dir: File path the selected features are saved to
            with np.save (despite the name, this is a file, not a directory)
        labels_output_dir: File path the labels are saved to with np.save

    Returns:
        (X_selected, y): array of shape (n_segments, 12) and the matching
        label array. Segments that fail to load or process are skipped.

    Raises:
        ValueError: if n_mfcc < 10.
    """
    if n_mfcc < 10:
        raise ValueError(f"n_mfcc must be at least 10 to select mfcc_10, got {n_mfcc}")

    # Load segment info
    metadata = pd.read_csv(segment_info_path)

    # Per-segment feature vectors and labels
    features = []
    labels = []

    print(f"Extracting expanded feature set from {segments_dir}...")
    for _, row in tqdm(metadata.iterrows(), total=len(metadata)):
        try:
            # Load audio segment (librosa.load's default sample rate is used)
            audio, sr = librosa.load(row['segment_path'])

            # Skip if empty
            if len(audio) == 0:
                print(f"Skipping empty audio file: {row['segment_path']}")
                continue

            # 1. MFCCs and their per-coefficient mean over frames
            mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
            mfcc_mean = np.mean(mfcc, axis=1)

            # 2. MFCC deltas (first derivatives), mean over frames
            mfcc_delta_mean = np.mean(librosa.feature.delta(mfcc), axis=1)

            # 3. Envelope-based descriptors on the rectified signal
            envelope = np.abs(audio)
            max_amp_pos = np.argmax(envelope)

            # 3.1 Largest sample-to-sample rise strictly before the peak
            #     (needs at least two samples before the peak)
            pre_max_deriv = 0
            if max_amp_pos > 1:
                pre_max_deriv = np.max(np.diff(envelope[:max_amp_pos]))

            # 3.2 Steepest sample-to-sample drop from the peak onwards
            post_max_deriv = 0
            if max_amp_pos < len(envelope) - 1:
                post_max_deriv = np.min(np.diff(envelope[max_amp_pos:]))

            # 3.3 Temporal centroid as a fraction of the segment length
            env_sum = np.sum(envelope)
            if env_sum > 0:
                sample_positions = np.arange(len(audio))
                temporal_centroid = np.sum(sample_positions * envelope) / env_sum
                temporal_centroid_ratio = temporal_centroid / len(audio)
            else:
                # Silent segment: fall back to the midpoint
                temporal_centroid_ratio = 0.5

            # 3.4 Mean spectral flatness (NaN replaced by 0.0)
            with np.errstate(divide='ignore', invalid='ignore'):
                flatness = librosa.feature.spectral_flatness(y=audio)[0].mean()
                flatness = 0.0 if np.isnan(flatness) else flatness

            # Combine all features in the layout described in the docstring
            feature_vector = np.concatenate([
                mfcc_mean,
                mfcc_delta_mean,
                [pre_max_deriv,
                 post_max_deriv,
                 flatness,
                 temporal_centroid_ratio]
            ])

            features.append(feature_vector)
            labels.append(row['instrument_label'])

        except Exception as e:
            # Best effort: report the failing segment and keep going
            print(f"Error processing {row['segment_path']}: {str(e)}")
            continue

    # Convert to numpy arrays. The reshape keeps X two-dimensional even when
    # no segment was processed, so the column selection below cannot fail.
    n_features = 2 * n_mfcc + 4
    X = np.array(features).reshape(-1, n_features)
    y = np.array(labels)

    # Selected feature subset, expressed relative to n_mfcc so the same
    # features are picked for any n_mfcc >= 10 (identical to the previous
    # hard-coded indices when n_mfcc == 14).
    delta_offset = n_mfcc
    envelope_offset = 2 * n_mfcc
    important_feature_indices = [
        1,                    # mfcc_2
        delta_offset + 1,     # delta_2
        2,                    # mfcc_3
        3,                    # mfcc_4
        envelope_offset + 2,  # flatness
        4,                    # mfcc_5
        6,                    # mfcc_7
        delta_offset + 2,     # delta_3
        9,                    # mfcc_10
        0,                    # mfcc_1
        # NOTE(review): this slot holds post_max_deriv in the vector built
        # above. It was previously labelled "temporal_centroid", which lives at
        # envelope_offset + 3. The index is left unchanged so the output stays
        # compatible with features produced earlier; confirm against the
        # pipeline that generated the base training features.
        envelope_offset + 1,  # post_max_deriv
        envelope_offset + 0   # pre_max_deriv
    ]

    # Select the optimized feature set
    X_selected = X[:, important_feature_indices]

    # Save features if both paths are provided, creating parent folders first
    if features_output_dir and labels_output_dir:
        for output_path in (features_output_dir, labels_output_dir):
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        np.save(features_output_dir, X_selected)
        np.save(labels_output_dir, y)

    print(f"\nFeature extraction complete:")
    print(f"Successfully processed: {len(features)} segments")
    print(f"Feature vector shape: {X_selected.shape}")
    print("\nFeature set includes:")
    print("- Selected MFCCs and their deltas")
    print("- Envelope descriptors (flatness, temporal centroid, pre-max derivative)")

    return X_selected, y

# Example usage: see the following cells, which call this function on the snare and kick segment CSVs.


In [5]:
# Extract the selected feature subset for every snare segment and save the
# feature/label arrays as .npy files.
snare_features, snare_labels = extract_features_from_user_input(
    segments_dir='../audio/sample1/segments/sd_segments',
    segment_info_path='../csv_info/sd_segment_info.csv',
    labels_output_dir='../extracted_features/labels/snare_labels.npy',
    features_output_dir='../extracted_features/features/snare_features.npy',
)

Extracting expanded feature set from ../audio/sample1/segments/sd_segments...


100%|██████████| 8/8 [00:00<00:00, 163.96it/s]


Feature extraction complete:
Successfully processed: 8 segments
Feature vector shape: (8, 12)

Feature set includes:
- Selected MFCCs and their deltas
- Envelope descriptors (flatness, temporal centroid, pre-max derivative)





In [6]:
# Extract the selected feature subset for every kick segment and save the
# feature/label arrays as .npy files.
kick_features, kick_labels = extract_features_from_user_input(
    segments_dir='../audio/sample1/segments/kd_segments',
    segment_info_path='../csv_info/kd_segment_info.csv',
    labels_output_dir='../extracted_features/labels/kick_labels.npy',
    features_output_dir='../extracted_features/features/kick_features.npy',
)

Extracting expanded feature set from ../audio/sample1/segments/kd_segments...


100%|██████████| 7/7 [00:00<00:00, 165.18it/s]


Feature extraction complete:
Successfully processed: 7 segments
Feature vector shape: (7, 12)

Feature set includes:
- Selected MFCCs and their deltas
- Envelope descriptors (flatness, temporal centroid, pre-max derivative)





In [14]:
def create_feature_weighted_knn(X_train, y_train, X_user, y_user, weight_factor=3.0, 
                              feature_weight_method='ridge', C=1.0, **knn_params):
    """
    Create a KNN model with learned feature weights from user examples.

    A linear classifier is fitted on the user examples only; the absolute
    values of its coefficients (averaged over classes when 2-D) become
    per-feature weights, normalized to sum to 1. The KNN is then fitted on the
    base training data plus the user examples repeated int(weight_factor)
    times, using a weighted Euclidean distance.

    Args:
        X_train, y_train: Base training features and labels
        X_user, y_user: User features and labels (used both to learn the
            weights and as extra training examples)
        weight_factor: Number of times each user example is repeated; the
            fractional part is discarded by int()
        feature_weight_method: 'ridge' uses RidgeClassifier(alpha=1/C); any
            other value (including 'lasso') uses L1 logistic regression
        C: Regularization parameter for the weight-learning model
        **knn_params: Passed to KNeighborsClassifier; 'metric' and
            'metric_params' are overridden by the weighted distance

    Returns:
        (knn, feature_weights): the fitted classifier and the weight vector.
    """
    # Learn feature weights from user examples
    if feature_weight_method == 'ridge':
        model = RidgeClassifier(alpha=1.0/C)
    else:
        # 'lasso' and any unrecognised method name share the L1 model
        model = LogisticRegression(penalty='l1', C=C, solver='liblinear')

    # Fit the model to user examples
    model.fit(X_user, y_user)

    # Get feature weights from the coefficient magnitudes
    coefficients = getattr(model, 'coef_', None)
    if coefficients is None:
        feature_weights = np.ones(X_user.shape[1])
    elif len(coefficients.shape) == 2:
        feature_weights = np.mean(np.abs(coefficients), axis=0)
    else:
        feature_weights = np.abs(coefficients)

    # Normalize weights to sum to 1. Strong L1 regularization can zero out
    # every coefficient; dividing by that zero sum would make all weights (and
    # therefore all distances) NaN, so fall back to uniform weights instead.
    weight_total = np.sum(feature_weights)
    if np.isfinite(weight_total) and weight_total > 0:
        feature_weights = feature_weights / weight_total
    else:
        print("\nWarning: learned feature weights are all zero or invalid; using uniform weights.")
        n_features = feature_weights.shape[0]
        feature_weights = np.full(n_features, 1.0 / n_features)

    # Print top and bottom features
    weight_order = np.argsort(feature_weights)
    top_indices = weight_order[-5:]
    bottom_indices = weight_order[:5]

    print("\nFeature weights learned from user examples:")
    print(f"Top 5 features: {top_indices} with weights {feature_weights[top_indices]}")
    print(f"Bottom 5 features: {bottom_indices} with weights {feature_weights[bottom_indices]}")

    # Repeat user examples so they carry more votes among the neighbours
    n_repeats = int(weight_factor)
    X_user_repeated = np.repeat(X_user, n_repeats, axis=0)
    y_user_repeated = np.repeat(y_user, n_repeats)

    # Combine with training data
    X_combined = np.vstack([X_train, X_user_repeated])
    y_combined = np.concatenate([y_train, y_user_repeated])

    # Weighted Euclidean distance using the learned feature weights
    def weighted_euclidean(x, y):
        return np.sqrt(np.sum(feature_weights * ((x - y) ** 2)))

    # Create KNN with the custom metric; this replaces any 'metric' passed in
    # through knn_params.
    params = knn_params.copy()
    params['metric'] = 'pyfunc'
    params['metric_params'] = {'func': weighted_euclidean}

    knn = KNeighborsClassifier(**params)
    knn.fit(X_combined, y_combined)

    return knn, feature_weights

def grid_search_hyperparameters(X_train, y_train, X_user, y_user, X_val, y_val, base_params):
    """
    Grid search for weight factors and regularization parameters.
    Validation is done only on user validation set.

    Every combination of weight factor, C and weighting method is fitted with
    create_feature_weighted_knn and scored with model.score on (X_val, y_val).
    Combinations that raise are reported and skipped.

    Args:
        X_train, y_train: Base training features and labels
        X_user, y_user: User adaptation features and labels
        X_val, y_val: User validation features and labels
        base_params: Keyword arguments forwarded to create_feature_weighted_knn
            (and from there to KNeighborsClassifier)

    Returns:
        (best_params, best_score): best_params has keys 'weight_factor', 'C'
        and 'method' for the first combination reaching the highest score. It
        is an empty dict (with best_score 0) only if every combination failed.
    """
    # NOTE(review): create_feature_weighted_knn applies int() to weight_factor,
    # so 2.5 and 3.5 appear to train the same models as 2 and 3 — confirm
    # whether the fractional values are intended.
    weight_factors = [2, 2.5, 3, 3.5, 4]
    C_values = [0.3, 0.6, 1.0, 1.5, 2.0]

    best_score = 0
    best_params = {}
    results = []

    print("\nGrid searching weight factors and regularization parameters...")
    for weight_factor in weight_factors:
        for C in C_values:
            for method in ['ridge', 'lasso']:
                try:
                    model, _ = create_feature_weighted_knn(
                        X_train, y_train, X_user, y_user,
                        weight_factor=weight_factor,
                        feature_weight_method=method,
                        C=C,
                        **base_params
                    )

                    # Evaluate on validation set
                    score = model.score(X_val, y_val)
                except Exception as e:
                    print(f"Failed for weight={weight_factor}, C={C}, Method={method}: {str(e)}")
                    continue

                candidate = {
                    'weight_factor': weight_factor,
                    'C': C,
                    'method': method
                }
                results.append({**candidate, 'score': score})

                # Accept the first successful combination unconditionally so a
                # run where every score is 0.0 still yields usable parameters;
                # after that only a strictly better score replaces it.
                if not best_params or score > best_score:
                    best_score = score
                    best_params = candidate

                print(f"Weight={weight_factor}, C={C}, Method={method}: Score={score:.3f}")

    # Sort and print results
    results.sort(key=lambda x: x['score'], reverse=True)
    print("\nTop 5 parameter combinations:")
    for i, result in enumerate(results[:5], 1):
        print(f"{i}. Weight={result['weight_factor']}, C={result['C']}, "
              f"Method={result['method']}: Score={result['score']:.3f}")

    return best_params, best_score

def create_hybrid_knn_model(base_features_path, base_labels_path, user_features_paths, user_labels_paths,
                            vis_dir="/Users/arul/ML/BEATBOX/projectFiles/visualization/hybridknn"):
    """
    Create a hybrid KNN model using validation strategy from knn_hybrid.py

    Steps: load base and user data, split the user examples per class into
    adaptation and validation sets, scale with a StandardScaler fitted on the
    base data, grid-search plain KNN parameters on the base data, grid-search
    the feature-weighting parameters on the user split, then fit the final
    model on the base data plus ALL user examples.

    Args:
        base_features_path: .npy file with the base training features
        base_labels_path: .npy file with the base training labels
        user_features_paths: dict mapping a name to a user feature .npy file
        user_labels_paths: dict mapping the same names to label .npy files
        vis_dir: Directory the feature-weight bar chart is saved to

    Returns:
        (final_model, scaler, label_encoder, base_knn_params,
         feature_weighting_params, feature_weights)

    Raises:
        ValueError: if a feature file and its label file differ in length.
    """
    # Load base training data
    X_train = np.load(base_features_path)
    y_train = np.load(base_labels_path)

    # Pair each feature file with its label file by dictionary key, so the
    # pairing does not depend on the two dicts sharing insertion order.
    if set(user_features_paths) == set(user_labels_paths):
        path_pairs = [(user_features_paths[name], user_labels_paths[name])
                      for name in user_features_paths]
    else:
        print("\nWarning: user feature/label path keys differ; pairing files by position.")
        path_pairs = list(zip(user_features_paths.values(), user_labels_paths.values()))

    # Load user examples and their labels
    X_user_list = []
    y_user_list = []
    for feature_path, label_path in path_pairs:
        features = np.load(feature_path)
        labels = np.load(label_path)
        if len(features) != len(labels):
            raise ValueError(
                f"{feature_path} has {len(features)} rows but {label_path} has {len(labels)} labels"
            )
        X_user_list.append(features)
        y_user_list.append(labels)

    X_user = np.vstack(X_user_list)
    y_user = np.concatenate(y_user_list)

    # Split user examples into adaptation and validation sets
    examples_per_class = 5
    adapt_indices = []
    val_indices = []

    for label in np.unique(y_user):
        label_indices = np.where(y_user == label)[0]
        # Small classes are split in half; larger ones contribute a fixed
        # number of adaptation examples and the rest go to validation.
        if len(label_indices) <= examples_per_class * 2:
            n_adapt = len(label_indices) // 2
        else:
            n_adapt = examples_per_class

        # A local RandomState(42) draws the same indices as seeding the global
        # generator with 42 would, without resetting global random state.
        rng = np.random.RandomState(42)
        selected_indices = rng.choice(label_indices, n_adapt, replace=False)
        adapt_indices.extend(selected_indices)
        selected_lookup = set(selected_indices.tolist())
        val_indices.extend(idx for idx in label_indices if idx not in selected_lookup)

    # Split user data into adaptation and validation sets
    X_adapt = X_user[adapt_indices]
    y_adapt = y_user[adapt_indices]
    X_user_val = X_user[val_indices]
    y_user_val = y_user[val_indices]

    print("\nData split summary:")
    print(f"Base training examples: {len(X_train)}")
    print(f"User adaptation examples: {len(X_adapt)}")
    print(f"User validation examples: {len(X_user_val)}")

    print("\nAdaptation set label distribution:")
    print(pd.Series(y_adapt).value_counts())
    print("\nValidation set label distribution:")
    print(pd.Series(y_user_val).value_counts())

    # Scale features (scaler statistics come from the base data only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_adapt_scaled = scaler.transform(X_adapt)
    X_user_val_scaled = scaler.transform(X_user_val)
    X_user_scaled = scaler.transform(X_user)  # Scale all user data for final model

    # Encode labels (classes come from the base labels only)
    le = LabelEncoder()
    y_train_enc = le.fit_transform(y_train)
    y_adapt_enc = le.transform(y_adapt)
    y_user_val_enc = le.transform(y_user_val)
    y_user_enc = le.transform(y_user)  # Encode all user labels for final model

    # First find best base model parameters
    param_grid = {
        'n_neighbors': [3, 5, 7, 9, 11],
        'weights': ['uniform', 'distance'],
        'metric': ['euclidean', 'manhattan']
    }

    print("\nPerforming grid search for base model parameters...")
    base_knn = KNeighborsClassifier()
    grid_search = GridSearchCV(
        base_knn,
        param_grid,
        cv=5,
        scoring='accuracy',
        n_jobs=-1
    )
    grid_search.fit(X_train_scaled, y_train_enc)

    print("\nBest base model parameters:", grid_search.best_params_)
    print(f"Best cross-validation score: {grid_search.best_score_:.3f}")

    # Grid search for feature weighting parameters
    best_params, best_score = grid_search_hyperparameters(
        X_train_scaled, y_train_enc,
        X_adapt_scaled, y_adapt_enc,
        X_user_val_scaled, y_user_val_enc,
        grid_search.best_params_
    )

    # The search can come back empty (no combination selected); use the
    # defaults of create_feature_weighted_knn rather than failing on a
    # missing key below.
    if not best_params:
        print("\nWarning: feature-weighting grid search selected no parameters; using defaults.")
        best_params = {'weight_factor': 3.0, 'C': 1.0, 'method': 'ridge'}

    # Create final model with best parameters using ALL data
    final_model, feature_weights = create_feature_weighted_knn(
        X_train_scaled, y_train_enc,
        X_user_scaled, y_user_enc,  # Use all user examples
        weight_factor=best_params['weight_factor'],
        feature_weight_method=best_params['method'],
        C=best_params['C'],
        **grid_search.best_params_
    )

    # Visualize feature weights
    plt.figure(figsize=(12, 6))
    plt.bar(range(len(feature_weights)), feature_weights)
    plt.title('Feature Weights in Final Model')
    plt.xlabel('Feature Index')
    plt.ylabel('Weight')

    # Save visualization
    os.makedirs(vis_dir, exist_ok=True)
    plt.savefig(os.path.join(vis_dir, 'final_feature_weights.png'))
    plt.close()

    print("\nFinal Model Summary:")
    print(f"Base training examples: {len(X_train)}")
    print(f"Total user examples: {len(X_user)}")
    print("\nBest parameters:")
    print(f"Base KNN parameters: {grid_search.best_params_}")
    print(f"Feature weighting parameters: {best_params}")

    return final_model, scaler, le, grid_search.best_params_, best_params, feature_weights

def predict_drum_type(model, scaler, label_encoder, features):
    """
    Classify new feature rows with the hybrid model.

    The features are passed through scaler.transform, the model's encoded
    predictions are mapped back with label_encoder.inverse_transform, and the
    model's predict_proba output for the same scaled rows is returned too.

    Returns:
        (predictions, probabilities)
    """
    scaled = scaler.transform(features)
    decoded = label_encoder.inverse_transform(model.predict(scaled))
    return decoded, model.predict_proba(scaled)

def main():
    """
    Train the hybrid KNN model from the saved base and user feature files.

    Returns:
        The tuple produced by create_hybrid_knn_model:
        (model, scaler, label_encoder, base_params, feature_params,
         feature_weights), so the trained objects can be passed to
        predict_drum_type instead of being discarded.
    """
    # NOTE(review): these are machine-specific absolute paths, while the
    # feature-extraction cells above save to '../extracted_features/...';
    # confirm both refer to the same files on the machine running this.
    base_features_path = "/Users/arul/ML/BEATBOX/projectFiles/extracted_features/features/mfcc_extracted_features.npy"
    base_labels_path = "/Users/arul/ML/BEATBOX/projectFiles/extracted_features/labels/mfcc_extracted_labels.npy"

    # Paths for both features and labels
    user_features_paths = {
        'kick': '/Users/arul/ML/BEATBOX/projectFiles/code/final_pipeline/extracted_features/features/kick_features.npy',
        'snare': '/Users/arul/ML/BEATBOX/projectFiles/code/final_pipeline/extracted_features/features/snare_features.npy'
    }

    user_labels_paths = {
        'kick': '/Users/arul/ML/BEATBOX/projectFiles/code/final_pipeline/extracted_features/labels/kick_labels.npy',
        'snare': '/Users/arul/ML/BEATBOX/projectFiles/code/final_pipeline/extracted_features/labels/snare_labels.npy'
    }

    model, scaler, label_encoder, base_params, feature_params, feature_weights = create_hybrid_knn_model(
        base_features_path=base_features_path,
        base_labels_path=base_labels_path,
        user_features_paths=user_features_paths,
        user_labels_paths=user_labels_paths
    )

    print("\nModel is ready to use!")
    print("Class labels:", label_encoder.classes_)

    return model, scaler, label_encoder, base_params, feature_params, feature_weights

if __name__ == "__main__":
    # Keep the trained objects available to later cells.
    model, scaler, label_encoder, base_params, feature_params, feature_weights = main()


Data split summary:
Base training examples: 5714
User adaptation examples: 7
User validation examples: 8

Adaptation set label distribution:
sd    4
kd    3
Name: count, dtype: int64

Validation set label distribution:
kd    4
sd    4
Name: count, dtype: int64

Performing grid search for base model parameters...

Best base model parameters: {'metric': 'euclidean', 'n_neighbors': 11, 'weights': 'uniform'}
Best cross-validation score: 0.514

Grid searching weight factors and regularization parameters...

Feature weights learned from user examples:
Top 5 features: [ 0  1  3 11 10] with weights [0.08338947 0.09972653 0.11888635 0.15797155 0.16685551]
Bottom 5 features: [9 4 8 2 5] with weights [0.01827592 0.04874903 0.04935377 0.05544346 0.05934138]
Weight=2, C=0.3, Method=ridge: Score=0.875

Feature weights learned from user examples:
Top 5 features: [ 7  8  9 11 10] with weights [0. 0. 0. 0. 1.]
Bottom 5 features: [0 1 2 3 4] with weights [0. 0. 0. 0. 0.]
Weight=2, C=0.3, Method=lasso: 

In [8]:
def segment_full_recording(input_file, output_dir, segment_info_path,
                           vis_dir="/Users/arul/ML/BEATBOX/projectFiles/visualization/final_pipeline",
                           segment_duration=0.25, pre_roll=0.05):
    """
    Segment a full vocal percussion recording with parameters optimized for detecting rapid successive onsets.

    One fixed-length clip is written per detected onset, a waveform/onset plot
    is saved as 'onset_detection.png', and a CSV listing the clips is written
    with an empty 'instrument_label' column to be filled in manually.

    Args:
        input_file: Path to the recording to segment
        output_dir: Directory the segment wav files are written to
        segment_info_path: Path of the CSV describing the segments
        vis_dir: Directory the onset-detection plot is saved to
        segment_duration: Length of each clip in seconds
        pre_roll: Seconds of audio kept before each detected onset

    Returns:
        DataFrame with columns segment_path, start_time, end_time and
        instrument_label.
    """
    # Create output directories if they don't exist
    segments_dir = Path(output_dir)
    segments_dir.mkdir(parents=True, exist_ok=True)

    vis_dir = Path(vis_dir)
    vis_dir.mkdir(parents=True, exist_ok=True)

    # The CSV's folder must exist before to_csv is called
    Path(segment_info_path).parent.mkdir(parents=True, exist_ok=True)

    # Load the audio file (librosa.load's default sample rate is used)
    y, sr = librosa.load(input_file)

    # One hop length shared by detection and all frame conversions
    hop_length = 256

    # Parameters optimized for detecting close successive onsets
    onset_frames = librosa.onset.onset_detect(
        y=y,
        sr=sr,
        units='frames',
        hop_length=hop_length,
        backtrack=True,
        pre_max=3,
        post_max=3,
        pre_avg=15,
        post_avg=15,
        delta=0.07,
        wait=5
    )

    # Onset positions in samples and seconds (computed once, reused below)
    onset_samples = librosa.frames_to_samples(onset_frames, hop_length=hop_length)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=hop_length)

    # Visualize the onset detection: waveform with a red line per onset
    plt.figure(figsize=(15, 5))
    times = np.arange(len(y)) / sr
    plt.plot(times, y, alpha=0.6, label='Waveform')
    plt.vlines(onset_times, -1, 1, color='r', label='Onsets')

    plt.title('Waveform and Detected Onsets')
    plt.xlabel('Time (s)')
    plt.ylabel('Amplitude')
    plt.legend()
    plt.tight_layout()

    # Save the plot in the visualization directory
    plt.savefig(vis_dir / 'onset_detection.png')
    plt.close()

    # Initialize lists for segment info
    segment_paths = []
    start_times = []
    end_times = []

    samples_per_segment = int(segment_duration * sr)
    pre_roll_samples = int(pre_roll * sr)

    print(f"\nSegmenting audio file: {input_file}")
    print(f"Found {len(onset_frames)} onsets")

    # Process each onset
    for i, (onset_sample, onset_time) in enumerate(zip(onset_samples, onset_times)):
        # Start slightly before the onset, clamped to the start of the audio
        start_sample = max(0, onset_sample - pre_roll_samples)
        end_sample = min(len(y), start_sample + samples_per_segment)

        # Extract segment
        segment = y[start_sample:end_sample]

        # Zero-pad clips truncated by the end of the recording
        if len(segment) < samples_per_segment:
            segment = np.pad(segment, (0, samples_per_segment - len(segment)))

        # Save segment directly in output_dir
        segment_path = segments_dir / f'segment_{i:04d}.wav'
        sf.write(segment_path, segment, sr)

        # Store segment information.
        # NOTE(review): start_time/end_time are based on the detected onset,
        # not on the clip boundaries, which begin pre_roll seconds earlier —
        # confirm which one downstream consumers expect.
        segment_paths.append(str(segment_path))
        start_times.append(onset_time)
        end_times.append(onset_time + segment_duration)

    # Create DataFrame
    segment_info = pd.DataFrame({
        'segment_path': segment_paths,
        'start_time': start_times,
        'end_time': end_times,
        'instrument_label': ''
    })

    # Save to CSV
    segment_info.to_csv(segment_info_path, index=False)

    print(f"\nProcessing complete:")
    print(f"Total segments created: {len(segment_paths)}")
    print(f"Segments saved in: {segments_dir}")
    print(f"CSV file saved as: {segment_info_path}")
    print(f"Onset detection plot saved as: {vis_dir}/onset_detection.png")
    print("\nPlease manually fill in the 'instrument_label' column with:")
    print("- 'kd' for kick drum")
    print("- 'sd' for snare drum")
    print("- 'hhc' for closed hi-hat")
    print("- 'hho' for open hi-hat")

    return segment_info

# Example usage: segment the full vocal recording into per-onset clips.
# NOTE(review): these are machine-specific absolute paths; the later
# feature-extraction cell reads '../csv_info/full_segment_info.csv' —
# confirm both point at the same file.
input_file = "/Users/arul/ML/BEATBOX/projectFiles/code/final_pipeline/audio/sample1/vox/full_vocal.wav"
output_dir = "/Users/arul/ML/BEATBOX/projectFiles/code/final_pipeline/audio/sample1/segments/full_segments"
segment_info_path = "/Users/arul/ML/BEATBOX/projectFiles/code/final_pipeline/csv_info/full_segment_info.csv"

segment_info = segment_full_recording(
    input_file=input_file,
    output_dir=output_dir,
    segment_info_path=segment_info_path,
)


Segmenting audio file: /Users/arul/ML/BEATBOX/projectFiles/code/final_pipeline/audio/sample1/vox/full_vocal.wav
Found 22 onsets

Processing complete:
Total segments created: 22
Segments saved in: /Users/arul/ML/BEATBOX/projectFiles/code/final_pipeline/audio/sample1/segments/full_segments
CSV file saved as: /Users/arul/ML/BEATBOX/projectFiles/code/final_pipeline/csv_info/full_segment_info.csv
Onset detection plot saved as: /Users/arul/ML/BEATBOX/projectFiles/visualization/final_pipeline/onset_detection.png

Please manually fill in the 'instrument_label' column with:
- 'kd' for kick drum
- 'sd' for snare drum
- 'hhc' for closed hi-hat
- 'hho' for open hi-hat


In [15]:
# Extract the selected feature subset for every segment of the full
# recording and save the feature/label arrays as .npy files.
full_features, full_labels = extract_features_from_user_input(
    segments_dir='../audio/sample1/segments/full_segments',
    segment_info_path='../csv_info/full_segment_info.csv',
    labels_output_dir='../extracted_features/labels/full_labels.npy',
    features_output_dir='../extracted_features/features/full_features.npy',
)

Extracting expanded feature set from ../audio/sample1/segments/full_segments...


100%|██████████| 22/22 [00:00<00:00, 157.12it/s]


Feature extraction complete:
Successfully processed: 22 segments
Feature vector shape: (22, 12)

Feature set includes:
- Selected MFCCs and their deltas
- Envelope descriptors (flatness, temporal centroid, pre-max derivative)



