In [None]:
import numpy as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import os

In [None]:
# Custom Hann window implementation
def hann_window(N):
    """
    Implement a symmetric Hann window from scratch.

    N: window length (number of samples)

    Returns a float array of length N holding
    0.5 * (1 - cos(2*pi*n / (N - 1))) for n = 0 .. N-1.

    Degenerate lengths follow the same convention as np.hanning:
    N < 1 gives an empty array and N == 1 gives [1.0].
    """
    if N < 1:
        return np.array([])
    if N == 1:
        # The general formula divides by (N - 1) and would produce 0/0 = nan here.
        return np.ones(1)
    n = np.arange(N)
    return 0.5 * (1 - np.cos(2 * np.pi * n / (N - 1)))

In [None]:
# Function to load and preprocess audio
def load_audio(file_path, sr=22050, duration=3.0):
    """
    Read an audio file and force it to a fixed number of samples.

    The file is loaded through librosa.load with the requested sample
    rate and maximum duration. The target length is sr * duration
    samples, using the sample rate returned by librosa.load: shorter
    signals get zeros appended, longer ones are cut.

    Returns the tuple (samples, sample_rate).
    """
    samples, rate = librosa.load(file_path, sr=sr, duration=duration)
    target = rate * duration
    n_samples = len(samples)

    if n_samples < target:
        # Too short: append zeros (np.pad's default mode) up to the target length.
        missing = int(target - n_samples)
        return np.pad(samples, (0, missing)), rate

    # Long enough: keep only the leading target-length part.
    return samples[:int(target)], rate


In [None]:
# Function to generate spectrograms with different windows
def generate_spectrogram(y, sr, window_type='hann', n_fft=2048, hop_length=512):
    """
    Compute a dB-scaled magnitude spectrogram of `y` with a chosen window.

    window_type selects the analysis window handed to librosa.stft:
      'hann_custom' -> hann_window(n_fft)
      'hamming'     -> np.hamming(n_fft)
      'rectangular' -> np.ones(n_fft)
      anything else -> the string 'hann' (librosa's own Hann window)

    `sr` is accepted for interface symmetry but is not used here.
    Returns the output of librosa.amplitude_to_db(|STFT|, ref=np.max).
    """
    # Builders are wrapped in lambdas so only the selected window is created.
    window_builders = (
        ('hann_custom', lambda: hann_window(n_fft)),
        ('hamming', lambda: np.hamming(n_fft)),
        ('rectangular', lambda: np.ones(n_fft)),
    )

    window = 'hann'  # fallback: let librosa build its default Hann window
    for name, build in window_builders:
        if window_type == name:
            window = build()
            break

    # Short-time Fourier transform, then magnitude on a log (dB) scale.
    stft_matrix = librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window=window)
    magnitude = np.abs(stft_matrix)
    return librosa.amplitude_to_db(magnitude, ref=np.max)

In [None]:
# Function to extract features from spectrogram
def extract_features(S_db):
    """
    Summarise a spectrogram as one flat feature vector.

    For every row of S_db the mean and the standard deviation along
    axis 1 are computed. The result is all row means followed by all
    row standard deviations, so its length is twice the number of rows.
    """
    per_row_stats = [reduce_fn(S_db, axis=1) for reduce_fn in (np.mean, np.std)]
    return np.concatenate(per_row_stats)

In [None]:
# Load and process data from fold1
def load_dataset(fold_num=1, metadata_path='metadata/UrbanSound8K.csv', audio_dir='audio'):
    """
    Build the feature matrix and label vector for one fold.

    fold_num:      value of the 'fold' column to select; also names the
                   'fold{fold_num}' sub-directory the audio is read from
    metadata_path: CSV with 'slice_file_name', 'fold' and 'classID' columns
    audio_dir:     directory that contains the 'fold{n}' sub-directories

    For every selected row the audio is loaded, one spectrogram per window
    type ('hann_custom', 'hamming', 'rectangular') is generated, and the
    per-window feature vectors are concatenated into a single sample.

    Files that fail to load or process are reported and skipped.

    Returns (X, y): np.array of feature vectors and np.array of class IDs.
    """
    metadata = pd.read_csv(metadata_path)
    fold_data = metadata[metadata['fold'] == fold_num]
    window_types = ('hann_custom', 'hamming', 'rectangular')

    X = []
    y = []

    for file_name, class_id in zip(fold_data['slice_file_name'], fold_data['classID']):
        file_path = os.path.join(audio_dir, f'fold{fold_num}', file_name)
        try:
            audio, sr = load_audio(file_path)
            # One feature vector per window type, joined into a single sample.
            features = np.concatenate([
                extract_features(generate_spectrogram(audio, sr, window_type=window))
                for window in window_types
            ])
        except Exception as exc:
            # Only ordinary errors are skipped; KeyboardInterrupt and SystemExit
            # still propagate so a long run can be aborted. The cause is printed
            # so failures are not hidden.
            print(f"Error processing {file_path}: {exc}")
            continue

        X.append(features)
        y.append(class_id)

    return np.array(X), np.array(y)


In [None]:
# Load dataset
X, y = load_dataset(fold_num=1)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features (the scaler is fitted on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train SVM classifier
svm = SVC(kernel='rbf', random_state=42)
svm.fit(X_train_scaled, y_train)

# Evaluate
y_pred = svm.predict(X_test_scaled)
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Visualize spectrograms for comparison
# The metadata table read inside load_dataset is local to that function,
# so it is read again here to pick the first fold-1 file as the sample.
metadata = pd.read_csv('metadata/UrbanSound8K.csv')
sample_name = metadata.loc[metadata['fold'] == 1, 'slice_file_name'].iloc[0]
sample_audio, sr = load_audio(os.path.join('audio', 'fold1', sample_name))

# Plot spectrograms with different windows
window_types = ['hann_custom', 'hamming', 'rectangular']
fig, axes = plt.subplots(1, len(window_types), figsize=(15, 5))

for ax, window_type in zip(axes, window_types):
    S_db = generate_spectrogram(sample_audio, sr, window_type=window_type)
    # Pass the sample rate so the axes are scaled from the loaded audio.
    librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='hz', ax=ax)
    ax.set_title(f'{window_type} Window')

plt.tight_layout()
plt.show()