In [9]:
# One-time environment setup, run as shell commands through IPython's `!` prefix.
# The pip installs are left commented out.
# NOTE(review): the output recorded for this cell reports that sudo is disabled on
# this machine, so the apt command below did not install ffmpeg here - confirm how
# ffmpeg is meant to be provided on this platform.
# !pip install librosa
# !pip install pydub
!sudo apt install ffmpeg



Sudo is disabled on this machine. To enable it, go to the Developer Settings page (ms-settings:developers) in the Settings app


In [43]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

# Extract one feature vector per full second of audio
def extract_features_per_second(file_path, sr=16000, n_mfcc=13):
    """Summarise an audio file as one feature row per full second.

    The file is loaded with ``librosa.load``, cut into consecutive,
    non-overlapping one-second segments, and every segment is reduced to the
    time-average of each feature.

    Parameters
    ----------
    file_path : str or path-like
        Audio file passed to ``librosa.load``.
    sr : int or None, default 16000
        Sampling rate requested from ``librosa.load``. With ``None`` the rate
        reported back by ``librosa.load`` is used for segmentation.
    n_mfcc : int, default 13
        Number of MFCC coefficients computed per segment.

    Returns
    -------
    list of list
        One row per full second. Each row holds ``n_mfcc`` MFCC means followed
        by the mean spectral centroid, spectral bandwidth, spectral rolloff,
        zero-crossing rate and RMS energy (``n_mfcc + 5`` values). A trailing
        partial second is dropped; a clip shorter than one second yields ``[]``.
    """
    # Keep the rate librosa actually used: the original discarded it, which
    # made sr=None fail as soon as the segment bounds were computed.
    y, sr = librosa.load(file_path, sr=sr)

    # Slice bounds must be integers.
    samples_per_second = int(sr)
    # Floor division already excludes the trailing partial second, so no
    # per-iteration bounds check is needed.
    num_seconds = len(y) // samples_per_second

    features = []
    for second in range(num_seconds):
        start = second * samples_per_second
        y_segment = y[start:start + samples_per_second]

        # MFCC: average each coefficient over the frames of the segment.
        mfcc = librosa.feature.mfcc(y=y_segment, sr=sr, n_mfcc=n_mfcc)
        segment_features = list(np.mean(mfcc, axis=1))

        # Each remaining feature is collapsed to a single mean over the segment.
        # Spectral Centroid
        centroid = librosa.feature.spectral_centroid(y=y_segment, sr=sr)
        segment_features.append(np.mean(centroid))

        # Spectral Bandwidth
        bandwidth = librosa.feature.spectral_bandwidth(y=y_segment, sr=sr)
        segment_features.append(np.mean(bandwidth))

        # Spectral Rolloff
        rolloff = librosa.feature.spectral_rolloff(y=y_segment, sr=sr)
        segment_features.append(np.mean(rolloff))

        # Zero Crossing Rate
        zcr = librosa.feature.zero_crossing_rate(y_segment)
        segment_features.append(np.mean(zcr))

        # Root Mean Square Energy
        rms = librosa.feature.rms(y=y_segment)
        segment_features.append(np.mean(rms))

        features.append(segment_features)

    return features

# Build feature names dynamically (n_mfcc MFCCs + 5 others)
def build_feature_names(n_mfcc=13):
    """Return the ordered feature column names.

    Parameters
    ----------
    n_mfcc : int, default 13
        Number of MFCC columns to name (``mfcc_1`` ... ``mfcc_<n_mfcc>``).

    Returns
    -------
    list of str
        The MFCC names followed by the five fixed names
        ``spectral_centroid``, ``spectral_bandwidth``, ``spectral_rolloff``,
        ``zero_crossing_rate`` and ``rms``.
    """
    mfcc_names = [f'mfcc_{index}' for index in range(1, n_mfcc + 1)]
    other_names = [
        'spectral_centroid',
        'spectral_bandwidth',
        'spectral_rolloff',
        'zero_crossing_rate',
        'rms',
    ]
    return mfcc_names + other_names

# Directory setup
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
base_dir = r"D:\PROJECT\test_files"
data = []

# The column layout is identical for every file, so build it once up front
# instead of once per file.
feature_names = build_feature_names()

# Loop over folders (labels): each sub-directory name is used as the label.
# Listings are sorted because os.listdir returns entries in arbitrary order,
# which would make the row order differ between runs.
for label in sorted(os.listdir(base_dir)):
    label_path = os.path.join(base_dir, label)
    if not os.path.isdir(label_path):
        continue

    print(f"\n📂 Processing label: {label}")

    for file in tqdm(sorted(os.listdir(label_path)), desc=f"Processing {label}"):
        # Case-insensitive match so files such as "clip.WAV" are not skipped.
        if not file.lower().endswith(".wav"):
            continue
        file_path = os.path.join(label_path, file)

        try:
            feature_list = extract_features_per_second(file_path)
            # Validate every row before adding any, so a file that fails
            # contributes no partial rows to the dataset.
            for row in feature_list:
                if len(row) != len(feature_names):
                    raise ValueError(f"Feature size mismatch: expected {len(feature_names)}, got {len(row)}")
            data.extend(row + [label] for row in feature_list)
        except Exception as e:
            # Best-effort: report the failing file and continue with the rest.
            print(f"❌ Failed to process {file_path}: {e}")

# Create DataFrame
columns = feature_names + ['label']
df = pd.DataFrame(data, columns=columns)

# No CSV is written in this cell (df.to_csv is called in a later cell), so the
# message reports only what actually happened here.
print(f"✅ Feature extraction complete: {len(df)} rows.")



📂 Processing label: test


Processing test: 100%|██████████| 2/2 [00:15<00:00,  7.86s/it]

✅ Feature extraction complete. CSV saved.





In [44]:
# Display the extracted feature table; the notebook shows a truncated view
# (first and last rows) of the DataFrame.
df

Unnamed: 0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12,mfcc_13,spectral_centroid,spectral_bandwidth,spectral_rolloff,zero_crossing_rate,rms,label
0,-200.509064,112.375259,-21.813560,-14.113348,-30.809940,-10.061255,-4.012502,-31.492403,3.779655,-17.552675,-14.417215,-2.409977,-13.481340,1427.698176,1439.498066,2252.929688,0.118088,0.125944,test
1,-227.770096,81.983719,-26.648767,-12.659146,-5.370749,-16.519175,-16.746387,-0.469014,2.341555,-12.260289,-10.567823,-1.655251,-6.438926,2357.249528,1578.066633,3745.117188,0.236435,0.062122,test
2,-226.942108,128.657974,-109.617699,-28.437965,-18.532948,-22.057232,-5.216666,17.940517,-4.811624,3.923195,-5.861892,-10.093893,-2.354614,1412.929004,956.457645,2138.916016,0.134491,0.036736,test
3,-179.107819,103.379257,-115.996704,-39.555637,-25.722977,-19.093004,-8.217261,15.187338,-3.756955,6.173240,-9.361241,-2.947541,-11.943069,1484.839124,1014.475765,2176.513672,0.138947,0.053238,test
4,-196.039948,84.086082,-112.459290,-42.141533,-41.240017,-11.951616,-12.352276,6.076131,-0.428791,3.832598,-6.478458,0.490439,-13.146362,1553.698783,1118.746937,2343.017578,0.140427,0.042754,test
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,-296.380798,85.925667,-6.378998,4.019707,-9.480174,-28.543724,-8.037367,-5.121869,-7.725598,-13.714382,-10.529667,-4.534865,-7.306942,1731.629972,1540.388709,3386.230469,0.132477,0.065315,test
1196,-206.817169,74.131073,-26.174725,3.644636,-22.189034,-35.053993,-13.888941,-20.905640,-8.972501,-11.379476,-7.903916,-9.265242,-19.051685,1709.170705,1561.471524,3140.869141,0.125946,0.161284,test
1197,-348.416382,79.114502,-16.922371,-0.691815,3.333822,7.374169,-4.362174,-14.373482,-0.341225,1.792366,0.532584,-11.780407,-12.989074,1269.215702,1317.598152,2205.566406,0.093018,0.072733,test
1198,-216.679214,90.805305,-23.770758,-10.180990,-16.708393,-9.543818,-24.592430,-22.174715,-1.049071,-15.691978,-4.352066,-12.615644,-15.634091,1664.346384,1464.152517,2843.994141,0.126938,0.135073,test


In [45]:
# Write the feature table to Fake_2.csv in the current working directory;
# index=False leaves the DataFrame's row index out of the file.
df.to_csv("Fake_2.csv", index=False)