In [8]:
from pathlib import Path
import os
import librosa
import numpy as np
import glob
import soundfile as sf
import noisereduce as nr
import scipy.signal as signal
from scipy.io import wavfile

# 预处理

In [3]:
def denoise_spectral_gate(
    waveform,
    sample_rate,
    stationary=True,
    noise_duration=0.5,
    n_fft=4096,
    hop_length=512,
    time_mask_smooth_ms=80,
    freq_mask_smooth_hz=400,
    prop_decrease=0.8,
):
    """Denoise ``waveform`` with noisereduce's spectral gating.

    The leading ``noise_duration`` seconds of ``waveform`` are sliced off and
    handed to ``nr.reduce_noise`` as the noise clip (``y_noise``); every other
    keyword is forwarded unchanged.

    Args:
        waveform: Audio samples; must support slicing.
        sample_rate: Sampling rate of ``waveform`` in Hz.
        stationary: Forwarded to ``nr.reduce_noise``.
        noise_duration: Length, in seconds, of the leading noise clip.
        n_fft: Forwarded to ``nr.reduce_noise``.
        hop_length: Forwarded to ``nr.reduce_noise``.
        time_mask_smooth_ms: Forwarded to ``nr.reduce_noise``.
        freq_mask_smooth_hz: Forwarded to ``nr.reduce_noise``.
        prop_decrease: Forwarded to ``nr.reduce_noise``.

    Returns:
        Whatever ``nr.reduce_noise`` returns for the given input.
    """
    # The head of the recording serves as the noise reference.
    noise_profile = waveform[: int(noise_duration * sample_rate)]

    gate_options = dict(
        stationary=stationary,
        y_noise=noise_profile,
        n_fft=n_fft,
        hop_length=hop_length,
        time_mask_smooth_ms=time_mask_smooth_ms,
        freq_mask_smooth_hz=freq_mask_smooth_hz,
        prop_decrease=prop_decrease,
    )
    return nr.reduce_noise(waveform, sample_rate, **gate_options)

def normalize_and_center(audio) -> np.ndarray:
    """Peak-normalize ``audio`` and then remove its mean.

    The signal is first divided by its largest absolute sample (skipped when
    that peak is not positive, e.g. for an all-zero signal) and the mean of
    the scaled signal is then subtracted. Because centering happens after
    scaling, the peak of the result is not necessarily exactly 1.

    Args:
        audio: Array-like of samples (an ``np.ndarray``, list or tuple).

    Returns:
        The scaled, zero-mean samples as an ``np.ndarray``. An empty input
        yields an empty float array instead of raising.
    """
    samples = np.asarray(audio)

    # np.max raises ValueError on an empty array; there is nothing to scale.
    if samples.size == 0:
        return samples.astype(float)

    peak = np.max(np.abs(samples))
    scaled = samples / peak if peak > 0 else samples
    return scaled - scaled.mean()

def apply_pre_emphasis(audio, alpha=0.97):
    """Apply a first-order pre-emphasis FIR filter (Notebook 3).

    Computes ``y[n] = x[n] - alpha * x[n - 1]`` by running ``audio`` through
    ``scipy.signal.lfilter`` with numerator ``[1, -alpha]`` and denominator 1.

    Args:
        audio: Input samples.
        alpha: Pre-emphasis coefficient.

    Returns:
        The filtered signal, same length as ``audio``.
    """
    fir_taps = np.array([1.0, -alpha])
    emphasized = signal.lfilter(fir_taps, 1, audio)
    return emphasized

In [6]:
# Preprocessing stage: denoise -> peak-normalize/center -> pre-emphasis.
# Raw recordings are read from INPUT_ROOT/<category> and the processed audio is
# written under the same file name to OUTPUT_ROOT/<category>.
INPUT_ROOT = Path(r"X:\数据集\DeepShip\data_preprocessing\data_new")
OUTPUT_ROOT = Path(r"X:\数据集\DeepShip\data_preprocessing\data_new_preprocessed")
CATEGORIES = ["Cargo", "Passengership", "Tanker", "Tug"]

for category in CATEGORIES:
    in_dir = INPUT_ROOT / category
    out_dir = OUTPUT_ROOT / category
    out_dir.mkdir(parents=True, exist_ok=True)

    for wav_path in sorted(in_dir.glob("*.wav")):
        out_path = out_dir / wav_path.name

        # Existing outputs are treated as finished, which makes the cell resumable.
        if out_path.exists():
            print(f"{category}: {wav_path.name} already done, skip.")
            continue

        # sr=None keeps the file's native sampling rate.
        waveform, sr = librosa.load(wav_path, sr=None)

        denoised = denoise_spectral_gate(waveform, sr)
        normalized = normalize_and_center(denoised)
        emphasized = apply_pre_emphasis(normalized)

        # Write to a temporary sibling and rename afterwards: if the run is
        # interrupted mid-write, no truncated file is left under the final name
        # for the "already done" check above to skip on the next run.
        # The format is given explicitly because the ".part" suffix hides the
        # ".wav" extension soundfile would otherwise infer it from.
        # NOTE(review): without an explicit subtype soundfile presumably writes
        # 16-bit PCM, so samples outside [-1, 1] after pre-emphasis would be
        # clipped - confirm that is acceptable.
        tmp_path = out_path.with_name(out_path.name + ".part")
        sf.write(tmp_path, emphasized, sr, format="WAV")
        tmp_path.replace(out_path)
        print(f"{category}: {wav_path.name} processed.")

Cargo: 0_1.wav already done, skip.
Cargo: 0_10.wav already done, skip.
Cargo: 0_100.wav already done, skip.
Cargo: 0_101.wav already done, skip.
Cargo: 0_102.wav already done, skip.
Cargo: 0_103.wav already done, skip.
Cargo: 0_104.wav already done, skip.
Cargo: 0_105.wav already done, skip.
Cargo: 0_106.wav already done, skip.
Cargo: 0_107.wav already done, skip.
Cargo: 0_108.wav already done, skip.
Cargo: 0_109.wav already done, skip.
Cargo: 0_11.wav already done, skip.
Cargo: 0_110.wav already done, skip.
Cargo: 0_111.wav already done, skip.
Cargo: 0_12.wav already done, skip.
Cargo: 0_13.wav already done, skip.
Cargo: 0_14.wav already done, skip.
Cargo: 0_15.wav already done, skip.
Cargo: 0_16.wav already done, skip.
Cargo: 0_17.wav already done, skip.
Cargo: 0_18.wav already done, skip.
Cargo: 0_19.wav already done, skip.
Cargo: 0_2.wav already done, skip.
Cargo: 0_20.wav already done, skip.
Cargo: 0_22.wav already done, skip.
Cargo: 0_24.wav already done, skip.
Cargo: 0_25.wav al

# 分帧加窗

In [14]:
frame_duration = 2  # length of one frame, in seconds
overlap_rate = 0.5  # overlap between consecutive frames (50%)

# Framing/windowing stage: reads the output folder of the preprocessing stage
# and writes the individual windowed frames to its own folder.
INPUT_ROOT = Path(r"X:\数据集\DeepShip\data_preprocessing\data_new_preprocessed")
OUTPUT_ROOT = Path(r"X:\数据集\DeepShip\data_preprocessing\data_new_frame_and_window")

# Windowing helper
def apply_window(frame):
    """Taper ``frame`` with a Hann window of the same length.

    Args:
        frame: 1-D sequence of samples.

    Returns:
        ``frame`` multiplied element-wise by ``np.hanning(len(frame))``.
    """
    return np.multiply(frame, np.hanning(len(frame)))

In [15]:
# Split every preprocessed recording into fixed-length, overlapping frames,
# apply the window to each frame and store each frame as its own WAV file.
for category in ["Cargo", "Passengership", "Tanker", "Tug"]:
    in_dir = INPUT_ROOT / category
    out_dir = OUTPUT_ROOT / category
    out_dir.mkdir(parents=True, exist_ok=True)

    wav_paths = sorted(glob.glob(str(in_dir / "*.wav")))
    if len(wav_paths) == 0:
        print(f"{category}: no wav files found, skip.")
        continue

    for wav_path in wav_paths:
        source = Path(wav_path)
        file_name, file_stem = source.name, source.stem

        # sr=None keeps the file's native sampling rate.
        audio, sr = librosa.load(wav_path, sr=None)
        total_samples = len(audio)
        print(f"Processing file: {file_name}, Sample Rate: {sr}, Total Samples: {total_samples}")

        frame_length = int(frame_duration * sr)
        hop_length = int(frame_length * (1.0 - overlap_rate))

        # Number of zeros to append so the signal ends exactly on a frame
        # boundary: either fill up a single short frame, or complete the
        # last partially covered hop.
        if total_samples < frame_length:
            pad_len = frame_length - total_samples
        else:
            pad_len = -(total_samples - frame_length) % hop_length

        if pad_len > 0:
            audio_padded = np.pad(audio, (0, pad_len), mode="constant")
        else:
            audio_padded = audio

        # One column per frame; the window is applied along each column.
        frames = librosa.util.frame(
            audio_padded, frame_length=frame_length, hop_length=hop_length
        )
        frames_windowed = np.apply_along_axis(apply_window, 0, frames)

        # Frames are numbered from 1 in the output file names.
        for i, frame_windowed in enumerate(frames_windowed.T, start=1):
            out_path = out_dir / f"{file_stem}_{i}.wav"
            wavfile.write(out_path, sr, frame_windowed)
            print(f"{category}: processed {file_name} -> {out_path.name}")

print("All files processed.")

Processing file: 0_1.wav, Sample Rate: 32000, Total Samples: 14624000
Cargo: processed 0_1.wav -> 0_1_1.wav
Cargo: processed 0_1.wav -> 0_1_2.wav
Cargo: processed 0_1.wav -> 0_1_3.wav
Cargo: processed 0_1.wav -> 0_1_4.wav
Cargo: processed 0_1.wav -> 0_1_5.wav
Cargo: processed 0_1.wav -> 0_1_6.wav
Cargo: processed 0_1.wav -> 0_1_7.wav
Cargo: processed 0_1.wav -> 0_1_8.wav
Cargo: processed 0_1.wav -> 0_1_9.wav
Cargo: processed 0_1.wav -> 0_1_10.wav
Cargo: processed 0_1.wav -> 0_1_11.wav
Cargo: processed 0_1.wav -> 0_1_12.wav
Cargo: processed 0_1.wav -> 0_1_13.wav
Cargo: processed 0_1.wav -> 0_1_14.wav
Cargo: processed 0_1.wav -> 0_1_15.wav
Cargo: processed 0_1.wav -> 0_1_16.wav
Cargo: processed 0_1.wav -> 0_1_17.wav
Cargo: processed 0_1.wav -> 0_1_18.wav
Cargo: processed 0_1.wav -> 0_1_19.wav
Cargo: processed 0_1.wav -> 0_1_20.wav
Cargo: processed 0_1.wav -> 0_1_21.wav
Cargo: processed 0_1.wav -> 0_1_22.wav
Cargo: processed 0_1.wav -> 0_1_23.wav
Cargo: processed 0_1.wav -> 0_1_24.wav
Car

# 提取特征

In [20]:
# Sampling rate requested from librosa.load (sr=...) when the frame files are
# read for feature extraction.
# NOTE(review): the framing cell's log reports 32000 Hz input, so loading at
# 16000 presumably resamples every frame - confirm this is intended.
SAMPLE_RATE = 16000
# Number of mel bands requested for the mel spectrogram (n_mels).
N_MEL = 128
# Number of MFCC coefficients requested (n_mfcc).
N_MFCC = 40 
# STFT settings; the driver loop passes these to extract_mel_npy only - the
# extract_mfcc_npy call does not receive them.
N_FFT = 2048
HOP_LENGTH = 512

CATEGORIES = ["Cargo", "Passengership", "Tanker", "Tug"]

# Feature-extraction stage: reads the windowed frames and writes one .npy per
# frame into separate mel / mfcc folders.
# NOTE: INPUT_ROOT is rebound here as a plain str, whereas the earlier cells
# bind it to a Path and use the `/` operator on it; re-running an earlier loop
# after this cell requires re-running that loop's own configuration first.
INPUT_ROOT = r"X:\数据集\DeepShip\data_preprocessing\data_new_frame_and_window"
MEL_ROOT   = r"X:\数据集\DeepShip\data_preprocessing\data_new_extract\mel"
MFCC_ROOT  = r"X:\数据集\DeepShip\data_preprocessing\data_new_extract\mfcc"

In [22]:
def extract_mel_npy(in_dir, out_dir, samplerate, n_mels, n_fft, hop_length):
    """Save a dB-scaled mel spectrogram for every ``*.wav`` file in ``in_dir``.

    Each input ``<name>.wav`` produces ``<out_dir>/<name>.npy``. Files whose
    output already exists are skipped, so the function can be re-run to
    resume an interrupted extraction.

    Args:
        in_dir: Directory containing the input WAV files.
        out_dir: Directory for the ``.npy`` outputs (created if missing).
        samplerate: Sampling rate passed to ``librosa.load``.
        n_mels: Number of mel bands.
        n_fft: FFT size for the spectrogram.
        hop_length: Hop length for the spectrogram.

    Returns:
        None. Results are written to disk.
    """
    os.makedirs(out_dir, exist_ok=True)

    pattern = os.path.join(in_dir, '*.wav')
    for wav_file in glob.glob(pattern):
        stem, _ext = os.path.splitext(os.path.basename(wav_file))
        target = os.path.join(out_dir, f"{stem}.npy")

        if not os.path.exists(target):
            samples, rate = librosa.load(wav_file, sr=samplerate)
            power_spec = librosa.feature.melspectrogram(
                y=samples,
                sr=rate,
                n_mels=n_mels,
                n_fft=n_fft,
                hop_length=hop_length,
            )
            # Convert power to dB relative to the spectrogram's own maximum.
            np.save(target, librosa.power_to_db(power_spec, ref=np.max))

def extract_mfcc_npy(in_dir, out_dir, samplerate, n_mfcc, n_fft=None, hop_length=None):
    """Save MFCCs for every ``*.wav`` file in ``in_dir``.

    Each input ``<name>.wav`` produces ``<out_dir>/<name>.npy``. Files whose
    output already exists are skipped, so the function can be re-run to
    resume an interrupted extraction.

    Args:
        in_dir: Directory containing the input WAV files.
        out_dir: Directory for the ``.npy`` outputs (created if missing).
        samplerate: Sampling rate passed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients.
        n_fft: Optional FFT size forwarded to ``librosa.feature.mfcc``. When
            ``None`` (default) the argument is not passed, so librosa's own
            default applies - identical to the previous behavior.
        hop_length: Optional hop length, forwarded the same way as ``n_fft``.
            Passing the same values used for the mel spectrogram keeps both
            feature types on the same time grid.

    Returns:
        None. Results are written to disk.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Only forward the STFT settings the caller actually supplied.
    stft_kwargs = {}
    if n_fft is not None:
        stft_kwargs["n_fft"] = n_fft
    if hop_length is not None:
        stft_kwargs["hop_length"] = hop_length

    for wav in glob.glob(os.path.join(in_dir, '*.wav')):
        filename = os.path.splitext(os.path.basename(wav))[0]
        output_path = os.path.join(out_dir, f"{filename}.npy")

        # Already extracted on a previous run.
        if os.path.exists(output_path):
            continue

        y, sr = librosa.load(wav, sr=samplerate)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, **stft_kwargs)
        np.save(output_path, mfcc)

In [23]:
# Run both extractors for every ship category.
for category in CATEGORIES:
    # Input frames and both feature outputs each live in <root>/<category>.
    in_dir, mel_out, mfcc_out = (
        os.path.join(root, category)
        for root in (INPUT_ROOT, MEL_ROOT, MFCC_ROOT)
    )

    extract_mel_npy(in_dir, mel_out, SAMPLE_RATE, N_MEL, N_FFT, HOP_LENGTH)
    extract_mfcc_npy(in_dir, mfcc_out, SAMPLE_RATE, N_MFCC)

    print(f"{category}: feature extraction complete.")

Cargo: feature extraction complete.
Passengership: feature extraction complete.
Tanker: feature extraction complete.
Tug: feature extraction complete.
