In [None]:
    # Mount Google Drive inside the Colab runtime; every dataset path used
    # further down in this notebook lives under /content/drive.
    from google.colab import drive
    drive.mount('/content/drive')

In [None]:
import pandas as pd

# Location of the UrbanSound8K metadata CSV on the mounted Drive.
META_PATH = "/content/drive/MyDrive/UrbanSound8K.csv"
df = pd.read_csv(META_PATH)

# Quick look at the metadata: first rows, the column index, then up to 20
# distinct class names and up to 20 distinct numeric class IDs.
print(
    df.head(),
    df.columns,
    df['class'].unique()[:20],
    df['classID'].unique()[:20],
    sep="\n",
)

In [None]:
# Numeric classID -> class name. The position of each name in the tuple is
# its classID, so enumerate() rebuilds the 0..9 mapping.
id2name = dict(enumerate((
    'air_conditioner',
    'car_horn',
    'children_playing',
    'dog_bark',
    'drilling',
    'engine_idling',
    'gun_shot',
    'jackhammer',
    'siren',
    'street_music',
)))

In [None]:
# Show every distinct value of the 'class' column in the metadata frame.
print(df['class'].unique())

In [None]:
import os, pandas as pd

BASE_DIR_US8K = "/content/drive/MyDrive/fold2"  # adjust to your own dataset location

# Candidate locations for the metadata CSV, tried in order.
# os.path.join() throws away every component that precedes an absolute path,
# so joining BASE_DIR_US8K with "/content/drive/..." never searched the base
# directory at all. The Drive-root path is listed explicitly (it is the path
# that was effectively used) and the base-directory layouts are joined with a
# relative file name so they are real alternatives.
META_PATHS = [
    "/content/drive/MyDrive/UrbanSound8K.csv",
    os.path.join(BASE_DIR_US8K, "metadata", "UrbanSound8K.csv"),
    os.path.join(BASE_DIR_US8K, "UrbanSound8K.csv"),
]
META_PATH = next((p for p in META_PATHS if os.path.exists(p)), None)
assert META_PATH, "UrbanSound8K.csv 경로를 확인하세요."

df = pd.read_csv(META_PATH)
assert {'slice_file_name','fold','classID'}.issubset(df.columns), "CSV 컬럼 확인 필요"

# classIDs to keep. Defined here because nothing above this cell defines
# them; the values match the classes named in the printed header below.
target_ids = [1, 8]  # 1 = car_horn, 8 = siren

# Per-fold row counts for the target classes (rows = fold, columns = classID).
sub = df[df['classID'].isin(target_ids)].copy()
counts = sub.groupby(['fold','classID']).size().unstack(fill_value=0)
print("fold별 car_horn(1)/siren(8) 분포:\n", counts)

In [None]:
import os, pandas as pd

BASE_DIR_US8K = "/content/drive/MyDrive/fold2"  # adjust to your own dataset location

# Candidate locations for the metadata CSV, tried in order.
# os.path.join() discards all components before an absolute one, so joining
# BASE_DIR_US8K with an absolute CSV path produced the same Drive-root path
# twice. That path is kept as the first candidate; the other two are genuine
# locations relative to BASE_DIR_US8K.
META_PATHS = [
    "/content/drive/MyDrive/UrbanSound8K.csv",
    os.path.join(BASE_DIR_US8K, "metadata", "UrbanSound8K.csv"),
    os.path.join(BASE_DIR_US8K, "UrbanSound8K.csv"),
]
# First candidate that exists on disk, or None when none of them does.
META_PATH = next((p for p in META_PATHS if os.path.exists(p)), None)
assert META_PATH, "UrbanSound8K.csv 경로를 확인하세요."

In [None]:
# 1) Target fold and target classes
TARGET_FOLD = 2

# classID -> class name; each name's position in the list is its classID.
ID2NAME = dict(enumerate([
    'air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling',
    'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music',
]))

# classIDs of the two classes this notebook keeps: car_horn and siren.
TARGET_IDS = [cid for cid, cname in ID2NAME.items() if cname in ('car_horn', 'siren')]

In [None]:
# 2) Load the metadata CSV and inspect the class distribution
df = pd.read_csv(META_PATH)
need_cols = {'slice_file_name','fold','classID'}
assert need_cols <= set(df.columns), f"CSV 컬럼 부족: {df.columns}"

# Rows of the target classes across every fold, counted per (fold, classID).
sub_all = df.loc[df['classID'].isin(TARGET_IDS)]
print(
    "전체 fold 분포:",
    sub_all.groupby(['fold','classID']).size().unstack(fill_value=0),
    sep="\n",
)

# Narrow the target-class rows down to the chosen fold; .copy() makes `sub`
# an independent frame rather than a slice of `df`.
sub = sub_all.loc[sub_all['fold'] == TARGET_FOLD].copy()
# In the counts below 1 = car_horn and 8 = siren (the original author's note
# gives the expected counts as 1: 42, 8: 91).
print(f"\nfold{TARGET_FOLD} 분포:", sub['classID'].value_counts(), sep="\n")

In [None]:
# 3) Path resolution (tolerant of folder layout and extension case)
def resolve_path(base_dir, fold, fname):
    """Locate an audio clip on disk and return its path as a string.

    The file is looked for in four places under ``base_dir``, in this order:
    ``audio/fold{fold}/``, ``fold{fold}/``, ``audio/`` and ``base_dir`` itself.
    The name is tried exactly as given first; if that fails, the same folders
    are searched again with the extension case swapped (``.wav`` / ``.WAV``),
    or with an extension appended when ``fname`` has no ``.wav`` suffix.

    Args:
        base_dir: Root directory to search (str or path-like).
        fold: Fold number used to build the ``fold{fold}`` folder name.
        fname: File name of the clip.

    Returns:
        The first existing candidate path as ``str``, or ``None`` when no
        candidate exists.
    """
    # Imported locally: the notebook only imports Path in a later cell, so a
    # fresh "Restart & Run All" would otherwise raise NameError here.
    from pathlib import Path

    base = Path(base_dir)
    search_dirs = [
        base / "audio" / f"fold{fold}",
        base / f"fold{fold}",
        base / "audio",
        base,
    ]

    # Alternative spellings of the file name. For a name that already ends in
    # some casing of ".wav", both the upper- and lower-case extension are
    # offered (minus the spelling already given), so "x.WAV" falls back to
    # "x.wav" instead of retrying the identical name.
    if fname.lower().endswith(".wav"):
        stem = fname[:-4]
        fallbacks = [stem + ".WAV", stem + ".wav"]
    else:
        fallbacks = [fname + ".wav", fname + ".WAV"]
    names = [fname] + [alt for alt in fallbacks if alt != fname]

    # The exact name wins over any fallback; within one name the folder order
    # above decides.
    for name in names:
        for folder in search_dirs:
            candidate = folder / name
            if candidate.exists():
                return str(candidate)
    return None

# Pair every clip that can be located on disk with its class name; rows whose
# file cannot be resolved are skipped.
# The search root is BASE_DIR_US8K (set in the path-configuration cell); the
# name BASE is not defined anywhere in this notebook and raised NameError.
pairs = []
for fold, class_id, fname in zip(sub['fold'], sub['classID'], sub['slice_file_name']):
    fpath = resolve_path(BASE_DIR_US8K, int(fold), fname)
    if fpath:
        pairs.append((fpath, ID2NAME[int(class_id)]))

print(f"\n[US8K] fold{TARGET_FOLD}: {len(pairs)} files (car_horn/siren)")
print("샘플:", pairs[:5])


In [None]:
# 4) 전처리: 1초 창 log-mel(64 mel) -> /content/dataset/fold2/X.npy, y.npy
import soundfile as sf
import librosa
import numpy as np
import math
from pathlib import Path
import os

# Audio front-end settings.
SR = 16000          # sample rate every clip is resampled to
N_MELS = 64         # number of mel bands
WIN_LEN = 1024      # passed as n_fft to the mel spectrogram
HOP_LEN = 320       # hop between spectrogram frames, in samples

# Segmentation: windows of SEG_SECONDS, advanced by half a second.
SEG_SECONDS = 1.0
SEG_SAMPLES = int(SR * SEG_SECONDS)
SEG_HOP = int(SR * 0.5)

# Class name -> integer label stored in y.
CLASS2IDX = dict(siren=0, car_horn=1)

def load_mono(path, sr=SR):
    """Read an audio file and return it as a mono signal at rate ``sr``.

    The file is read with soundfile. An array with more than one dimension is
    averaged over axis 1, and the signal is resampled with librosa only when
    the file's own rate differs from ``sr``.
    """
    samples, native_sr = sf.read(path, always_2d=False)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    if native_sr == sr:
        return samples
    return librosa.resample(y=samples, orig_sr=native_sr, target_sr=sr)

def segment(y, seg_len=SEG_SAMPLES, hop=SEG_HOP):
    """Cut ``y`` into full-length windows of ``seg_len`` samples, ``hop`` apart.

    Only start offsets that leave room for a complete window are used, so an
    input shorter than ``seg_len`` yields an empty list.
    """
    start_limit = max(0, len(y) - seg_len + 1)
    windows = []
    for start in range(0, start_limit, hop):
        windows.append(y[start:start + seg_len])
    return windows

def to_logmel(y):
    """Return the log-mel spectrogram of ``y`` as a float32 array.

    A power (``power=2.0``) mel spectrogram is computed with ``n_fft=WIN_LEN``,
    ``hop_length=HOP_LEN`` and ``N_MELS`` bands between 20 Hz and ``SR//2``,
    then converted to dB relative to its own maximum (``ref=np.max``).
    """
    mel_power = librosa.feature.melspectrogram(
        y=y,
        sr=SR,
        n_fft=WIN_LEN,
        hop_length=HOP_LEN,
        n_mels=N_MELS,
        power=2.0,
        fmin=20,
        fmax=SR//2,
    )
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    return mel_db.astype(np.float32)

def build_Xy(pairs):
    """Turn (file path, class name) pairs into a log-mel dataset.

    Every file is loaded with ``load_mono``, cut into windows by ``segment``
    and each full-length window is converted with ``to_logmel``. Each
    spectrogram is padded on the right with its own minimum value, or
    truncated, so that all have the same number of frames.

    Args:
        pairs: Iterable of ``(file_path, class_name)``; ``class_name`` must be
            a key of ``CLASS2IDX``.

    Returns:
        ``(X, y)`` where ``X`` is float32 with shape ``(N, 1, N_MELS, T)`` and
        ``y`` is int64 with shape ``(N,)``. When no window is produced (empty
        ``pairs`` or only clips shorter than one window) both arrays are empty
        but keep those shapes with ``N == 0``.
    """
    # Common frame count for every window. With SEG_SAMPLES=16000,
    # WIN_LEN=1024 and HOP_LEN=320 this evaluates to 48.
    T_target = math.ceil((SEG_SAMPLES - WIN_LEN) / HOP_LEN) + 1

    X, y = [], []
    for fpath, cname in pairs:
        wav = load_mono(fpath, SR)
        for seg in segment(wav):
            if len(seg) < SEG_SAMPLES:
                continue
            mel = to_logmel(seg)  # (N_MELS, frames)
            n_frames = mel.shape[1]
            if n_frames < T_target:
                mel = np.pad(mel, ((0, 0), (0, T_target - n_frames)),
                             mode='constant', constant_values=mel.min())
            elif n_frames > T_target:
                mel = mel[:, :T_target]
            X.append(mel)
            y.append(CLASS2IDX[cname])

    if not X:
        # np.array([]) is 1-D, so the 4-D indexing below would raise
        # IndexError; return correctly shaped empty arrays instead.
        return (np.empty((0, 1, N_MELS, T_target), dtype=np.float32),
                np.empty((0,), dtype=np.int64))

    X = np.array(X, dtype=np.float32)[:, None, :, :]  # add the channel axis
    y = np.array(y, dtype=np.int64)
    return X, y

if pairs:
    # Build the arrays and persist them together with a plain-text label map.
    X, y = build_Xy(pairs)
    outdir = "/content/dataset/fold2"
    os.makedirs(outdir, exist_ok=True)
    np.save(f"{outdir}/X.npy", X)
    np.save(f"{outdir}/y.npy", y)
    with open(f"{outdir}/label_map.txt","w") as f:
        f.write("0\tsiren\n1\tcar_horn\n")
    print(f"✅ Saved: {outdir} | X{X.shape}, y{y.shape}")
else:
    # No clip was resolved on disk, so there is nothing to preprocess.
    print("⚠️ fold2에서 해당 클래스 파일을 찾지 못했습니다. BASE/폴더 구조/파일명 대소문자를 재확인하세요.")

## 1) 데이터 로드 & 학습/검증 분리

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# The preprocessing cell stores X with a channel axis in position 1,
# (N, 1, 64, T). Keras takes channels last, so that axis is moved to the end,
# giving (N, 64, T, 1).
X = np.load("/content/dataset/fold2/X.npy").transpose(0, 2, 3, 1)
y = np.load("/content/dataset/fold2/y.npy")  # (N,)

# Stratified 80/20 split with a fixed seed.
# NOTE(review): windows are cut with a half-window hop, so overlapping
# windows of one clip can end up on both sides of this random split.
# Consider splitting by source file; confirm whether that matters here.
X_tr, X_va, y_tr, y_va = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
X_tr.shape, X_va.shape, y_tr.shape, y_va.shape

## 2) 경량 CNN 모델 정의 & 학습

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

num_classes = 2

# One input sample has the per-example shape of the training array.
inputs = keras.Input(shape=X_tr.shape[1:])  # (64, T, 1)

# Two Conv -> BatchNorm -> 2x2 MaxPool stages with 16 and then 32 filters.
x = inputs
for n_filters in (16, 32):
    x = layers.Conv2D(n_filters, (3,3), padding="same", activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool2D((2,2))(x)

# Last conv stage: global average pooling replaces the max-pool, followed by
# dropout ahead of the classifier.
x = layers.Conv2D(64, (3,3), padding="same", activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.2)(x)

outputs = layers.Dense(num_classes, activation="softmax")(x)
model = keras.Model(inputs, outputs)
model.summary()

# Integer labels + softmax output -> sparse categorical cross-entropy.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(1e-3),
    metrics=["accuracy"],
)

# Stop once validation accuracy has not improved for 5 epochs (keeping the
# best weights) and halve the learning rate after 2 epochs on a plateau.
cb = [
    keras.callbacks.EarlyStopping(
        monitor="val_accuracy", patience=5, restore_best_weights=True
    ),
    keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=2),
]

history = model.fit(
    X_tr,
    y_tr,
    validation_data=(X_va, y_va),
    batch_size=64,
    epochs=30,
    callbacks=cb,
    verbose=1,
)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Hard predictions on the validation split: the index of the largest model
# output in each row.
y_pred = model.predict(X_va).argmax(axis=1)

# Raw confusion matrix followed by the per-class report.
print(
    confusion_matrix(y_va, y_pred),
    classification_report(y_va, y_pred, target_names=["siren","car_horn"]),
    sep="\n",
)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd
import numpy as np

# y_va   : true labels of the validation split
# y_pred : labels predicted by the model
labels = ["siren", "car_horn"]

# Build the confusion matrix.
cm = confusion_matrix(y_va, y_pred)

# Wrap it in a DataFrame so rows and columns carry readable names.
row_names = [f"True_{l}" for l in labels]
col_names = [f"Pred_{l}" for l in labels]
cm_df = pd.DataFrame(data=cm, index=row_names, columns=col_names)

print("📊 Confusion Matrix:", cm_df, sep="\n")

# Print the classification report as well.
print(
    "\n📋 Classification Report:",
    classification_report(y_va, y_pred, target_names=labels),
    sep="\n",
)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Tick labels in class-index order (0 = siren, 1 = car_horn). Both axes share
# one list so their spelling cannot drift apart; the y axis previously read
# "car-horn" while every other place uses "car_horn".
class_names = ["siren", "car_horn"]

plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title(' Confusion Matrix')
plt.show()

In [None]:
# SavedModel: export the trained Keras model to a directory on the Colab disk.
model.export("/content/safesound_cnn")

# TFLite: convert that SavedModel and write the result to a .tflite file.
converter = tf.lite.TFLiteConverter.from_saved_model("/content/safesound_cnn")
converter.optimizations = [tf.lite.Optimize.DEFAULT]  # default converter optimizations, used here to make the model lighter
tflite_model = converter.convert()
with open("/content/safesound_cnn.tflite", "wb") as f:
    f.write(tflite_model)
print("✅ Exported /content/safesound_cnn.tflite")

In [None]:
import librosa, soundfile as sf, math
import numpy as np
import tensorflow as tf

# Audio front-end settings (same values as the preprocessing cell).
SR = 16000
N_MELS = 64
WIN_LEN = 1024
HOP_LEN = 320

# Windowing: SEG_SECONDS-long windows advanced by half a second.
SEG_SECONDS = 1.0
SEG_SAMPLES = int(SR * SEG_SECONDS)
SEG_HOP = int(SR * 0.5)

# Detection hysteresis: an event starts after MIN_CONSEC consecutive windows
# at or above ENTER_TH and ends at the first window at or below EXIT_TH.
ENTER_TH = 0.8
EXIT_TH = 0.6
MIN_CONSEC = 2

# Load the exported TFLite model and look up its first input/output tensor.
interpreter = tf.lite.Interpreter("/content/safesound_cnn.tflite")
interpreter.allocate_tensors()
out_detail = interpreter.get_output_details()[0]
in_detail = interpreter.get_input_details()[0]

def logmel_1s(y):
    """Log-mel spectrogram of one window, forced to a fixed number of frames.

    The dB-scaled mel spectrogram (float32, ``N_MELS`` rows) is right-padded
    with its own minimum value when it has too few frames and cut off when it
    has too many. The frame count is
    ``ceil((SEG_SAMPLES - WIN_LEN) / HOP_LEN) + 1``.
    """
    n_frames = math.ceil((SEG_SAMPLES - WIN_LEN)/HOP_LEN)+1

    mel_db = librosa.power_to_db(
        librosa.feature.melspectrogram(
            y=y, sr=SR, n_fft=WIN_LEN, hop_length=HOP_LEN,
            n_mels=N_MELS, power=2.0, fmin=20, fmax=SR//2,
        ),
        ref=np.max,
    ).astype(np.float32)

    missing = n_frames - mel_db.shape[1]
    if missing > 0:
        # Too short: extend the time axis with the spectrogram's floor value.
        return np.pad(mel_db, ((0,0),(0,missing)), mode="constant",
                      constant_values=mel_db.min())
    # Long enough: keep the first n_frames columns.
    return mel_db[:, :n_frames]  # (64,T)

def sliding_segments(y):
    """Return every full SEG_SAMPLES-long window of ``y``, SEG_HOP apart.

    Start offsets stop where a complete window no longer fits, so an input
    shorter than SEG_SAMPLES gives an empty list.
    """
    start_limit = max(0, len(y) - SEG_SAMPLES + 1)
    return [y[begin:begin + SEG_SAMPLES] for begin in range(0, start_limit, SEG_HOP)]

def predict_file(path):
    """Run the TFLite model over every window of an audio file.

    The file is read with soundfile, averaged to mono when it has more than
    one dimension, resampled to ``SR`` when needed, cut into windows with
    ``sliding_segments`` and each window is converted with ``logmel_1s`` and
    passed through the module-level ``interpreter``.

    Args:
        path: Path of the audio file.

    Returns:
        Array of shape ``(N, C)`` with one row of model outputs per window,
        in column order ``[p_siren, p_car_horn]``. When the file yields no
        window the result has shape ``(0, C)``, so column indexing on it
        still works.
    """
    y, sr = sf.read(path, always_2d=False)
    if y.ndim > 1:
        y = y.mean(axis=1)
    if sr != SR:
        # Use the same resampler call as load_mono() (librosa's default
        # res_type) so inference features match the training features; the
        # explicit 'kaiser_fast' setting also depended on an extra resampling
        # backend being installed.
        y = librosa.resample(y=y, orig_sr=sr, target_sr=SR)

    probs = []
    for seg in sliding_segments(y):
        if len(seg) < SEG_SAMPLES:
            continue
        # (64, T) -> (1, 64, T, 1): add batch and channel axes.
        mel = logmel_1s(seg)[np.newaxis, ..., np.newaxis]
        interpreter.set_tensor(in_detail['index'], mel)
        interpreter.invoke()
        probs.append(interpreter.get_tensor(out_detail['index'])[0])  # (C,)

    if not probs:
        # np.array([]) would be 1-D; keep the class axis so callers can
        # still index probs[:, cls].
        n_classes = int(out_detail['shape'][-1])
        return np.empty((0, n_classes), dtype=np.float32)
    return np.array(probs)  # (N, C)

def smooth_and_decide(probs, cls=0, enter_th=None, exit_th=None, min_consec=None):
    """Turn per-window probabilities into events using hysteresis.

    An event opens once ``min_consec`` consecutive windows have a probability
    of at least ``enter_th`` for class ``cls``; its start is the first window
    of that run. It closes at the first later window whose probability is at
    most ``exit_th``. Probabilities between the two thresholds keep an open
    event open.

    Args:
        probs: Array-like of shape ``(N, C)``; an empty input is accepted.
        cls: Column to evaluate (0: siren, 1: car_horn).
        enter_th: Opening threshold; defaults to the module constant ENTER_TH.
        exit_th: Closing threshold; defaults to the module constant EXIT_TH.
        min_consec: Run length needed to open; defaults to MIN_CONSEC.

    Returns:
        List of ``(start_idx, end_idx)`` window-index tuples. For an event
        closed inside the sequence, ``end_idx`` is the index of the window
        that fell to ``exit_th`` or below; for an event still open at the end
        it is the last window index.
    """
    enter_th = ENTER_TH if enter_th is None else enter_th
    exit_th = EXIT_TH if exit_th is None else exit_th
    min_consec = MIN_CONSEC if min_consec is None else min_consec

    probs = np.asarray(probs)
    if probs.size == 0:
        # An empty array is 1-D, so probs[:, cls] would raise IndexError.
        return []

    events = []
    cnt = 0          # length of the current run of windows >= enter_th
    active = False   # True while an event is open
    start = None
    for i, p in enumerate(probs[:, cls]):
        if active:
            if p <= exit_th:
                active = False
                events.append((start, i))
            continue
        if p >= enter_th:
            cnt += 1
            if cnt >= min_consec:
                active = True
                start = i - min_consec + 1
                cnt = 0
        else:
            cnt = 0
    if active:
        events.append((start, len(probs) - 1))
    return events

# Example run
probs = predict_file("/content/example.wav")  # path of an example file

# Detect events for class 0 (siren) and then class 1 (car_horn).
siren_events, horn_events = (smooth_and_decide(probs, cls=c) for c in (0, 1))

print("SIREN events:", siren_events)
print("CAR_HORN events:", horn_events)
