In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Setting

In [None]:
# Import
import random
import pandas as pd
import numpy as np
import os
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score, f1_score

from tqdm.auto import tqdm
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation/future warnings emitted by the imported libraries.
warnings.filterwarnings(action="ignore")

In [None]:
# Hyperparameters shared by the whole notebook
CFG = dict(
    SR=20_000,             # sampling rate requested from librosa.load
    N_melspectrogram=160,  # number of mel bands per spectrogram (n_mels)
    SEED=41,               # seed for the RNGs and the model's random_state
)

In [None]:
# RandomSeed
def seed_everything(seed):
    """Seed the global random number generators used in this notebook.

    Seeds Python's ``random`` module and NumPy's legacy global RNG, and
    stores ``seed`` (as a string) in the ``PYTHONHASHSEED`` environment
    variable.

    Args:
        seed: Integer seed value.
    """
    # The interpreter reads PYTHONHASHSEED at start-up, so exporting it here
    # is picked up by child processes, not by the already-running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
seed_everything(CFG['SEED']) # Fix the random seeds for reproducibility

## Data Preprocessing

In [None]:
# Load the train/test metadata tables from Drive. Each has a SAMPLE_PATH column
# that is rewritten to a full Drive path further down before any audio is read.
train = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Machine_Anomaly/data/train.csv')
test = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Machine_Anomaly/data/test.csv')

In [None]:
def img_path_change(img_path):
  """Turn a dataset-relative sample path into a full path on the mounted Drive.

  The first character of ``img_path`` is dropped (presumably the leading '.'
  of paths such as './train/xxx.wav' — verify against the CSV) and the
  remainder is appended to the dataset root directory.

  The function is idempotent: a path that already lives under the dataset
  root is returned unchanged. Without this guard, re-running the cell that
  applies this function would strip the leading '/' of an already-rewritten
  path and prepend the root a second time, corrupting every path.

  Args:
    img_path: Path value from the SAMPLE_PATH column (converted with ``str``).

  Returns:
    The path as a string rooted at the dataset directory.
  """
  data_root = '/content/drive/MyDrive/Colab Notebooks/Machine_Anomaly/data'
  path = str(img_path)
  # Already rewritten (e.g. the cell was executed twice): leave it alone.
  if path.startswith(data_root + '/'):
    return path
  return data_root + path[1:]

# Rewrite SAMPLE_PATH in both tables so it points at the files on Drive.
# The column is overwritten in place.
train['SAMPLE_PATH'] = train['SAMPLE_PATH'].apply(img_path_change)
test['SAMPLE_PATH'] = test['SAMPLE_PATH'].apply(img_path_change)

In [None]:
def get_melspect_feature(df):
  """Extract one fixed-length log-mel feature vector per audio file.

  For every path in ``df['SAMPLE_PATH']`` the waveform is loaded at
  ``CFG['SR']``, separated into harmonic and percussive components (HPSS),
  each component is converted to a mel spectrogram with
  ``CFG['N_melspectrogram']`` bands, the two spectrograms are averaged,
  converted to dB, and finally averaged over time.

  Args:
    df: DataFrame with a 'SAMPLE_PATH' column holding audio file paths.

  Returns:
    A list with one entry per row of ``df``; each entry is a list holding the
    time-averaged dB value of every mel band.
  """
  features = []
  for path in tqdm(df['SAMPLE_PATH']):
    # Load the wav file, resampled to the configured rate
    signal, sr = librosa.load(path, sr=CFG['SR'])

    # Harmonic / percussive source separation
    harmonic, percussive = librosa.effects.hpss(signal)

    # Mel spectrogram of each component.
    # - The waveform is passed as `y=`: librosa >= 0.10 made it keyword-only.
    # - `sr` is forwarded so the mel filterbank matches the rate the audio was
    #   actually loaded at (otherwise librosa assumes its 22050 Hz default).
    #   NOTE: this shifts feature values slightly versus the old behaviour, so
    #   any decision threshold tuned on the old features should be re-checked.
    melspec_harmonic = librosa.feature.melspectrogram(
        y=harmonic, sr=sr, n_mels=CFG['N_melspectrogram'])
    melspec_percussive = librosa.feature.melspectrogram(
        y=percussive, sr=sr, n_mels=CFG['N_melspectrogram'])

    # Element-wise mean of the two spectrograms
    hpss = np.average([melspec_harmonic, melspec_percussive], axis=0)

    # Convert power to log scale (dB)
    log_features = librosa.power_to_db(S=hpss, ref=1.0)

    # Collapse the time axis: the feature for each mel band is its mean dB
    # value over all frames (these are means, not delta features).
    features.append(list(np.mean(log_features, axis=1)))

  return features

In [None]:
# Extract features for both splits. Every file goes through load + HPSS + two
# mel spectrograms, with a tqdm progress bar per call.
train_features = get_melspect_feature(train)
test_features = get_melspect_feature(test)

## Model

#### IsolationForest

In [None]:
# Isolation forest fitted on the training features only (no labels are passed to fit)
model = IsolationForest(
    n_estimators=200,
    max_samples=256,
    contamination='auto',
    random_state=CFG['SEED'],
    verbose=0,
)
model.fit(train_features)

## Prediction

In [None]:
# Used when the model is an isolation forest
def get_pred_label(model_pred):
  """Remap predictions from the {1, -1} convention to {0, 1}.

  Values equal to 1 become 0 and values equal to -1 become 1; any other
  value is passed through unchanged.

  Args:
    model_pred: Array of model predictions.

  Returns:
    Array with the remapped labels.
  """
  remapped = model_pred
  # Order matters: 1 -> 0 must happen before -1 -> 1, otherwise the freshly
  # written 1s would be turned into 0s as well.
  for raw_value, label in ((1, 0), (-1, 1)):
    remapped = np.where(remapped == raw_value, label, remapped)
  return remapped

# Predict with the model's default cutoff, then remap the 1 / -1 output to 0 / 1.
# NOTE: the threshold cell further down assigns test_pred again, so this result
# only reaches the submission if that cell is skipped.
test_pred = model.predict(test_features)
test_pred = get_pred_label(test_pred)

In [None]:
# Threshold adjustment: label a sample 1 when its decision_function score is
# below a hand-picked cutoff, 0 otherwise.
# NOTE(review): test_pred_proba holds decision_function scores, not probabilities.
threshold = 0.04
test_pred_proba = model.decision_function(test_features)
test_pred = (test_pred_proba < threshold).astype(int)

## Submission

In [None]:
# Load the submission template; its LABEL column is filled with the predictions below.
submit = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Machine_Anomaly/data/sample_submission.csv')

In [None]:
# Store the predictions in the template and write it to the current working directory.
# assumes sample_submission.csv rows are in the same order as test.csv — TODO confirm
submit['LABEL'] = test_pred
submit.to_csv('mel_hpss_log_160.csv', index=False)