In [1]:
import lightgbm as lgb

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
import librosa

def load_waveforms(pattern):
    """Load every audio file matching ``pattern``.

    Parameters
    ----------
    pattern : str
        Glob pattern; it is made absolute with ``os.path.abspath`` before
        matching.

    Returns
    -------
    paths : list of str
        Matching paths in sorted order.
    waveforms : numpy.ndarray
        ``np.array`` of the decoded signals, one entry per path, same order.
        NOTE(review): later cells stack and reshape these, which presumably
        requires every clip to have the same length - confirm for the dataset.

    Raises
    ------
    FileNotFoundError
        If the pattern matches no files, so a wrong data path fails here
        instead of surfacing as an obscure shape error later on.
    """
    paths = sorted(glob.glob(os.path.abspath(pattern)))
    if not paths:
        raise FileNotFoundError('no audio files match {!r}'.format(pattern))
    # sr=None asks librosa to keep each file's native sampling rate;
    # only the signal (first element of the returned pair) is kept.
    waveforms = np.array([librosa.load(path, sr=None)[0] for path in paths])
    return paths, waveforms


files_normaly, normal = load_waveforms('../input/train/*/normal/*.wav')
files_anomaly, anomaly = load_waveforms('../input/train/*/anomaly/*.wav')
files_test, test = load_waveforms('../input/test/*.wav')


In [3]:
def log_melspectrograms(waveforms, n_mels=256):
    """Convert each waveform into a dB-scaled mel-spectrogram.

    Parameters
    ----------
    waveforms : iterable of numpy.ndarray
        Audio signals, one per clip.
    n_mels : int, default 256
        Number of mel bands passed to ``librosa.feature.melspectrogram``.

    Returns
    -------
    numpy.ndarray
        ``np.array`` of the per-clip float32 spectrograms, in input order.
    """
    specs = []
    for wave in waveforms:
        # The signal must be passed as `y=`: librosa >= 0.10 made the
        # arguments keyword-only, so the positional form raises TypeError.
        # NOTE(review): `sr` is left at librosa's default (22050 Hz) although
        # the audio is loaded with sr=None (native rate) - confirm the native
        # rate matches, or pass sr explicitly.
        power = librosa.feature.melspectrogram(y=wave, n_mels=n_mels)
        specs.append(librosa.power_to_db(power).astype(np.float32))
    return np.array(specs)


melspec_normal = log_melspectrograms(normal)
melspec_anomaly = log_melspectrograms(anomaly)
melspec_test = log_melspectrograms(test)



In [4]:
from sklearn.model_selection import train_test_split

# Labels follow the stacking order below: 0.0 for normal clips, 1.0 for anomalies.
target = np.concatenate([
    np.zeros(len(melspec_normal)),
    np.ones(len(melspec_anomaly)),
])

# Stack normal + anomaly spectrograms and flatten each one into a feature row.
train = np.concatenate([melspec_normal, melspec_anomaly])
train = train.reshape(len(train), -1)

# Same flattening for the unlabeled clips (this rebinds `test`, which held
# the raw waveforms until now).
test = melspec_test.reshape(len(melspec_test), -1)

# Hold out 20% of the labeled rows; despite the names, test_X / test_Y are
# this hold-out split, not the competition test set.
train_X, test_X, train_Y, test_Y = train_test_split(
    train,
    target,
    random_state=42,
    test_size=0.2,
)


In [5]:
from sklearn.preprocessing import StandardScaler

# Learn per-feature mean/std from the training split only, then apply the
# same transform to the training rows, the hold-out rows and the test rows.
scaler = StandardScaler().fit(train_X)
train_X, test_X, test = (
    scaler.transform(features) for features in (train_X, test_X, test)
)


In [6]:
# Wrap the splits for LightGBM; the evaluation set references the training
# set so both share the same feature binning.
lgb_train = lgb.Dataset(data=train_X, label=train_Y)
lgb_eval = lgb.Dataset(data=test_X, label=test_Y, reference=lgb_train)


In [7]:
# Binary classifier scored by log-loss, trees capped at depth 7,
# LightGBM's own logging silenced (verbose=-1).
lgbm_params = dict(
    objective='binary',
    metric='binary_logloss',
    max_depth=7,
    verbose=-1,
)

In [None]:
# Train for up to 1000 rounds, stopping once the hold-out log-loss has not
# improved for 100 rounds and logging the metric every 50 rounds.
#
# The `verbose_eval=` / `early_stopping_rounds=` keyword arguments were removed
# from lgb.train in LightGBM 4.0, so both behaviours are requested through
# callbacks instead. `log_evaluation` only exists from 3.3 on; older releases
# expose the same callback as `print_evaluation`.
_log_every = getattr(lgb, 'log_evaluation', None) or lgb.print_evaluation

model = lgb.train(
    lgbm_params,
    lgb_train,
    num_boost_round=1000,
    valid_sets=[lgb_eval],
    callbacks=[
        lgb.early_stopping(stopping_rounds=100),
        _log_every(period=50),
    ],
)

Training until validation scores don't improve for 100 rounds
[50]	valid_0's binary_logloss: 0.285067


In [9]:
# Anomaly probabilities for the hold-out split, using the best
# early-stopped iteration.
predict_proba = model.predict(
    data=test_X,
    num_iteration=model.best_iteration,
)


In [10]:
# Threshold the hold-out probabilities at 0.5 (below -> 0 normal, otherwise 1 anomaly).
pred = [0 if p < 0.5 else 1 for p in predict_proba]

# Score the hold-out split here: `pred` is rebound to the test-set labels a
# couple of cells further down, so without this the validation predictions
# would be discarded unevaluated.
print('validation accuracy:', accuracy_score(test_Y, pred))
confusion_matrix(test_Y, pred)

In [11]:
# Anomaly probabilities for the unlabeled test clips (rebinds `predict_proba`).
predict_proba = model.predict(
    data=test,
    num_iteration=model.best_iteration,
)


In [12]:
# Hard labels for the test clips: 0 when the probability is below 0.5, else 1.
pred = [int(not (prob < 0.5)) for prob in predict_proba]

In [13]:
# The sample submission has no header row, so pandas labels its columns 0, 1, ...
sub = pd.read_csv(
    filepath_or_buffer='../input/sample_submission.csv',
    header=None,
)


In [14]:
# Store the predicted labels in column 1 of the submission frame;
# pandas raises if len(pred) does not match the number of rows.
sub[1] = pred


In [15]:
# Preview the first five submission rows.
sub.head(n=5)

Unnamed: 0,0,1
0,machine01_000.wav,0
1,machine01_001.wav,1
2,machine01_002.wav,1
3,machine01_003.wav,0
4,machine01_004.wav,0


In [28]:
# Sanity check: number of predicted labels.
len(pred)

3292

In [16]:
# Write the submission without a header row or index column.
sub.to_csv(
    path_or_buf='submit.csv',
    header=False,
    index=False,
)
