In [1]:
import pandas as pd
from pandas import DataFrame

from sklearn.preprocessing import OneHotEncoder, LabelEncoder

from datetime import datetime
import numpy as np

In [2]:
# Read the training and test tables from the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv'))

In [3]:
# Show the first ten training rows as the cell output.
train.head(n=10)

Unnamed: 0,ID,cr_ID,assembly_time,eq_ID,process_time,ramp,servo_defect,data_defect,status
0,HDDSN00000,CRSTR01L,2015-03-09 09:36:00,EGRWT0113,2015-03-09 11:33:55,4561.0,6083.0,1341.0,pass
1,HDDSN00001,CRSTR01Q,2015-03-10 02:13:00,EGRWT0102,2015-03-10 03:16:18,3898.0,1819.0,8037.0,pass
2,HDDSN00002,CRSTR01N,2015-03-06 23:06:00,EGRWT0103,2015-03-07 00:00:12,3016.0,4997.0,1583.0,pass
3,HDDSN00003,CRSTR01A,2015-03-08 05:10:00,EGRWT0107,2015-03-08 06:36:18,1605.0,4415.0,3345.0,pass
4,HDDSN00004,CRSTR01C,2015-03-09 06:44:00,EGRWT0113,2015-03-09 09:13:40,1002.0,2394.0,3296.0,pass
5,HDDSN00005,CRSTR01I,2015-03-10 14:10:00,EGRWT0112,2015-03-10 15:15:19,1984.0,2735.0,2953.0,pass
6,HDDSN00006,CRSTR01F,2015-03-08 11:34:00,EGRWT0105,2015-03-08 13:29:41,1356.0,2748.0,2974.0,pass
7,HDDSN00007,CRSTR01D,2015-03-11 23:06:00,EGRWT0105,2015-03-12 00:26:45,1643.0,3566.0,1482.0,pass
8,HDDSN00008,CRSTR01E,2015-03-08 05:29:00,EGRWT0114,2015-03-08 06:07:16,1618.0,3610.0,1823.0,pass
9,HDDSN00009,CRSTR01M,2015-03-09 17:08:00,EGRWT0103,2015-03-09 19:48:12,4789.0,3331.0,7306.0,fail


In [4]:
# Encode the target: fail -> 0, pass -> 1.
train.loc[train.status == "fail", "status"] = 0
train.loc[train.status == "pass", "status"] = 1

label = LabelEncoder()

TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

# Parse both timestamp columns once; every time-derived feature below uses them.
assembly_ts = pd.to_datetime(train['assembly_time'], format=TIME_FORMAT)
process_ts = pd.to_datetime(train['process_time'], format=TIME_FORMAT)

# 1. Day of month of assembly_time => assembly_time_date
train['assembly_time_date'] = assembly_ts.dt.day.astype(int)
# 1-1. Day of week of assembly_time (Monday=0) => assembly_time_day
train['assembly_time_day'] = assembly_ts.dt.dayofweek.astype(int)

# 2. Elapsed time between assembly_time and process_time, in seconds.
#    total_seconds() is used instead of Timedelta.seconds because the latter
#    only holds the within-day remainder and would wrap for gaps of a day or more.
elapsed_sec = (process_ts - assembly_ts).dt.total_seconds().astype(int)
# 2-1. Quantile-bin the elapsed time into 17 groups and store the bin code => period
train['period'] = label.fit_transform(pd.qcut(elapsed_sec, 17))

# 3-5. Quantile-bin the numeric measurements and replace each with its bin code.
#      The order is kept so `label` ends up fitted on data_defect, as before.
for column, n_bins in (('ramp', 10), ('servo_defect', 6), ('data_defect', 10)):
    train[column] = label.fit_transform(pd.qcut(train[column].astype(int), n_bins))


# Drop identifiers and the raw timestamps; only encoded features and status remain.
train.drop(['ID', 'cr_ID', 'assembly_time', 'eq_ID', 'process_time'], inplace=True, axis=1)


train.head(20)

Unnamed: 0,ramp,servo_defect,data_defect,status,assembly_time_date,assembly_time_day,period
0,8,5,0,1,9,0,12
1,8,0,9,1,10,1,3
2,7,4,1,1,6,4,2
3,4,3,6,1,8,6,7
4,0,0,6,1,9,0,15
5,5,0,5,1,10,1,3
6,3,0,5,1,8,6,12
7,4,1,1,1,11,2,6
8,4,2,2,1,8,6,0
9,8,1,9,0,9,0,15


In [5]:
label = LabelEncoder()  # no longer used for binning here; kept because the name is notebook-level state

TIME_FORMAT = "%Y-%m-%d %H:%M:%S"


def _seconds_between(frame):
    """Return the whole seconds from assembly_time to process_time for each row of ``frame``."""
    assembled = pd.to_datetime(frame['assembly_time'], format=TIME_FORMAT)
    processed = pd.to_datetime(frame['process_time'], format=TIME_FORMAT)
    # total_seconds() keeps the day component; Timedelta.seconds only holds the
    # within-day remainder and would wrap for gaps of a day or more.
    return (processed - assembled).dt.total_seconds().astype(int)


def _bin_like_train(train_values, test_values, n_bins):
    """Return integer bin codes for ``test_values`` using quantile edges of ``train_values``.

    Binning the test set on its own quantiles would give the same code a
    different value range than in the training features, so the edges are
    taken from the training distribution instead.
    """
    _, edges = pd.qcut(train_values, n_bins, retbins=True)
    edges = np.array(edges, dtype=float)
    # Open the outer edges so test values outside the training range still get a bin.
    edges[0], edges[-1] = -np.inf, np.inf
    return pd.cut(test_values, edges, labels=False)


# The raw training columns are re-read here because this cell needs the
# un-encoded values to derive the bin edges.
train_raw = pd.read_csv('./train.csv')

test_assembly_ts = pd.to_datetime(test['assembly_time'], format=TIME_FORMAT)

# 1. Day of month of assembly_time => assembly_time_date
test['assembly_time_date'] = test_assembly_ts.dt.day.astype(int)
# 1-1. Day of week of assembly_time (Monday=0) => assembly_time_day
test['assembly_time_day'] = test_assembly_ts.dt.dayofweek.astype(int)

# 2. Elapsed seconds between assembly_time and process_time, binned into 17 groups => period
test['period'] = _bin_like_train(_seconds_between(train_raw), _seconds_between(test), 17)

# 3-5. Bin the numeric measurements with the training quantile edges.
for column, n_bins in (('ramp', 10), ('servo_defect', 6), ('data_defect', 10)):
    test[column] = _bin_like_train(
        train_raw[column].astype(int), test[column].astype(int), n_bins
    )


# Drop identifiers and the raw timestamps; only the encoded features remain.
test.drop(['ID', 'cr_ID', 'assembly_time', 'eq_ID', 'process_time'], inplace=True, axis=1)


test.head(20)

# Split the target off the training frame so `train` holds features only.
train_y = np.ravel(train.status).astype(int) # Make 1D
train.drop(['status'], inplace=True, axis=1)

# 시각화

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Binning helper
def make_bins(d, col, factor=2):
    """Replace every value of ``d[col]`` with ``np.around(value / factor)``.

    The frame ``d`` is modified in place and the same object is returned.
    """
    def to_bin(value):
        return np.around(value / factor)

    binned = d[col].apply(to_bin)
    d[col] = binned
    return d

In [None]:
# `status` was split off `train` into `train_y`, so re-attach it for plotting only.
sns.barplot(x="period", y="status", data=train.assign(status=train_y));

In [None]:
# `status` was split off `train` into `train_y`, so re-attach it for plotting only.
sns.barplot(x="assembly_time_date", y="status", data=train.assign(status=train_y));

In [None]:
# `status` was split off `train` into `train_y`, so re-attach it for plotting only.
sns.barplot(x="assembly_time_day", y="status", data=train.assign(status=train_y));

In [None]:
# `train` has no process_time_date column (process_time is dropped during
# preprocessing), so build a plotting-only frame from the raw file.
process_date_df = pd.read_csv('./train.csv')
process_date_df = process_date_df.assign(
    process_time_date=pd.to_datetime(
        process_date_df['process_time'], format="%Y-%m-%d %H:%M:%S"
    ).dt.day,
    status=process_date_df['status'].map({'fail': 0, 'pass': 1}),
)
sns.barplot(x="process_time_date", y="status", data=process_date_df);

In [None]:
# `train` has no process_time_day column (process_time is dropped during
# preprocessing), so build a plotting-only frame from the raw file.
process_day_df = pd.read_csv('./train.csv')
process_day_df = process_day_df.assign(
    # Day of week, Monday=0.
    process_time_day=pd.to_datetime(
        process_day_df['process_time'], format="%Y-%m-%d %H:%M:%S"
    ).dt.dayofweek,
    status=process_day_df['status'].map({'fail': 0, 'pass': 1}),
)
sns.barplot(x="process_time_day", y="status", data=process_day_df);

In [None]:
# Unused alternative binning:
# t = make_bins(train.copy(True), 'ramp', 1200)
# `status` was split off `train` into `train_y`, so re-attach it for plotting only.
sns.barplot(x="ramp", y="status", data=train.assign(status=train_y));

In [None]:
# Unused alternative binning:
# t = make_bins(train.copy(True), 'servo_defect', 1000)
# `status` was split off `train` into `train_y`, so re-attach it for plotting only.
sns.barplot(x="servo_defect", y="status", data=train.assign(status=train_y));

In [None]:
# Unused alternative binning:
# t = make_bins(train.copy(True), 'data_defect', 1000)
# `status` was split off `train` into `train_y`, so re-attach it for plotting only.
sns.barplot(x="data_defect", y="status", data=train.assign(status=train_y));

# 케라스 모델 학습

In [18]:
from keras.models import Sequential
from keras.layers import Dense

# Binary classifier: two ReLU hidden layers (16 and 8 units) feeding a single
# sigmoid output, taking 6 input features per row.
model = Sequential([
    Dense(16, activation='relu', input_shape=(6,)),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 15 epochs, updating the weights after every single sample.
model.fit(
    x=train,
    y=train_y,
    batch_size=1,
    epochs=15,
    verbose=1,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15