# 機器故障予知デモ

機器からあがってくるセンサーデータをつかって、将来エラーが起きる機器を予測します

# 初期化

In [1]:
import sklearn

import brunel

import warnings
warnings.filterwarnings('ignore')

from scipy.stats import chi2_contingency,ttest_ind
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


from sklearn.cross_validation import train_test_split, StratifiedKFold
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score, roc_curve, roc_auc_score

import numpy as np

import urllib3, requests, json


# データの読み込み

In [2]:


import os, pandas as pd
# Add asset from file system
df = pd.read_csv(os.environ['DSX_PROJECT_DIR']+'/datasets/Cond3n_e.csv')
df.head()



Unnamed: 0,M_CD,UP_TIIME,POWER,TEMP,POWER_DIFF,TEMP_DIFF,POWER_5MAVG,TEMP_5MAVG,ERR_CD_5FUTURE
0,1000,0,948,250,0,0,0.0,0.0,0
1,1000,1,945,250,-3,0,0.0,0.0,0
2,1000,2,944,250,-1,0,0.0,0.0,0
3,1000,3,943,250,-1,0,0.0,0.0,0
4,1000,4,940,250,-3,0,-1.6,0.0,0


# データの概略を確認

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5802 entries, 0 to 5801
Data columns (total 9 columns):
M_CD              5802 non-null int64
UP_TIIME          5802 non-null int64
POWER             5802 non-null int64
TEMP              5802 non-null int64
POWER_DIFF        5802 non-null int64
TEMP_DIFF         5802 non-null int64
POWER_5MAVG       5802 non-null float64
TEMP_5MAVG        5802 non-null float64
ERR_CD_5FUTURE    5802 non-null int64
dtypes: float64(2), int64(7)
memory usage: 408.0 KB


In [4]:
pd.options.display.precision = 2
df.describe(include='all')

Unnamed: 0,M_CD,UP_TIIME,POWER,TEMP,POWER_DIFF,TEMP_DIFF,POWER_5MAVG,TEMP_5MAVG,ERR_CD_5FUTURE
count,5802.0,5802.0,5802.0,5802.0,5802.0,5802.0,5802.0,5802.0,5802.0
mean,715.12,48.71,941.28,254.45,-1.06,0.08,-0.96,0.08,38.26
std,388.21,39.35,73.59,14.68,4.02,0.66,1.9,0.27,78.35
min,104.0,0.0,640.0,242.0,-150.0,-2.0,-33.4,-0.4,0.0
25%,238.0,20.0,903.0,250.0,-2.0,0.0,-1.4,0.0,0.0
50%,1009.0,41.0,927.0,251.0,-1.0,0.0,-0.8,0.0,0.0
75%,1029.0,63.0,952.0,252.0,0.0,0.0,0.0,0.0,0.0
max,1049.0,203.0,1182.0,329.0,50.0,21.0,7.2,5.0,303.0


# エラーコードの確認

3種類のエラーがある

In [5]:
df['ERR_CD_5FUTURE'].value_counts()

0      4504
202     574
101     561
303     163
Name: ERR_CD_5FUTURE, dtype: int64

In [6]:
%brunel data('df') bar x(ERR_CD_5FUTURE) y(#count) color(ERR_CD_5FUTURE)

<IPython.core.display.Javascript object>

# データの中身を確認

M_CD: マシンコード<BR>
UP_TIIME: 起動時間<BR>
POWER: 電力<BR>
TEMP: 温度<BR>
POWER_DIFF: 電力差分<BR>
TEMP_DIFF: 温度差分<BR>
POWER_5MAVG: 電力差分5期移動平均<BR>
TEMP_5MAVG: 温度差分5期移動平均<BR>
ERR_CD_5FUTURE: 5期先エラーコード<BR>

In [7]:
df.head(3720)

Unnamed: 0,M_CD,UP_TIIME,POWER,TEMP,POWER_DIFF,TEMP_DIFF,POWER_5MAVG,TEMP_5MAVG,ERR_CD_5FUTURE
0,1000,0,948,250,0,0,0.0,0.0,0
1,1000,1,945,250,-3,0,0.0,0.0,0
2,1000,2,944,250,-1,0,0.0,0.0,0
3,1000,3,943,250,-1,0,0.0,0.0,0
4,1000,4,940,250,-3,0,-1.6,0.0,0
5,1000,5,938,250,-2,0,-2.0,0.0,0
6,1000,6,936,250,-2,0,-1.8,0.0,0
7,1000,7,936,250,0,0,-1.6,0.0,0
8,1000,8,935,251,-1,1,-1.6,0.2,0
9,1000,9,934,251,-1,0,-1.2,0.2,0


# 温度と時間の関係

e202は温度が高いときに発生、e101は電力が低いときにおきやすい

In [8]:
import brunel
#%brunel data('df') x(POWER) y(TEMP) color(ERR_CD) :: width=800, height=400
%brunel data('df') x(POWER) y(TEMP) color(ERR_CD_5FUTURE)

<IPython.core.display.Javascript object>

# 時間推移によるエラーコードの変化

e101、e303は電力低下時におきている

In [9]:
%brunel data('df') x(UP_TIIME) y(POWER) color(ERR_CD_5FUTURE)

<IPython.core.display.Javascript object>

# エラーのみで電力の移動平均を比較

e101は電力低下時に発生している。e303も電力低下時に多く発生している

In [10]:
dferr=df[df.ERR_CD_5FUTURE != 0]
%brunel data('dferr') stack  bar x(POWER_5MAVG)  y(#count) color(ERR_CD_5FUTURE)

<IPython.core.display.Javascript object>

# モデリング

In [11]:
# convert IS_DEFAULT to 1/0
#le = LabelEncoder()

#df.loc[:,'ERR_CD_5FUTURE']= le.fit_transform(df.loc[:,'ERR_CD_5FUTURE'])

y = df.ERR_CD_5FUTURE

# drop y 
X = df.drop(['ERR_CD_5FUTURE'], axis = 1)
#df.head(10)

In [12]:
from sklearn_pandas import DataFrameMapper

mapper = DataFrameMapper(
    [
     ('POWER', None),
     ('TEMP', None),
     ('POWER_DIFF', None),
     ('TEMP_DIFF', None),
     ('POWER_5MAVG', None),
     ('TEMP_5MAVG', None)]
)

In [13]:
# split the data to training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [14]:
import sklearn.pipeline
from sklearn.preprocessing import OneHotEncoder

random_forest = RandomForestClassifier()
steps = [('mapper', mapper),('RandonForestClassifier', random_forest)]
pipeline = sklearn.pipeline.Pipeline(steps)
model=pipeline.fit( X_train, y_train )
model

Pipeline(memory=None,
     steps=[('mapper', DataFrameMapper(default=False, df_out=False,
        features=[('POWER', None), ('TEMP', None), ('POWER_DIFF', None), ('TEMP_DIFF', None), ('POWER_5MAVG', None), ('TEMP_5MAVG', None)],
        input_df=False, sparse=False)), ('RandonForestClassifier', RandomForestClassifier(bootstr...n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False))])

# モデルの評価

In [15]:
### call pipeline.predict() on your X_test data to make a set of test predictions
y_prediction = pipeline.predict( X_test )
### test your predictions using sklearn.classification_report()
report = sklearn.metrics.classification_report( y_test, y_prediction )
### and print the report
print(report)

             precision    recall  f1-score   support

          0       0.99      1.00      0.99       901
        101       0.96      0.88      0.92       112
        202       1.00      1.00      1.00       115
        303       0.97      0.91      0.94        33

avg / total       0.98      0.98      0.98      1161



# MLのレポジトリへ保存

In [None]:
#convert the y_test array into a pandas dataframe
y_test_df = pd.DataFrame(y_test,columns=['ERR_CD_5FUTURE'])

In [None]:
from dsx_ml.ml import save

model_name = "MachineErr_SL"
save(model = model, name = model_name, x_test=X_test, y_test=y_test_df, algorithm_type = 'Classification')

# スコアリング

ノートブックを保存し、プロジェクトの[models]タブに切り替えます（ヒント：プロジェクト名のリンクを右クリックし、ブラウザの別のタブで開き、上部にあるModelsタブをクリックします）。
Modelsの下で、保存したモデルを見つけてクリックします。
テストリンクをクリックしてモデルをテストします。 次のデータを使用してテストすることができます。
M_CD: 111
UP_TIIME: 66
POWER: 1102
TEMP: 254
POWER_DIFF: 31
TEMP_DIFF: 21
POWER_5MAVG: 10
TEMP_5MAVG: 7

セッション終了

In [None]:
%%javascript
Jupyter.notebook.session.delete();

<IPython.core.display.Javascript object>