# Model Application

In this part of the project we import new data and the models we have created to predict labels. We will upload the results to AWS S3 to be displayed in our frontend.

In [13]:
import pandas as pd
import pickle
import numpy as np

In [14]:
# First we must load the fitted Column Transformer from its pickle file.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.

ct_filename = 'Model/ct.pkl'
# The context manager closes the file handle as soon as the object is loaded.
with open(ct_filename, 'rb') as ct_file:
    ct = pickle.load(ct_file)

In [15]:
# Load the CSV with the previous data; it is needed because some values are
# derived from variations with respect to earlier measurements.

diagnosis_filename = 'Data/diagnosis_model.csv'
diagnosis_prev = (
    pd.read_csv(diagnosis_filename, index_col=0)
    .drop(columns=['Nombre', 'diagnostico'])
)
diagnosis_prev.head()

Unnamed: 0,machine,point,time,max_mms,rms_mms,max_acc,med_acc,rms_acc,kur_acc,cf_acc,...,e_range,f_range,g_range,h_range,i_range,j_range,max_dem,rms_dem,Id_Estado_Activo,Id_Estado_Activo_fixed
0,S3-M38,M-LOA-H,2019-06-18 12:38:26.205,9.2617,4.066749,0.4694,0.347729,0.148627,0.388228,3.158231,...,0.0,0.0,0.0,0.0,0.0,0.0,0.31838,0.130597,2.0,2.0
1,S3-M38,M-LOA-V,2019-06-18 12:38:41.249,6.4828,2.278911,0.43131,0.349279,0.147235,0.579381,2.929393,...,0.0,0.0,0.0,0.0,0.0,0.0,0.22062,0.103748,2.0,2.0
2,S3-M38,M-LA-H,2019-06-18 12:38:57.866,7.4407,2.977928,0.64757,0.334221,0.141516,0.324311,4.575943,...,0.0,0.0,0.0,0.0,0.0,0.0,0.53557,0.168171,2.0,2.0
3,S3-M38,M-LA-V,2019-06-18 12:39:08.660,6.9886,2.452952,0.72522,0.339139,0.146303,0.600235,4.956958,...,0.0,0.0,0.0,0.0,0.0,0.0,0.55888,0.16994,2.0,2.0
4,S3-M38,M-LA-A,2019-06-18 12:39:20.397,7.3701,3.351391,0.48182,0.343762,0.145565,0.39543,3.309999,...,0.0,0.0,0.0,0.0,0.0,0.0,0.24158,0.095285,2.0,2.0


In [16]:
# Load the CSV with the new values whose labels will be predicted.

diagnosis_pred_filename = 'Data/diagnosis_2021.csv'
diagnosis_pred = pd.read_csv(filepath_or_buffer=diagnosis_pred_filename, index_col=0)

In [17]:
# Add the two status columns with dummy data (1.0) to the new data before it is
# concatenated with the previous data.
for placeholder_column in ('Id_Estado_Activo', 'Id_Estado_Activo_fixed'):
    diagnosis_pred[placeholder_column] = 1.0

In [18]:
# Stack the previous data and the new data into a single dataframe.
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the supported
# equivalent and, like `append`, keeps the original index labels by default.

result = pd.concat([diagnosis_prev, diagnosis_pred], sort=False)

0       2019-06-18 12:38:26.205
1       2019-06-18 12:38:41.249
2       2019-06-18 12:38:57.866
3       2019-06-18 12:39:08.660
4       2019-06-18 12:39:20.397
                 ...           
8932    2021-04-07 12:15:06.604
8933    2021-04-07 12:15:16.954
8934    2021-04-07 12:15:28.784
8935    2021-04-07 12:15:39.724
8936    2021-04-07 12:15:51.904
Name: time, Length: 8870, dtype: object

In [19]:
# Apply all transformations to get the features, based on the Feature Engineering
# section of "Project Argus.ipynb".

# Use the measurement timestamp as the index (the 'time' column is consumed here).
result = result.set_index('time')

point_keys = ['machine', 'point']

# Status of the preceding row of the same machine/point; the first row of each
# group has no predecessor and is filled with 1.
result['prev_diag'] = result.groupby(point_keys)['Id_Estado_Activo'].shift(fill_value=1)

# Relative change of max_acc with respect to the preceding row of the same machine/point.
# NOTE(review): `fill_method` is deprecated in recent pandas releases (2.1+) — confirm target version.
result['pct_change_acc'] = result.groupby(point_keys)['max_acc'].pct_change(fill_method='ffill')

# Expanding mean of the preceding rms_mms values of each machine/point, expressed
# relative to the current rms_mms. `transform` always returns a result aligned with
# the original rows, whereas `groupby(...).apply` prepends the group keys to the
# index on pandas >= 2.0, which breaks the column assignment.
prev_mean_mms = result.groupby(point_keys)['rms_mms'].transform(
    lambda x: x.shift().expanding().mean()
)
result['mean_mms_prev'] = prev_mean_mms / result['rms_mms']
result = result.fillna(0)

# The element type is the first character of the point name.
result['element_type'] = result['point'].map(lambda x: x[0])

result_predict = result.dropna().sort_index()

# Cast both status columns to int, then set the Good status (1) as label 0 and
# Warning and Alarm (2 and 3) as label 1.
status_to_label = {1: 0, 2: 1, 3: 1}
for status_column in ('Id_Estado_Activo_fixed', 'prev_diag'):
    result_predict[status_column] = (
        result_predict[status_column].astype('int').replace(status_to_label)
    )

# And finally we only keep data from the last measurement. The explicit copy makes
# the filtered frame independent, so adding columns to it later does not raise
# SettingWithCopyWarning.
result_predict = result_predict.loc[result_predict.index > '2021-01-01 08:00:00'].copy()

In [20]:
# Select the feature columns and run them through the column transformer.

feature_columns = [
    'max_mms', 'rms_mms', 'max_acc', 'med_acc', 'rms_acc', 'kur_acc', 'cf_acc',
    'num_peaks', 'max_dem', 'rms_dem', 'pct_change_acc',
    'a_range', 'b_range', 'c_range', 'd_range', 'e_range',
    'f_range', 'g_range', 'h_range', 'i_range', 'j_range',
    'mean_mms_prev', 'element_type', 'prev_diag',
]
result_pred = result_predict[feature_columns]
x_pred = ct.transform(result_pred)

In [21]:
# Load the model from its pickle file.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.

model_filename = 'Model/gcbmodel.sav'
# The context manager closes the file handle as soon as the model is loaded.
with open(model_filename, 'rb') as model_file:
    model = pickle.load(model_file)

In [22]:
# Predict the labels for the transformed data and attach them to the dataframe.

y_pred = model.predict(x_pred)
# `assign` returns a new frame instead of writing into `result_predict` in place;
# `result_predict` is a boolean-filtered slice, and in-place column assignment on
# such a slice raises SettingWithCopyWarning.
result_predict = result_predict.assign(prediction=y_pred)

We are going to set a machine to "To Analyze" if more than three points of the same element (Fan or Motor) are set to that state. We must group the dataframe and filter it.

In [27]:
# Rows whose prediction is 1.
result_positive = result_predict[result_predict['prediction'] == 1]

# Number of such rows per machine and element type.
result_by_element = result_positive.groupby(['machine', 'element_type']).count()

# Machines with at least one element whose count exceeds the threshold.
# NOTE(review): the threshold here is > 4, while the text above this cell says
# "more than three points" — confirm which one is intended.
machines = result_by_element[result_by_element['point'] > 4].reset_index().machine.unique()

# Keep only the positive rows that belong to those machines.
result_positive_filtered = result_positive[result_positive['machine'].isin(machines)]

In [54]:
# Finally, write the filtered dataframe to a CSV file; this cell only creates the
# local file, the upload to the S3 service is not performed here.

result_positive_filtered.to_csv(path_or_buf='results.csv')