# Model Application

In this part of te project we import new data and the models we have created to predict labels. With will upload the results to AWS S3 for be displayed in our frontend

In [238]:
import pandas as pd
import pickle
import numpy as np

In [239]:
# First we must load Column Transformer

ct_filename = 'Model/ct.pkl'
ct = pickle.load(open(ct_filename, 'rb'))

In [240]:
# Read csv with previuos data, we will need to create some labels based on variations respect previous values

diagnosis_filename = 'Data/diagnosis_model.csv'
diagnosis_prev = pd.read_csv(diagnosis_filename, index_col=0)
diagnosis_prev = diagnosis_prev.drop(columns=['Nombre', 'diagnostico'])
diagnosis_prev.head()

Unnamed: 0,machine,point,time,max_mms,rms_mms,max_acc,med_acc,rms_acc,kur_acc,cf_acc,...,e_range,f_range,g_range,h_range,i_range,j_range,max_dem,rms_dem,Id_Estado_Activo,Id_Estado_Activo_fixed
1,S3-M38,M-LOA-H,2019-06-18 12:38:26.205,9.2617,4.066749,0.4694,0.347729,0.148627,0.388228,3.158231,...,0.0,0.0,0.0,0.0,0.0,0.0,0.31838,0.130597,2.0,2.0
3,S3-M38,M-LOA-V,2019-06-18 12:38:41.249,6.4828,2.278911,0.43131,0.349279,0.147235,0.579381,2.929393,...,0.0,0.0,0.0,0.0,0.0,0.0,0.22062,0.103748,2.0,2.0
4,S3-M38,M-LA-H,2019-06-18 12:38:57.866,7.4407,2.977928,0.64757,0.334221,0.141516,0.324311,4.575943,...,0.0,0.0,0.0,0.0,0.0,0.0,0.53557,0.168171,2.0,2.0
7,S3-M38,M-LA-V,2019-06-18 12:39:08.660,6.9886,2.452952,0.72522,0.339139,0.146303,0.600235,4.956958,...,0.0,0.0,0.0,0.0,0.0,0.0,0.55888,0.16994,2.0,2.0
8,S3-M38,M-LA-A,2019-06-18 12:39:20.397,7.3701,3.351391,0.48182,0.343762,0.145565,0.39543,3.309999,...,0.0,0.0,0.0,0.0,0.0,0.0,0.24158,0.095285,2.0,2.0


In [241]:
# Read csv with new values to predict labels

diagnosis_pred_filename = 'Data/diagnosis_2021.csv'
diagnosis_pred = pd.read_csv(diagnosis_pred_filename, index_col =0)


In [242]:
# We must add two columns with dummy data before concatenate with
diagnosis_pred['Id_Estado_Activo'] = 1.0
diagnosis_pred['Id_Estado_Activo_fixed'] = 1.0

In [249]:
# Append dataframes

result = diagnosis_prev.append(diagnosis_pred, sort=False)
result.time

1       2019-06-18 12:38:26.205
3       2019-06-18 12:38:41.249
4       2019-06-18 12:38:57.866
7       2019-06-18 12:39:08.660
8       2019-06-18 12:39:20.397
                 ...           
8932    2021-04-07 12:15:06.604
8933    2021-04-07 12:15:16.954
8934    2021-04-07 12:15:28.784
8935    2021-04-07 12:15:39.724
8936    2021-04-07 12:15:51.904
Name: time, Length: 7880, dtype: object

In [250]:
# And apply all transformations to get features based on Feature Engineering section of "Project Argus.ipynb"

result.index = result.time
result.drop(columns='time', inplace=True)
result['prev_diag'] = result.groupby(['machine','point'])['Id_Estado_Activo'].shift(fill_value=1)

result['pct_change_acc'] = result.groupby(['machine','point']).max_acc.pct_change(fill_method='ffill' )
result['mean_mms_prev'] = result.groupby(['machine', 'point'])['rms_mms'].apply(lambda x: x.shift().expanding().mean())
result['mean_mms_prev'] = result['mean_mms_prev']/result['rms_mms']
result = result.fillna(0)


result['element_type'] = result['point'][0:1] 
result.loc[:, 'element_type'] = result.point.map(lambda x: x[0])


result_predict = result.dropna().sort_index()
result_predict['Id_Estado_Activo_fixed'] = result_predict['Id_Estado_Activo_fixed'].astype('int')
result_predict['prev_diag'] = result_predict['prev_diag'].astype('int')

# We set the Good status as 0 label and Warning and Alarm as label 1.
result_predict['prev_diag'] = result_predict['prev_diag'].replace({1:0, 2:1, 3:1})
result_predict['Id_Estado_Activo_fixed'] = result_predict['Id_Estado_Activo_fixed'].replace({1:0, 2:1, 3:1})


# And finally we only get data from last measurement
result_predict  = result_predict.loc[result_predict.index > '2021-01-01 08:00:00']

In [263]:
# Apply column transformer to data

result_pred = result_predict[['max_mms', 'rms_mms', 'max_acc', 'med_acc',
             'rms_acc', 'kur_acc', 'cf_acc', 'num_peaks', 'max_dem', 'rms_dem','pct_change_acc',
             'a_range', 'b_range', 'c_range', 'd_range', 'e_range', 'f_range', 'g_range', 'h_range',
             'i_range', 'j_range', 'mean_mms_prev', 'element_type', 'prev_diag']]
x_pred = ct.transform(result_pred)

In [264]:
# Load the model from pickle file

model_filename = 'Model/gcbmodel.sav'
model = pickle.load(open(model_filename, 'rb'))

In [265]:
# And predict data

y_pred = model.predict(x_pred)
result_predict['prediction'] = y_pred

In [271]:
result_predict[result_predict['prediction']==1].groupby('machine').count()

Unnamed: 0_level_0,point,max_mms,rms_mms,max_acc,med_acc,rms_acc,kur_acc,cf_acc,num_peaks,a_range,...,j_range,max_dem,rms_dem,Id_Estado_Activo,Id_Estado_Activo_fixed,prev_diag,pct_change_acc,mean_mms_prev,element_type,prediction
machine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
S1-M01,6,6,6,6,6,6,6,6,6,6,...,6,6,6,6,6,6,6,6,6,6
S1-M02,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
S1-M05,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
S1-M06,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
S1-M08,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
S6-76-M2,9,9,9,9,9,9,9,9,9,9,...,9,9,9,9,9,9,9,9,9,9
S6-77-M1,9,9,9,9,9,9,9,9,9,9,...,9,9,9,9,9,9,9,9,9,9
S6-77-M2,5,5,5,5,5,5,5,5,5,5,...,5,5,5,5,5,5,5,5,5,5
S6-78-M1,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2


In [272]:
threshold = 0.8

y_pred = (model.predict_proba(x_pred)[:,1] >= threshold).astype(bool)

In [274]:
result_predict['prediction_thr'] = y_pred

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_predict['prediction_thr'] = y_pred


In [275]:
result_predict[result_predict['prediction_thr']==True]

Unnamed: 0_level_0,machine,point,max_mms,rms_mms,max_acc,med_acc,rms_acc,kur_acc,cf_acc,num_peaks,...,max_dem,rms_dem,Id_Estado_Activo,Id_Estado_Activo_fixed,prev_diag,pct_change_acc,mean_mms_prev,element_type,prediction,prediction_thr
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-04-06 10:43:25.313,S3-M02,V-LOA-V,11.8500,3.080179,0.72477,0.346719,0.151017,0.050923,4.799271,0.0,...,0.64791,0.275261,1.0,0,0,0.000000,0.000000,V,1,True
2021-04-06 10:50:47.393,S3-M08,V-LA-H,10.6710,3.916375,1.55170,0.577071,0.417972,0.016573,3.712452,1.0,...,1.60840,0.771354,1.0,0,0,0.000000,0.000000,V,1,True
2021-04-06 10:52:36.843,S3-M09,V-LA-H,10.3490,3.463561,3.14720,0.740133,0.694085,0.343381,4.534312,2.0,...,3.87440,1.453441,1.0,0,0,0.000000,0.000000,V,1,True
2021-04-06 10:55:15.973,S3-M11,V-LOA-H,4.9616,1.562987,4.55510,0.919514,1.085404,0.550107,4.196687,13.0,...,5.33570,1.900803,1.0,0,0,0.836364,0.893412,V,1,True
2021-04-06 11:01:17.293,S3-M15,M-LA-A,24.8260,7.578205,1.03940,0.459596,0.265143,0.064259,3.920148,1.0,...,0.92277,0.385649,1.0,0,0,0.495690,0.245238,M,1,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-04-07 12:12:08.895,S5-61-M1,M-LOA-H,12.5410,5.145843,0.48409,0.354414,0.155025,0.360873,3.122655,0.0,...,0.35381,0.171177,1.0,0,0,-0.413374,0.937114,M,1,True
2021-04-07 12:12:17.048,S5-61-M1,M-LOA-V,12.0230,4.589507,0.75057,0.413831,0.214844,0.000150,3.493556,2.0,...,0.55306,0.255747,1.0,0,0,-0.088583,1.199479,M,1,True
2021-04-07 12:12:25.714,S5-61-M1,M-LA-H,10.0680,4.147988,0.41422,0.314938,0.122802,0.149877,3.373080,0.0,...,0.25401,0.103828,1.0,0,0,-0.229874,0.747792,M,1,True
2021-04-07 12:12:41.314,S5-61-M1,M-LA-A,10.7160,4.249784,0.39552,0.320196,0.126277,0.316442,3.132152,0.0,...,0.27051,0.122878,1.0,0,0,-0.262186,1.050316,M,1,True


In [283]:
result_point = result_predict[result_predict['prediction_thr']==True]
result_point = result_point.groupby(['machine', result.point.str[:4]]).count()
result_point = result_point.drop(columns=['point'])
result_point = result_point[result_point['max_mms']>1]

In [286]:
result_point

Unnamed: 0_level_0,Unnamed: 1_level_0,max_mms,rms_mms,max_acc,med_acc,rms_acc,kur_acc,cf_acc,num_peaks,a_range,b_range,...,max_dem,rms_dem,Id_Estado_Activo,Id_Estado_Activo_fixed,prev_diag,pct_change_acc,mean_mms_prev,element_type,prediction,prediction_thr
machine,point,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
S1-M01,M-LA,3,3,3,3,3,3,3,3,3,3,...,3,3,3,3,3,3,3,3,3,3
S1-M01,M-LO,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
S1-M09,V-LO,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
S1-M19,M-LA,3,3,3,3,3,3,3,3,3,3,...,3,3,3,3,3,3,3,3,3,3
S1-M19,V-LA,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
S6-77-M1,M-LA,3,3,3,3,3,3,3,3,3,3,...,3,3,3,3,3,3,3,3,3,3
S6-77-M1,M-LO,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
S6-77-M1,V-LA,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
S6-77-M1,V-LO,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2


In [278]:
# Only get point positive if more than one measurement is positive

result_point = result_point.groupby(['machine', result_point.point.str[:4]]).count()
result_point = result_point.drop(columns=['point'])
result_point = result_point[result['max_mms']>1]
maquinas = result_point.reset_index().machine.unique()
len(result)

AttributeError: Can only use .str accessor with string values!