In [1]:
import pandas as pd
import numpy as np
#import matplotlib.pyplot as plt

from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import RandomForestClassifier


In [2]:
def readfile(bestand):
    """Load a labelled measurement workbook and split it into features and targets.

    Parameters
    ----------
    bestand : str, path-like or file-like
        Location of the Excel workbook; handed unchanged to ``pd.read_excel``.
        The first column of the sheet becomes the index.

    Returns
    -------
    X : pandas.DataFrame
        Every numeric column of the sheet, plus a ``time`` column that is a copy
        of the index (kept only when the index itself is numeric).
    y : pandas.DataFrame
        Integer 0/1 indicator columns for the non-numeric columns, with the
        ``Health_state_H`` indicator removed.

    Raises
    ------
    KeyError
        If the encoded labels contain no ``Health_state_H`` column.
    """
    table = pd.read_excel(bestand, index_col=0)
    table['time'] = table.index

    X = table.select_dtypes(include='number')
    labels = table.select_dtypes(exclude='number')

    # dtype=int: recent pandas versions emit boolean dummies by default, which would
    # make a fitted classifier predict rows such as '[False  True]'. The notebook maps
    # predictions to fault names through their text form ('[1 0]', '[0 1]', ...), so the
    # targets have to stay plain 0/1 integers on every pandas version.
    y = pd.get_dummies(labels, dtype=int).drop(columns='Health_state_H')

    return X, y

In [3]:
def make_arr(XX, tau=4):
    """Compute the two residual features (' ARR_1  ', ' ARR_2  ') from raw measurements.

    Parameters
    ----------
    XX : pandas.DataFrame
        Needs the columns 'S', 'V0_measured', 'V1_measured' and 'V2_measured'.
        The index is used as the timestamp of each row.
    tau : float, default 4
        Divisor of the elapsed time inside the exponential of ARR_2.
        NOTE(review): presumably the time constant of the circuit, in the same
        unit as the index - confirm against the hardware.

    Returns
    -------
    pandas.DataFrame
        Same index as ``XX`` with two columns:
        ' ARR_1  ' = |(V0 - V1) - (V1 - V2)|
        ' ARR_2  ' = |(V0 - V2) - dumV * exp(-(time since last switch) / tau)|
        where dumV is (V0 - V2) on the most recent row where 'S' changed.
        The first row always counts as a switch row.
    """
    span = XX['V0_measured'] - XX['V2_measured']

    arr = pd.DataFrame(index=XX.index)
    arr[' ARR_1  '] = abs( (XX['V0_measured']-XX['V1_measured']) - (XX['V1_measured']-XX['V2_measured']) )

    # |dS| is non-zero exactly on the rows immediately after a switch transition.
    switch_weight = XX['S'].diff().fillna(1).abs()
    # Positional (numpy) mask so that a duplicated index cannot trigger re-alignment.
    is_switch = (switch_weight != 0).to_numpy()

    # Blank out every non-switch row and carry the last switch value forward.
    # Masking on the switch indicator (instead of replacing the *value* 0.0) keeps a
    # genuine 0 V difference or a timestamp of 0 at a switch row intact, and avoids
    # the `method=` argument of Series.replace, which newer pandas no longer accepts.
    dumV = (switch_weight * span).where(is_switch).ffill()
    dumT = (switch_weight * XX.index).where(is_switch).ffill()   # time of the last switch

    arr[' ARR_2  '] = abs( span - dumV*np.exp(-(dumT.index - dumT)/tau) )

    return arr

In [4]:
# fit a RF model and determine gini importance

def selectRFmodel(XX,y):
    """Fit a 100-tree random forest (Gini criterion) on XX against y.

    XX and y are passed to the classifier as they are; no imputation or
    scaling happens here. random_state is None, so the forest is not seeded.

    Returns a tuple: (the fitted classifier, its feature_importances_ array).
    """
    forest = RandomForestClassifier(
        n_estimators=100,
        criterion="gini",
        random_state=None,
    )
    fitted = forest.fit(XX, y)                 # fit first ...
    importances = forest.feature_importances_  # ... the importances exist only afterwards
    return fitted, importances

In [5]:
# Load the training data: df_X receives the numeric features, df_y the encoded labels
# (see readfile). The workbook is fetched over HTTPS from the project's GitHub
# repository, so this cell needs network access.
# NOTE(review): the original note describes this data set as "balanced and complete";
# nothing in this cell verifies that.
# A local copy of the same workbook (data/Arduino_R10kR10kC200mu_10sec.xlsx) can be
# passed to readfile instead when working offline.
df_X, df_y = readfile("https://raw.githubusercontent.com/chrisrijsdijk/RAMS/master/data/Arduino_R10kR10kC200mu_10sec.xlsx")

In [6]:
# Fit a random forest directly on the measured features (df_X) against the labels (df_y).

# Keep the feature column labels; they are used later as row labels of the importance table.
X_names = df_X.columns

# rf_X_RC_model is the fitted forest, rf_X_RC_featimp its per-feature importances.
rf_X_RC_model, rf_X_RC_featimp = selectRFmodel(df_X, df_y)

In [7]:
# Fit a second random forest on the residual features built by make_arr, same labels (df_y).

df_as = make_arr(df_X)
# Keep the residual column labels; they label the importance table and the live residual frame later on.
as_names = df_as.columns

# rf_A_RC_model is the fitted forest, rf_A_RC_featimp its per-feature importances.
rf_A_RC_model, rf_A_RC_featimp = selectRFmodel(df_as, df_y)

In [8]:
# Report the feature importances of both forests, most important feature first.

def _importance_table(importances, feature_names, caption):
    """Return a one-column frame of importances labelled by feature, sorted descending."""
    table = pd.DataFrame(importances, index=feature_names, columns=[caption])
    return table.sort_values(by=caption, ascending=False)

_RULE = '###########################################################################'

print(_RULE)
print(_importance_table(rf_X_RC_featimp, X_names, 'Gini importance of the model X'))
print(' ')
print(_importance_table(rf_A_RC_featimp, as_names, 'Gini importance of the model A'))
print(_RULE)

###########################################################################
             Gini importance of the model X
V2_measured                        0.399307
V1_measured                        0.356468
time                               0.218663
V0_measured                        0.020383
S                                  0.005179
 
          Gini importance of the model A
 ARR_2                           0.50421
 ARR_1                           0.49579
###########################################################################


In [9]:
import serial
import time
import keyboard
from IPython.display import clear_output

In [17]:
# Connect to the Arduino over a serial port.
#
# 'COM3' is machine-specific: look up in the operating system's device settings which
# COM port the Arduino is attached to and adjust the first argument if it differs.
# timeout=0.2 bounds how long a read on this port waits for data.
ser = serial.Serial('COM3', 9600, timeout=0.2)
# Wait 2 seconds before the first command is sent.
# NOTE(review): presumably this gives the board time to restart after the port is opened - confirm.
time.sleep(2)

In [18]:
# read write the serial connection at a given sampling rate

def write_read(x):
    """Send the switch command `x` to the Arduino and return its reply.

    x is a string; it is UTF-8 encoded and written to the module-level
    serial connection `ser`. The return value is whatever one
    `ser.readline()` call yields afterwards, left undecoded for the caller.
    """
    command = bytes(x, 'utf-8')
    ser.write(command)
    return ser.readline()

In [19]:
# Control the Arduino while collecting and classifying the data.
#
# The cell closes/opens the switch every `period` seconds, classifies every sensor
# reading with both random forests (raw measurements and residuals) and keeps a
# running tally of the diagnoses until <q> is pressed. The serial port is closed
# when the cell ends, also when it ends because of an error or a kernel interrupt.


def _read_fields(command):
    """Send `command` via write_read and return the reply split on whitespace.

    Raises ValueError when the reply has fewer than the four fields (S, V0, V1, V2)
    that the rest of this cell indexes, e.g. after a read timeout.
    """
    fields = write_read(command).decode().rstrip().split()
    if len(fields) < 4:
        raise ValueError('Incomplete sensor reading from the Arduino: ' + repr(fields))
    return fields


def _sample_features(fields, t, tt, dumV):
    """Turn one split sensor line into the inputs of the two random forests.

    fields : list of str - switch position followed by the raw integer readings
    t      : seconds since the last switch transition
    tt     : offset added to t to form the time feature
    dumV   : scaled V0-V2 recorded at the last switch transition

    Returns (dum_X, dum_A): a one-row array [S, scaled readings..., t+tt] and a
    one-row array [arr1, arr2]. Readings are scaled to volts with /1023*5.
    """
    counts = [int(field) for field in fields]
    scaled = [count/1023*5 for count in counts[1:]]
    dum_X = np.array([ np.append(np.array([[counts[0]]]), np.append(scaled, t+tt)) ])

    arr1 = abs((  (counts[1]-counts[2]) - (counts[2]-counts[3]) )/1023*5 )
    arr2 = abs(   (counts[1]-counts[3])/1023*5 - dumV*np.exp(-t/4)   )
    dum_A = np.array( [[arr1, arr2]] )
    return dum_X, dum_A


diagnose = {}                                                    #maps the text form of a forest prediction to a fault label
diagnose['[1 0]'] = 'C fault'
diagnose['[0 0]'] = 'Healthy'
diagnose['[0 1]'] = 'R fault'
diagnose['[1 1]'] = 'RC fault'

diagnoses = pd.DataFrame()                                       #all classified samples, one row per reading
dum8 = pd.DataFrame()                                            #the most recent classified sample
pd.options.display.width = 100
pd.options.display.max_colwidth = 8

print('The switching period in the training set was 10 seconds')
print('Choose a switching period for the validation:')
while True:                                                      #ask again until an integer is entered, so `period` always exists
    try:
        period = int(input('Input:'))                            #state holding time of the switch
        break
    except ValueError:
        print("Not an integer")

num ='1'                                                         #num='1' is the command that closes the switch
t = 0.0                                                          #timer on the state holding time of the switch
tt = 0.0                                                         #timer on a switch cycle (on+off)

try:
    dummy = _read_fields(num)                                    #send the switch command and take the first sensor reading
    tijd = time.time()                                           #moment of the last switch transition
    print(dummy)
    dumV = (int(dummy[1])-int(dummy[3]))/1023*5                  #scaled V0-V2 at the moment of a switch transition
    dum_X, dum_A = _sample_features(dummy, t, tt, dumV)

    while not(keyboard.is_pressed('q')):                         #each pass classifies the current sample, then reads the next one
        clear_output(wait=True)
        print("The switching period during this validation is: ", period)
        print("Press <q> to quit")
        print("#########################################################")

        print('time:                                         ',str(pd.to_datetime(round(time.time(), 0),unit = 's')))
        print('.........................................................')

        try:
            dum6 = diagnose[str(rf_X_RC_model.predict(dum_X)[0])]

            #no residual diagnosis right at a switch transition, nor while V1 is within
            #0.02 V of V0 or of V2
            voltages_too_close = (abs(dum_X[0][1]-dum_X[0][2])<0.02) or (abs(dum_X[0][3]-dum_X[0][2])<0.02)
            if t==0 or voltages_too_close:
                dum7 = '-'
            else:
                dum7 = diagnose[str(rf_A_RC_model.predict(dum_A)[0])]

            dum8 = pd.concat([pd.DataFrame(dum_X, columns=['S', '   V0   ', '   V1   ', '   V2   ', '  time  ']),
                              pd.DataFrame(dum_A, columns=as_names),
                              pd.DataFrame([dum6], columns=[' X_label']),
                              pd.DataFrame([dum7], columns=[' A_label'])], axis='columns')

            diagnoses = pd.concat([diagnoses, dum8])

            print(dum8)

            print('.........................................................')

            print('frequencies of random forest classification from measurements:        ')
            print(diagnoses[' X_label'].value_counts())

            print(' ')

            print('frequencies of random forest classification from arrs                 ')
            print(diagnoses[' A_label'].value_counts())

        except Exception as err:                                 #keep sampling, but show why this sample could not be classified
            print('Sample skipped:', repr(err))

        if t < period-1:                                         #check t<the entered state holding time of the switch; -1 to avoid time exceedance
            dummy = _read_fields(num)                            #take the sensor reading while keeping the switch position num
            t = time.time()-tijd                                 #refresh t

        else:
            num = str( abs(int(num) - 1) )                       #flip the switch position from zero to one or conversely
            dummy = _read_fields(num)                            #take the sensor reading upon the switch transition

            t = 0.0                                              #reset the timer on the state holding time
            #NOTE(review): t was reset on the line above, so tt is always 0.0 here and the
            #time feature never covers a full on+off cycle. Left unchanged because the
            #time convention of the training data is not visible here - confirm intent.
            tt = int(num) * t
            tijd = time.time()                                   #refresh tijd of the last switch transition

            dumV = (int(dummy[1])-int(dummy[3]))/1023*5          #scaled V0-V2 at the moment of a switch transition

        dum_X, dum_A = _sample_features(dummy, t, tt, dumV)

    print('You pressed <q>uit')
finally:
    ser.close()       #always release the serial port; if left open, reconnecting to the Arduino is not possible

The switching period during this validation is:  9
Press <q> to quit
#########################################################
time:                                          2025-01-29 15:14:26
.........................................................
     S     V0        V1        V2       time     ARR_1     ARR_2    X_label  A_label
0  1.0      5.0   4.65...   4.32...   7.86...   0.00...   0.06...   Healthy  Healthy
.........................................................
frequencies of random forest classification from measurements:        
Healthy    355
C fault     22
Name:  X_label, dtype: int64
 
frequencies of random forest classification from arrs                 
Healthy    358
-           13
C fault      6
Name:  A_label, dtype: int64
You pressed <q>uit


In [13]:
# Close the serial port by hand.
# NOTE(review): presumably a fallback for when the acquisition cell stops before it
# reaches its own ser.close() - confirm whether this cell is still needed.
ser.close()

In [14]:
# Display the samples collected during the validation run.
# To archive them instead, append to the expression below:
#     .to_excel('Val_R10kR05kC200mu_05sec_R.xlsx')
diagnoses


Unnamed: 0,S,V0,V1,V2,time,ARR_1,ARR_2,X_label,A_label
0,1.0,4.99...,2.49...,0.00...,0.00...,0.00...,0.00...,C fault,-
0,1.0,4.99...,3.15...,1.32...,1.25...,0.00...,0.02...,Healthy,Healthy
0,1.0,5.00...,3.58...,2.17...,2.35...,0.00...,0.04...,Healthy,Healthy
0,1.0,5.00...,3.91...,2.83...,3.46...,0.00...,0.06...,Healthy,Healthy
0,1.0,4.99...,4.15...,3.30...,4.51...,0.00...,0.07...,Healthy,Healthy
0,1.0,5.00...,4.34...,3.69...,5.63...,0.00...,0.07...,Healthy,Healthy
0,1.0,5.00...,4.49...,4.00...,6.77...,0.00...,0.07...,Healthy,Healthy
0,1.0,5.00...,4.60...,4.22...,7.86...,0.00...,0.07...,Healthy,Healthy
0,1.0,5.00...,4.69...,4.39...,8.99...,0.00...,0.07...,Healthy,Healthy
0,0.0,0.00...,2.27...,4.53...,0.00...,0.00...,0.00...,Healthy,-


In [15]:
# Per diagnosis of the measurement-based forest: number of samples, and the
# mean/min/max of each voltage and of the time feature.
_x_stats = ['mean', 'min', 'max']
_x_agg_spec = {'S': 'size'}
for _x_column in ('   V0   ', '   V1   ', '   V2   ', '  time  '):
    _x_agg_spec[_x_column] = list(_x_stats)

diagnoses.groupby([' X_label']).agg(_x_agg_spec)

Unnamed: 0_level_0,S,V0,V0,V0,V1,V1,V1,V2,V2,V2,time,time,time
Unnamed: 0_level_1,size,mean,min,max,mean,min,max,mean,min,max,mean,min,max
X_label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
C fault,1,4.99...,4.99...,4.99...,2.49...,2.49...,2.49...,0.00...,0.0,0.00...,0.00...,0.0,0.00...
Healthy,50,2.59...,0.00...,5.00...,2.61...,0.26...,4.71...,2.62...,0.42522,4.56...,4.29...,0.0,8.99...


In [16]:
# Per diagnosis of the residual-based forest: number of samples, and the
# mean/min/max of each voltage and of the time feature.
_a_stats = ['mean', 'min', 'max']
_a_agg_spec = {'S': 'size'}
for _a_column in ('   V0   ', '   V1   ', '   V2   ', '  time  '):
    _a_agg_spec[_a_column] = list(_a_stats)

diagnoses.groupby([' A_label']).agg(_a_agg_spec)

Unnamed: 0_level_0,S,V0,V0,V0,V1,V1,V1,V2,V2,V2,time,time,time
Unnamed: 0_level_1,size,mean,min,max,mean,min,max,mean,min,max,mean,min,max
A_label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
-,6,2.49...,0.0,5.0,2.45...,2.27...,2.71261,2.41...,0.0,4.56...,0.00...,0.00...,0.00...
Healthy,45,2.66...,0.0,5.0,2.63...,0.26...,4.71652,2.59...,0.54741,4.43...,4.77...,1.04...,8.99...
