In [968]:
import serial
import time
import pandas as pd
import scipy
from scipy import signal
import numpy as np
import math
from statistics import mean
import datetime
from datetime import date
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from datetime import datetime,timedelta

In [72]:
import pickle

# Load the pre-trained "master" classifier that is used until a local model
# has been trained by retrain_model().
# SECURITY: pickle.load can execute arbitrary code while unpickling; only
# load 'testmodel.pkl' when it comes from a trusted source.
with open('testmodel.pkl', 'rb') as model_file:  # context manager closes the handle
    clf = pickle.load(model_file)

In [980]:
def event_detection(data, threshold=None, min_gap=90):
    '''
    Detects devices being turned On.

    The signal is median filtered (default 3-sample kernel) to suppress
    single-sample spikes, then its gradient is compared against a threshold.
    Consecutive detections that belong to the same transition are collapsed
    so only the first index of each rising edge is reported.

    Parameters
    ----------
    data : array-like
        1-D sequence of consumption samples.
    threshold : float, optional
        Minimum gradient that counts as a turn-on edge. When omitted the
        module-level THRSHLD constant is used.
    min_gap : int, optional
        An index is dropped when it lies within `min_gap` samples after the
        previously kept event (default 90, the value that was hard-coded).

    Returns
    -------
    list of int
        Sample indices (ascending) at which a turn-on edge starts. Empty
        when `data` has fewer than two samples or no edge is found.
    '''
    if threshold is None:
        threshold = THRSHLD  # fall back to the notebook-wide constant

    data = np.asarray(data, dtype=float)
    if data.size < 2:
        # np.gradient needs at least two samples; nothing to detect anyway
        return []

    data = scipy.signal.medfilt(data, kernel_size=None)  # remove any error spikes
    dbydx = np.gradient(data)  # find edges

    # indices whose slope exceeds the threshold, already in ascending order
    candidates = np.flatnonzero(dbydx > threshold).tolist()

    # Greedy debounce: keep an index only if it is more than `min_gap` samples
    # after the last kept one. Same result as removing every index in
    # (x, x + min_gap] for each kept x, without mutating a list while
    # iterating over it.
    on_events = []
    for idx in candidates:
        if not on_events or idx - on_events[-1] > min_gap:
            on_events.append(idx)
    return on_events

In [981]:
def get_signal (length, port=None):
    '''
    Reads `length` valid "value,timestamp" lines from a serial-like source.

    Each line is decoded, split on the comma, and the first field converted
    to float. Lines that do not parse (wrong number of fields, or a
    non-numeric value) are reported and skipped, so the function keeps
    reading until exactly `length` samples have been collected.

    Parameters
    ----------
    length : int
        Number of samples required. Values <= 0 return empty arrays
        without reading.
    port : object with a `readline()` method returning bytes, optional
        Source to read from. When omitted the module-level `ser`
        connection is used.

    Returns
    -------
    buffer : numpy.ndarray
        The parsed values as floats.
    time_buffer : numpy.ndarray
        The matching timestamp strings, with surrounding whitespace
        (including the line terminator) stripped.
    '''
    if port is None:
        port = ser  # module-level serial connection

    values = []
    stamps = []
    string_n = ''
    while len(values) < length:
        b = port.readline()                    # read a byte string
        string_n = b.decode(errors='ignore')   # decode byte string into Unicode
        try:
            val, timestamp = string_n.split(sep = ',')
            val = float(val)  # convert string to float
        except ValueError:
            # malformed line: wrong field count or non-numeric value
            print('read failed, retrying!')  # typical serial comm error
            continue
        values.append(val)
        stamps.append(timestamp.strip())  # drop the trailing newline from the device

    if values:
        print(string_n)  # echo the last line that was read

    # A single np.append onto an empty array yields the same dtypes as
    # appending sample by sample, without re-copying the array every read.
    buffer = np.append(np.array([]), values)
    time_buffer = np.append(np.array([]), stamps)
    return buffer,time_buffer

In [982]:
def retrain_model(local_db,local_names):
    '''
    Create a new local model from the local signatures and names.

    Parameters
    ----------
    local_db : numpy.ndarray
        Flat array of concatenated signatures; its size must be a multiple
        of SIG_SIZE. It is reshaped to one row of SIG_SIZE samples per
        signature.
    local_names : numpy.ndarray
        One label per signature.

    Returns
    -------
    RandomForestClassifier
        Classifier fitted on the 80% training split. The fitted model is
        also pickled to 'localmodel-<today>.pkl' in the working directory.

    Raises
    ------
    ValueError
        If `local_db` cannot be split into SIG_SIZE-long rows, or the number
        of signatures differs from the number of names.
    '''
    X = np.asarray(local_db).reshape((-1, SIG_SIZE))
    # scikit-learn expects 1-D labels; a column vector triggers a
    # DataConversionWarning on fit.
    y = np.asarray(local_names).ravel()

    if X.shape[0] != y.shape[0]:
        raise ValueError(
            f'{X.shape[0]} signatures but {y.shape[0]} names were supplied')

    # The held-out 20% is not used inside this function; the split is kept
    # so the model is fitted on the same subset as before.
    X_train , X_test , y_train , y_test = train_test_split(X,y,test_size = 0.2,random_state=42)

    model = RandomForestClassifier()
    model.fit(X_train,y_train)

    # context manager guarantees the file is flushed and closed
    with open(f'localmodel-{str(date.today())}.pkl','wb') as model_file:
        pickle.dump(model,model_file)

    return model

In [996]:
def update_local_db(sig,name):
    '''
    Record one more training example in the module-level stores.

    `sig` is flattened and added to the end of `local_db`; `name` is added
    to the end of `local_names`. Both globals are rebound to new arrays.
    '''
    global local_db, local_names
    flat_signature = np.ravel(sig)
    label = np.ravel(name)
    local_db = np.concatenate((np.ravel(local_db), flat_signature))
    local_names = np.concatenate((np.ravel(local_names), label))

In [984]:
def log_event(name,sig_ts,init_rise,rise,sig):
    '''
    Appends one turn-on event to the module-level `logs_df`.

    Parameters
    ----------
    name : str
        Appliance name to store.
    sig_ts : str or datetime-like
        Timestamp of the turn-on sample; parsed with pd.to_datetime.
    init_rise : float
        Rise in consumption attributed to this appliance when it turned on.
    rise : float
        Stored as the row's 'Current Value'.
    sig : numpy.ndarray
        Signature containing SIG_SIZE samples in any shape; stored
        flattened to length SIG_SIZE.

    The new row starts with 'Turned OFF' = 0 and 'Flag' = 0
    (Flag 0 means ON and 1 means OFF).
    '''
    global logs_df

    sig = sig.reshape(SIG_SIZE)
    new_row = pd.DataFrame({'Sig':[sig],  # wrapped in a list so the array is one cell
                            'Name':name,
                            'Turned ON':pd.to_datetime(sig_ts),
                            'Init Rise':init_rise,
                            'Current Value':rise,
                            'Turned OFF':0,
                            'Flag':0},index=[0])
    # DataFrame.append was deprecated and then removed in pandas 2.0;
    # pd.concat is the supported way to add rows.
    logs_df = pd.concat([logs_df, new_row], ignore_index=True)

In [985]:
def track_device(num,val,ts):
    '''
    Decides whether the appliance logged in row `num` of `logs_df` has turned OFF.

    Parameters
    ----------
    num : row label in `logs_df` of the appliance to test.
    val : current signal level; it is written to the row's 'Current Value'.
    ts  : timestamp stored in 'Turned OFF' (via pd.to_datetime) when the
          appliance is judged to be off.

    Returns
    -------
    1 if the row was marked OFF by this call ('Flag' set to 1), otherwise 0
    (including when the row's 'Flag' was already non-zero).

    Side effects: mutates the module-level `logs_df` and prints progress
    messages.
    '''
    global logs_df
#     global active_appliances # redundant? not used
#     start = time.time()
    
    
    if logs_df['Flag'][num] == 0: # only rows still flagged ON are tested
        nm = logs_df['Name'][num]
        print(f' {nm} is being tested for being turned OFF') # works fine
        total_rise = logs_df['Current Value'][num] # previously stored level for this row
#         init_rise = logs_df['Init Rise'][num] # redundant?? moved to only one appliance case
#         fall = logs_df['Fall'][num] #redundant? 
        offset = total_rise - val # difference between stored level and current level
        total_rise = total_rise - offset # algebraically this is `val`, the current level
        logs_df.loc[[num],['Current Value']] = total_rise
        
        # Absolute tolerance may cause errors for devices with almost similar
        # consumption, triggering a false turn off.
        
        if (logs_df['Flag']==0).sum() > 1: # more than one appliance is flagged ON
            arr_actv_indices = logs_df[logs_df['Flag']==0].index.values # labels of rows flagged ON
            arr_actv_indices = np.delete(arr_actv_indices, np.where(arr_actv_indices == num)) # exclude this device
            print(f'number of active devices excluding {nm }are {len(arr_actv_indices)}')
            cumul_rise = 0
            
            
            for x in arr_actv_indices: # sum the initial rise of every other active appliance
                cumul_rise = cumul_rise + logs_df['Init Rise'][x] # cumulative rise of the other active appliances
            print(f'Cumulative Rise excluding {nm} = {cumul_rise}')
            
            # maybe instead of using init_rise above, use present remaining values!!!
            # If the current level matches what the other devices account for
            # (within 1% of that sum), this device is taken to be OFF.
            if math.isclose(total_rise,cumul_rise,abs_tol=abs(cumul_rise*1/100)):
                print('cumulative sum was found remaining')
                logs_df.loc[[num],['Turned OFF']] = pd.to_datetime(ts)
                print(f'{nm} turned off at {ts}')
                logs_df.loc[[num],['Flag']] = 1
#                 print(f'time taken to track down {time.time() - start}')
                return 1
                      
            # not sure about this one, should tell for plus one active device only
            # NOTE(review): every path through this loop returns 0; it only
            # affects which debug messages are printed.
            
            for idx, x in enumerate(logs_df['Init Rise']): # compare the level with each active row's initial rise (2% tolerance)
                
                if logs_df['Flag'][idx] ==0 :    
                    
                    if math.isclose(total_rise,x,abs_tol=abs(x*2/100)): # if more than two appliances are there, might have to add their individual rises and check
                        print('Found the value to be same!!!!')
                        return 0
                    else:
                        print('value not same') #for debugging
            return 0

        
        else: # this is the only appliance flagged ON
            init_rise = logs_df['Init Rise'][num]
            # OFF when the level is back to ~0 (within 1% of the initial rise).
            # Phantom/standby consumption is not modelled here.
            if math.isclose(total_rise,0,abs_tol=abs(init_rise*1/100)):
                logs_df.loc[[num],['Turned OFF']] = pd.to_datetime(ts)
                print(f'{nm} turned off at {ts}')
                logs_df.loc[[num],['Flag']] = 1
#                 print(f'time taken to track down 3{time.time() - start}')
                return 1
            else:
                return 0
    
    
    else: # the row is already flagged OFF
        return 0

In [997]:
# Shared notebook state, read and rebound by the helper functions above.

# flat store of every recorded signature, plus one label per signature
local_db = np.array([])
local_names = np.array([])

# one row per detected turn-on event
logs_df = pd.DataFrame(
    [],
    columns=[
        'Sig',
        'Name',
        'Turned ON',
        'Init Rise',
        'Current Value',
        'Turned OFF',
        'Flag',
    ],
)

In [998]:
# --- Signal configuration (30 samples per second) ---
SIG_SIZE = 90       # samples per signature, i.e. 3 seconds
BUFFER_SIZE = 180   # samples per acquisition buffer, i.e. 6 seconds
THRSHLD = 5         # minimum rise that is treated as a turn-on event

# --- Run-time state ---
prev_mean = 0             # last observed signal level when no event was detected
prev_guess = ''           # previous classifier prediction
detected_appliances = 0   # counter used when checking for turn-off events
deployed = False          # False until the acquisition cell has run once
i = 1                     # test purpose iterator

In [1005]:
# Acquisition loop: opens the serial port, then for 100 iterations reads a
# BUFFER_SIZE-sample buffer, looks for turn-on events, classifies and logs
# them (asking the user for a name), and otherwise checks the logged
# appliances for turn-off.
# NOTE(review): the port is not closed if this cell raises or is interrupted;
# it is only closed by the separate `ser.close()` cell.
ser = serial.Serial('COM3', 115200) # remove later
time.sleep(2)

# `training_period` is only assigned inside this branch, so the loop below
# relies on `deployed` being False the first time this cell runs.
if deployed == False: # to check if the device is installed for the first time
    print('Installed Minion for the First Time! Give it a day to learn the devices in your home...')
    install_time = datetime.now() 
    training_period = install_time + timedelta(days = 1) # end of the training period: one day after install

deployed = True

# Read and record the data
for i in range(100):
    
    ct = datetime.now()
    if ct >= training_period: # if the minion has trained for one day
        print('Training period over. Witness the power of Minion!!!')
        clf = retrain_model(local_db,local_names) # train local model and use it for classification. also maybe send only new signatures!!!
        training_period = ct + timedelta(days = 1) # retrain model after one day. also might change it to update at midnights only.
    
    
    buffer, time_buffer = get_signal(BUFFER_SIZE) # 6 seconds of data into buffer
    events = event_detection(buffer) # if no on events, check for off events
    if len(events)>0: # events detected; the matching `else` below handles the no-event case
#         start = time.time()
#         print(f'{len(events)} events detected') #works fine
        for ev in events:
            sig = buffer[ev:ev+SIG_SIZE]
            if len(sig) != SIG_SIZE: # event too close to the end of the buffer: shift and read more data
#                 print('signal not adequate, shifting buffer') #works fine
                buffer = np.roll(buffer, -SIG_SIZE) 
                time_buffer = np.roll(time_buffer, -SIG_SIZE)
                temp_buffer, temp_time_buffer = get_signal(SIG_SIZE)
                buffer[BUFFER_SIZE-SIG_SIZE:] = temp_buffer # overwrite the wrapped-around tail with fresh samples
                time_buffer[BUFFER_SIZE-SIG_SIZE:] = temp_time_buffer
                events = event_detection(buffer) # recheck events to get new indices, later requires breaking outer for loop
                print(f'{len(events)} events detected on reconstructing')
            if len(events)>0:
                # Processes every event in one pass (re-using the name `ev`);
                # the `break` at the end of this branch then leaves the outer loop.
                for ev in events:
                    sig = buffer[ev:ev+SIG_SIZE] # extracting signature from buffer
                    if sig.mean() > prev_mean: # signifies a rise in signal
                        sig = sig - prev_mean # remove previous bias
                        sig_ts = time_buffer[ev] # extract time stamp as well
                        sig = sig.reshape((sig.shape[0]//SIG_SIZE,SIG_SIZE)) # reshape for classifier
#                         rise = buffer[ev+SIG_SIZE:].mean() # data to log , maybe put rise as steady state value
                        temp_buffer, _ = get_signal(90) # read 90 more samples to estimate the steady-state level
                        rise = temp_buffer.mean() # to get steady state rise
                        init_rise = rise - prev_mean
#                         prev_mean = rise #causing problem of false turn off
                        guess = clf.predict(sig)[0] # predict
                        end = time.time() # for timing information (only used by the commented-out print below)
                        acc = clf.predict_proba(sig).max() # highest class probability, used as a confidence score
#                         print(f'{guess} turned on at {sig_ts} with rise {rise}, Acc.: {acc}')
                        if acc > 0.80: 
                            if prev_guess == guess:
                                temp_name = logs_df['Name'].to_numpy()[-1]
                                print(f'It seems the {temp_name} was turned on again at {sig_ts}. ') 
                                print(f'It probably has multiple cycles. If true retype {temp_name}, else name it\n')
                                name = input('Enter the name!\n') # to save the signature with user defined name for future classification
                                # Both branches are placeholders: nothing differs yet
                                # between a repeated name and a new one.
                                if name == logs_df['Name'].to_numpy()[-1]:
                                    # for clubbing
                                    # save signature
                                    pass
                                else:
                                    # save signature
                                    pass
                                prev_guess = guess #redundant may remove
                                log_event(name,sig_ts,init_rise,rise,sig) #store data
                                update_local_db(sig,name)
                                #here might want to decrement active_devices and remove start time from it and club it by comparing prev and current guess!
                            else:
                                name = input(f'An appliance  was detected at {sig_ts}! Some people named it as {guess}, would you like to name it?\n')
                                prev_guess = guess
                                log_event(name,sig_ts,init_rise,rise,sig)
                                update_local_db(sig,name)
                        else:
                            name = input(f'An unrecognized device was detected at {sig_ts}, it seems {guess} but not sure, so please name it\n')
#                             name = guess
                            prev_guess = guess
                            log_event(name,sig_ts,init_rise,rise,sig)
                            update_local_db(sig,name)
                # NOTE(review): the counter goes up by exactly one here, however
                # many events the loop above logged (possibly none, possibly several),
                # yet the turn-off check below uses it as the number of rows in
                # logs_df. Confirm the two cannot drift apart.
                detected_appliances += 1
#                 print(f'Total time taken to identify the appliance {end-start:.3f} s')
                break # breaks previous 'for-loops' of events 
        

    else:
        prev_mean = buffer.mean() # keep tracking the signal level
        for aplnc in range(detected_appliances): # test each logged appliance for turn-off
            # update values of rise to find off event
            m = track_device(aplnc,prev_mean,time_buffer[-1])
            if m == 1:
                mm,ss = divmod((pd.to_datetime(logs_df['Turned OFF'][aplnc]) - pd.to_datetime(logs_df['Turned ON'][aplnc])).total_seconds(),60)
                n = logs_df['Name'][aplnc]
                print(f'The {n} ran for a total of {int(mm)} minutes and {int(ss)} seconds')
                break # stops checking the remaining appliances for this buffer
    
# TODO: every time a device is turned off, copy logs_df to a temp_df to reduce the cost of finding turn-off events
# also it may help remove bugs due to cumulative sum of init rise
        

Training period still not over
0.00,2020-11-6 4:54:9.851

Training period still not over
34.00,2020-11-6 4:54:14.837

34.00,2020-11-6 4:54:16.331

An appliance  was detected at 2020-11-6 4:54:10.730
! Some people named it as drill, would you like to name it?
fan
Training period still not over
35.00,2020-11-6 4:54:21.308

 fan is being tested for being turned OFF
Training period still not over
0.00,2020-11-6 4:54:26.298

 fan is being tested for being turned OFF
Training period still not over
0.00,2020-11-6 4:54:31.271

 fan is being tested for being turned OFF
fan turned off at 2020-11-6 4:54:31.271

The fan ran for a total of 0 minutes and 20 seconds
Training period still not over
0.00,2020-11-6 4:54:36.244

Training period still not over


KeyboardInterrupt: 

In [1006]:
# Release the serial port opened by the acquisition cell.
ser.close()

In [1007]:
# Show the event log built up by log_event() and track_device().
logs_df

Unnamed: 0,Sig,Name,Turned ON,Init Rise,Current Value,Turned OFF,Flag
0,"[0.0, 43.0, 43.0, 43.0, 43.0, 43.0, 43.0, 43.0...",fan,2020-11-06 04:53:03.293,34.0,0.0,2020-11-06 04:53:25.515000,1
1,"[0.0, 38.0, 38.0, 38.0, 38.0, 38.0, 38.0, 38.0...",fan,2020-11-06 04:54:10.730,34.0,0.0,2020-11-06 04:54:31.271000,1


In [1008]:
# Show the flat signature store (SIG_SIZE samples per recorded signature).
local_db

array([ 0., 43., 43., 43., 43., 43., 43., 43., 43., 39., 39., 39., 39.,
       39., 39., 39., 39., 39., 39., 39., 36., 36., 36., 36., 36., 36.,
       36., 36., 36., 36., 36., 36., 35., 35., 35., 35., 35., 35., 35.,
       35., 35., 35., 35., 34., 34., 34., 34., 34., 34., 34., 34., 34.,
       34., 34., 34., 34., 34., 34., 34., 34., 34., 34., 34., 34., 34.,
       34., 34., 34., 34., 34., 34., 34., 34., 34., 34., 34., 34., 34.,
       34., 34., 34., 34., 34., 34., 34., 34., 34., 34., 34., 34.,  0.,
       38., 38., 38., 38., 38., 38., 38., 38., 38., 38., 38., 38., 47.,
       47., 47., 47., 47., 47., 47., 47., 47., 47., 47., 46., 46., 46.,
       46., 46., 46., 46., 46., 46., 46., 46., 46., 45., 45., 45., 45.,
       45., 45., 45., 45., 45., 45., 45., 41., 41., 41., 41., 41., 41.,
       41., 41., 41., 41., 41., 41., 38., 38., 38., 38., 38., 38., 38.,
       38., 38., 38., 38., 36., 36., 36., 36., 36., 36., 36., 36., 36.,
       36., 36., 36., 35., 35., 35., 35., 35., 35., 35., 35.])

In [1009]:
# Show the label recorded for each stored signature.
local_names

array(['fan', 'fan'], dtype='<U32')

# After the Training Period
* **Use `logs_df` to create the local classifier**
* **Cross-verify the results**
* **It is advised that each appliance is recorded at least 10 times**

In [914]:
# retrain_model requires both the signatures and their labels
clf = retrain_model(local_db, local_names)

In [931]:
# NOTE(review): X_test and y_test are not defined at notebook scope in any
# visible cell (retrain_model keeps its split local), so this cell will not
# run on a fresh kernel. Confirm where they are meant to come from.
clf.predict(X_test) == y_test # not required

17    True
20    True
23    True
6     True
9     True
7     True
0     True
Name: Name, dtype: bool

In [916]:
# now in the main function this new local classifier will be used!!!