# How to load data and retrain the model !!!.

In [61]:
# from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import datetime
import pickle
from sklearn.model_selection import RepeatedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score
import os

In [85]:
def retrain_model(*args,main_db):
    """
    1. Takes the database as first argument, and any number of CSV file names (without the extension) of new devices as input.
    2. The CSV columns (representing the signature for one minute ) should be as follows:
        A1,PF1,A2,PF2,A3,PF3 ... A59,PF59,A60,PF60
    3. The number of rows should be atleast 100.

    4. Retrains the model. 
    5. Returns the new model.
    """
    #device files are loaded into dataframe and appended to the main database
    devices = []
    for x in args:
        devices.append(x)
    for x in devices:
        df = pd.read_csv(f'{x}.csv')
        for y in devices:
            if y == x:
                df[f'{y}'] = 1
            else:
                df[f'{y}'] = 0
        main_db = main_db.append(df,ignore_index = True)
    
    
    #creating train,test sets
    col_labels = main_db.columns[120:] 
    X = main_db.drop(labels=col_labels,axis =1)
    y = main_db[col_labels]
    
    
    #determining number of nodes
    results = np.array([])
    n_inputs, n_outputs = X.shape[1], y.shape[1]
    np.random.seed(42)
    
    
    #defining the model
    model = Sequential()
    model.add(Dense(20, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(n_outputs, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam')
    
    
    # define evaluation procedure
    cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
    # enumerate folds
    for train_ix, test_ix in cv.split(X):
        # prepare data
        X_train, X_test = X.iloc[train_ix], X.iloc[test_ix]
        y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]
        # fit model
        model.fit(X_train, y_train, verbose=0, epochs=100)
        # make a prediction on the test set
        preds = model.predict(X_test)
        # round probabilities to class labels
        preds = preds.round()
        # calculate accuracy
        acc = accuracy_score(y_test, preds)
        # store result
        print('>%.3f' % acc)
        results = np.append(results,[acc])
        model_accuracy = np.mean(results)
        score_deviation = np.std(results)
    print('Accuracy: %.3f std (%.3f)'% (model_accuracy, score_deviation))
    
    prompt = int(input('input 1 to save the retrained model'))
    if prompt == 1:
        date = str(datetime.date.today())
        # Create model directory with current time
        modeldir = os.path.join("./models",date)
        model_path = modeldir + "-" + "accuracy" + "-" + str(int(model_accuracy*100))  + ".h5" # save format of model
        print(f"Saving model to: {model_path}...")
        model.save(model_path)
        print('saved')
    else:
        print('returning model')
        
    return model,main_db

In [None]:
db = pd.DataFrame()
model,db = retrain_model('fan','led','washer','laptop','toaster',main_db=db)

>0.250
>0.125
>0.375
>0.500
>0.250
>0.857
>1.000
>0.429
>0.286
>0.857
>0.500
>0.875
>0.500
>0.625
>0.750
>0.571
>0.429
>0.857
>0.571
>0.714
>0.500


In [42]:
db

Unnamed: 0,A1,PF1,A2,PF2,A3,PF3,A4,PF4,A5,PF5,...,PF58,A59,PF59,A60,PF60,fan,led,washer,laptop,toaster
0,82.0,0.92,82.0,0.91,82.0,0.91,82.0,0.91,82.0,0.91,...,0.91,82.0,0.91,82.0,0.91,1,0,0,0,0
1,83.0,0.91,82.0,0.91,82.0,0.91,83.0,0.91,81.0,0.91,...,0.91,81.0,0.91,81.0,0.91,1,0,0,0,0
2,81.0,0.91,81.0,0.92,81.0,0.92,81.0,0.91,81.0,0.91,...,0.91,81.0,0.91,81.0,0.91,1,0,0,0,0
3,81.0,0.91,81.0,0.91,81.0,0.91,81.0,0.91,81.0,0.91,...,0.91,81.0,0.91,81.0,0.91,1,0,0,0,0
4,81.0,0.91,81.0,0.91,81.0,0.91,81.0,0.91,81.0,0.91,...,0.92,81.0,0.91,81.0,0.91,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,738.0,1.00,739.0,1.00,739.0,1.00,738.0,1.00,738.0,1.00,...,1.00,736.0,1.00,736.0,1.00,0,0,0,0,1
71,736.0,1.00,737.0,1.00,737.0,1.00,737.0,1.00,737.0,1.00,...,1.00,736.0,1.00,737.0,1.00,0,0,0,0,1
72,736.0,1.00,736.0,1.00,736.0,1.00,736.0,1.00,735.0,1.00,...,1.00,740.0,1.00,740.0,1.00,0,0,0,0,1
73,739.0,1.00,740.0,1.00,740.0,1.00,741.0,1.00,740.0,1.00,...,1.00,739.0,1.00,738.0,1.00,0,0,0,0,1
