In [321]:
import numpy as np
import pandas as pd
import tensorflow as tf


In [318]:
def sample_to_7dsignature(*args):
    """
    Convert raw per-device sample CSVs into fixed-length, 7-channel signatures.

    Each positional argument is a CSV file name WITHOUT the ``.csv`` extension.
    The file must contain the columns V1, C1, A1, PF1, AP1, RP1, F1 and a
    timestamp column T.

    For every name ``fn`` three files are written:
        1. ``{fn}_seconds.csv`` - the seven channels averaged per second
           (T is interpreted as Asia/Kolkata time and is not written out;
           seconds that contain no sample at all are dropped).
        2. ``{fn}.npy``         - float array of shape
           (n_signatures, 7, 1, 60); one signature is 60 consecutive
           per-second rows, channels ordered V1, C1, A1, PF1, AP1, RP1, F1.
           Trailing rows that do not fill a whole signature are discarded.
        3. ``{fn}_labels.csv``  - one row per signature, each holding ``fn``.

    Returns:
        None. Results are only written to disk; load them back with
        ``np.load`` / ``pd.read_csv``.

    Raises:
        FileNotFoundError: if ``{fn}.csv`` does not exist.
        KeyError: if one of the required columns is missing.
    """
    channels = ['V1', 'C1', 'A1', 'PF1', 'AP1', 'RP1', 'F1']
    signature_length = 60  # samples (seconds) per signature

    for fn in args:
        df = pd.read_csv(f'{fn}.csv')
        df['T'] = pd.DatetimeIndex(df['T'], tz='Asia/Kolkata')
        df = df.set_index('T')

        # resample() returns a new frame, so the result has to be kept.
        # Seconds without any sample come back as all-NaN rows and are
        # dropped so that they never end up inside a signature.
        df = df[channels].resample('s').mean().dropna(how='all')
        df.to_csv(f'{fn}_seconds.csv', index=False)

        # Work on the frame directly instead of re-parsing the CSV text,
        # which avoids any dependence on line endings or string slicing.
        samples = df.to_numpy(dtype=float)
        no_signatures = samples.shape[0] // signature_length
        samples = samples[:no_signatures * signature_length]

        # (rows, 7) -> (n, 60, 7) -> (n, 7, 60) -> (n, 7, 1, 60)
        signatures = samples.reshape(no_signatures, signature_length, len(channels))
        signatures = signatures.transpose(0, 2, 1)[:, :, np.newaxis, :]
        np.save(f'{fn}', np.ascontiguousarray(signatures))

        label_series = pd.Series([fn] * no_signatures, dtype='string')
        label_series.to_csv(f'{fn}_labels.csv', index=False)
    return

In [310]:
# Empty string-typed Series used as a placeholder for labels.
# NOTE(review): `labels` is reassigned by the later get_xy(...) cell, so this
# placeholder looks unused - confirm before removing.
labels = pd.Series([],dtype = 'string')

In [320]:
# Build the signature (.npy) and label (_labels.csv) files for the four devices.
# Expects 3dprinter.csv, ac.csv, fridge.csv and wallfan.csv in the working directory.
sample_to_7dsignature('3dprinter','ac','fridge','wallfan')

In [355]:
# Load the saved signatures of the 3D printer and show the array shape
# (the recorded output is (92, 7, 1, 60)).
arr = np.load('3dprinter.npy')
arr.shape
        

(92, 7, 1, 60)

In [356]:
# Load the saved signatures of the AC and show the array shape
# (the recorded output is (99, 7, 1, 60)).
brr = np.load('ac.npy')
brr.shape

(99, 7, 1, 60)

In [344]:
# Stack the two devices' signatures along the first (signature) axis.
crr = np.concatenate((arr, brr), axis=0)

In [345]:
# Shape of the stacked array; the first axis is the sum of both inputs' first axes.
crr.shape

(191, 7, 1, 60)

In [353]:
# Spot check one value: entry 91 along axis 0, index 1 along axis 1, first element.
crr[91, 1, 0, 0]

0.41

In [370]:
# Two small integer matrices with the same number of columns (3) but a
# different number of rows, used below to try out appending along rows.
a, b = (
    np.asarray([[1, 2, 3],
                [4, 5, 6],
                [13, 14, 15]]),
    np.asarray([[7, 8, 9],
                [10, 11, 12]]),
)

In [371]:
# Shape of the first toy matrix.
a.shape

(3, 3)

In [373]:
# Shape of the second toy matrix.
b.shape

(2, 3)

In [372]:
# Without an axis the inputs are flattened before being joined, so the
# result is a 1-D float array holding all values of a followed by those of b.
c = np.concatenate([np.array([]), a.ravel(), b.ravel()])
c.shape

(15,)

In [374]:
# Restore the row structure: 5 rows of 3 values each.
c = np.reshape(c, (5, 3))

In [375]:
# Display the reshaped array: rows of a first, then rows of b.
c

array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.],
       [13., 14., 15.],
       [ 7.,  8.,  9.],
       [10., 11., 12.]])

In [394]:
def get_xy(*args):
    """
    Load the saved signatures and labels of several devices as one dataset.

    Each positional argument is a device name ``fn`` for which ``{fn}.npy``
    (signature array, signatures along the first axis) and
    ``{fn}_labels.csv`` (one label per signature, single column with a
    header row) must exist.

    Returns:
        files  - all signature arrays concatenated along the first axis, in
                 argument order.
        labels - 1-D array with the labels of all devices, in the same order.
        Called without arguments it returns an empty (0, 7, 1, 60) array and
        an empty label array.

    Raises:
        FileNotFoundError: if one of the expected files is missing.
    """
    signature_arrays = []
    label_arrays = []
    for fn in args:
        signature_arrays.append(np.load(f'{fn}.npy'))
        # read_csv yields an (n, 1) table; flatten it to one label per signature
        label_arrays.append(pd.read_csv(f'{fn}_labels.csv').to_numpy().ravel())

    # A device without any signature contributes nothing; skipping it also
    # tolerates files that were saved as a plain empty array.
    signature_arrays = [arr for arr in signature_arrays if arr.size]
    label_arrays = [arr for arr in label_arrays if arr.size]

    # Joining once along axis 0 keeps the per-signature shape of the inputs
    # and avoids re-copying the growing result for every device.
    if signature_arrays:
        files = np.concatenate(signature_arrays, axis=0)
    else:
        files = np.empty((0, 7, 1, 60))
    labels = np.concatenate(label_arrays) if label_arrays else np.array([])
    return files, labels

In [413]:
# Load signatures and labels of all four devices into one dataset.
# NOTE(review): the recorded output below contains running totals that the
# current get_xy no longer prints - it appears to come from an earlier version.
files,labels = get_xy('3dprinter','ac','fridge','wallfan')

92
191
284
379
(379, 7, 1, 60) (379,)


In [414]:
# Sorted array of the distinct device names; its order defines the columns
# of the boolean label vectors built further down.
unique_devices = np.unique(labels)
unique_devices

array(['3dprinter', 'ac', 'fridge', 'wallfan'], dtype=object)

In [415]:
# Compare the first label against every known device name. `labels` is used
# here because `y` is only defined in a later cell, so the previous form
# failed on a fresh kernel run from top to bottom.
labels[0] == unique_devices

array([ True, False, False, False])

In [417]:
# One boolean vector per label: True at the position of the matching device
# name in unique_devices, False everywhere else.
boolean_labels = []
for device_name in labels:
    boolean_labels.append(unique_devices == device_name)
boolean_labels

[array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ True, False, False, False]),
 array([ Tru

In [418]:
# Features are the stacked signatures, targets are the boolean label vectors.
X, y = files, boolean_labels

In [None]:
# Device names (CSV / .npy file stems) used throughout this notebook.
filenames = ['3dprinter','ac','fridge','wallfan']

In [None]:
# Define the batch size, 32 is a good default
BATCH_SIZE = 32

# Create a function to turn data into batches
def create_data_batches(x, y=None, batch_size=BATCH_SIZE, valid_data=False, test_data=False):
    """
    Create a batched tf.data.Dataset out of data (x) and label (y) pairs.

    Args:
        x: input data, converted with tf.constant.
        y: labels matching x; not used when test_data is True.
        batch_size: number of elements per batch (defaults to BATCH_SIZE).
        valid_data: if True, build validation batches (not shuffled).
        test_data: if True, build test batches (no labels, not shuffled).
            Takes precedence over valid_data.

    Returns:
        The batched dataset. Training data (neither flag set) is shuffled
        with a buffer covering all of x before it is batched.

    NOTE(review): process_image and get_image_label are not defined in any
    visible cell of this notebook - they must exist in the kernel before this
    function is called. Confirm that they are the right mapping for this data.
    """
    # If the data is a test dataset, we probably don't have labels
    if test_data:
        print("Creating test data batches...")
        data = tf.data.Dataset.from_tensor_slices((tf.constant(x)))  # inputs only
        data_batch = data.map(process_image).batch(batch_size)
        return data_batch

    # If the data is a validation dataset, we don't need to shuffle it
    elif valid_data:
        print("Creating validation data batches...")
        data = tf.data.Dataset.from_tensor_slices((tf.constant(x),   # inputs
                                                   tf.constant(y)))  # labels
        data_batch = data.map(get_image_label).batch(batch_size)
        return data_batch

    else:
        # If the data is a training dataset, we shuffle it
        print("Creating training data batches...")
        # Turn inputs and labels into Tensors
        data = tf.data.Dataset.from_tensor_slices((tf.constant(x),   # inputs
                                                   tf.constant(y)))  # labels

        # Shuffle before mapping so that the mapping runs on already shuffled elements
        data = data.shuffle(buffer_size=len(x))

        # Create (input, label) tuples via the mapping function
        data = data.map(get_image_label)

        # Turn the data into batches
        data_batch = data.batch(batch_size)
    return data_batch

In [308]:
# Write cc back to 3dprinter.csv without the row index (overwrites the file).
# NOTE(review): `cc` is only assigned in a cell further down the notebook -
# confirm the intended cell order before running top to bottom.
cc.to_csv('3dprinter.csv',index= False)

In [306]:
# Display cc; the recorded output shows the frame including the T column.
cc

Unnamed: 0,V1,C1,A1,PF1,AP1,RP1,F1,T
0,254.25,0.38,62,0.65,95.53,72.68,49.56,2020-08-12 00:00:00
1,254.21,0.37,60,0.64,93.46,71.65,49.57,2020-08-12 00:00:01
2,257.44,0.37,60,0.64,93.46,71.65,49.56,2020-08-12 00:00:02
3,259.92,0.37,59,0.63,93.95,73.11,49.57,2020-08-12 00:00:03
4,259.53,0.37,59,0.62,95.62,75.25,49.57,2020-08-12 00:00:04
...,...,...,...,...,...,...,...,...
5554,267.71,0.40,54,0.51,106.09,91.32,50.05,2020-08-12 01:32:34
5555,265.71,0.40,53,0.51,103.92,89.39,50.05,2020-08-12 01:32:35
5556,267.12,0.40,54,0.50,107.14,92.54,50.05,2020-08-12 01:32:36
5557,267.66,0.40,54,0.50,107.36,92.79,50.05,2020-08-12 01:32:37


In [305]:
# Add (or replace) the timestamp column T of cc.
# NOTE(review): `T` is not defined in any visible cell - confirm where it
# comes from; this cell fails on a fresh kernel otherwise.
cc['T'] = T

In [304]:
# Display cc; the recorded output shows the frame without a T column.
cc

Unnamed: 0,V1,C1,A1,PF1,AP1,RP1,F1
0,254.25,0.38,62,0.65,95.53,72.68,49.56
1,254.21,0.37,60,0.64,93.46,71.65,49.57
2,257.44,0.37,60,0.64,93.46,71.65,49.56
3,259.92,0.37,59,0.63,93.95,73.11,49.57
4,259.53,0.37,59,0.62,95.62,75.25,49.57
...,...,...,...,...,...,...,...
5554,267.71,0.40,54,0.51,106.09,91.32,50.05
5555,265.71,0.40,53,0.51,103.92,89.39,50.05
5556,267.12,0.40,54,0.50,107.14,92.54,50.05
5557,267.66,0.40,54,0.50,107.36,92.79,50.05


In [303]:
# Read the raw 3D printer samples from 3dprinter.csv into cc.
cc = pd.read_csv('3dprinter.csv')