In [None]:
import tensorflow as tf
import numpy as np
import hdf5storage as hdf
import sklearn.preprocessing as process

# Printed when this notebook is executed; presumably a revision marker for the
# helper notebook -- TODO confirm.
print('aux 1.15.34')

# NOTE(review): `useColab` is not defined anywhere in this notebook; it must
# already exist in the namespace (presumably set by the notebook that runs
# this one) -- confirm against the caller.
if useColab:
  # Colab layout: the model notebook is read from the mounted Google Drive path.
  %run gdrive/My\ Drive/Colab\ Notebooks/lib/model.ipynb
else:
  # Local layout: the model notebook is read from a relative lib/ folder.
  %run lib/model.ipynb

ModuleNotFoundError: ignored

In [None]:
#convert to one-hot encoding
#https://machinelearningmastery.com/how-to-one-hot-encode-sequence-data-in-python/
def one_hot(labels, nclasses):
    """
    Encode integer class labels as one-hot rows.

    labels:   a vector of integer labels
    nclasses: number of classes (width of the encoded rows); per the Keras
              API it can be left as None when labels already contain every
              possible value

    Returns whatever tf.keras.utils.to_categorical produces for these
    arguments.

    Conceptually the encoding corresponds to:

        y = np.zeros((labels.size, nclasses))
        for row, label in enumerate(labels):
            y[row, label] = 1

    To decode one encoded row back to its integer label use:

        valueInteger = np.argmax(encoded[0])
    """
    return tf.keras.utils.to_categorical(labels, num_classes=nclasses)

In [None]:
##########################
# load data and resample #
##########################
def minmax(X):
    """
    Min-max normalize X to the [0, 1] range.

    Delegates to sklearn.preprocessing.minmax_scale with its default
    arguments and returns the scaled data.

    A hand-rolled variant (https://swaathi.com/2017/04/29/normalizing-data/)
    that was previously considered here:

        X = (X - X.min()) / (X.max() - X.min())
    """
    scaled = process.minmax_scale(X)
    return scaled

  
def quantileTransform(X):
    """
    Normalize X with a quantile transform to a uniform output distribution.

    X: 2-D array-like accepted by sklearn's QuantileTransformer.

    The transformer is fitted on X itself and then applied to it; the
    transformed data is returned.
    """
    # Use the public sklearn.preprocessing namespace (already imported as
    # `process`): the private `sklearn.preprocessing.data` module that used to
    # be imported here was deprecated and later removed from scikit-learn.
    qTransform = process.QuantileTransformer(output_distribution='uniform')
    return qTransform.fit_transform(X)


#eg. to reduce from 36 to 4 classes: 
#labels = joinClasses(labels, [(0, 0), (1, 7), (8, 14), (15, 35)])
def joinClasses(labels, join):
    """
    Merge ranges of class labels into coarser classes, in place.

    labels: mutable sequence (list or array) of labels; it is modified in
            place and also returned
    join:   sequence of inclusive (low, high) ranges; a label that falls in
            join[j] is replaced by j. When several ranges contain the label,
            the first one wins. Labels matching no range are left unchanged.

    Example, reducing 36 classes to 4:
        labels = joinClasses(labels, [(0, 0), (1, 7), (8, 14), (15, 35)])
    """
    for i in range(len(labels)):
        original = labels[i]
        for j, bounds in enumerate(join):
            if bounds[0] <= original <= bounds[1]:
                labels[i] = j
                # Stop at the first match: without this, the freshly written
                # class index j would be tested against the remaining ranges
                # and could be relabelled a second time.
                break
    return labels


def loadData(filename):
    """Load `filename` with hdf5storage.loadmat and return what it yields."""
    contents = hdf.loadmat(filename)
    return contents

  
def getData1(data, underSample=1):
    """
    Extract the pre-split train/test arrays from a loaded data mapping.

    data:        mapping with the keys 'features_training', 'labels_training',
                 'features_test' and 'labels_test'
    underSample: column stride applied to the feature matrices (keeps every
                 underSample-th column)

    Returns (trainXs, trainYs, testXs, testYs); labels are converted to int8.
    No normalization is applied here (minmax / quantileTransform are
    available but intentionally not called).
    """
    def _features(key):
        # copy of the feature matrix with the column stride applied
        return np.array(data[key][:, ::underSample])

    def _labels(key):
        return np.array(data[key], dtype=np.int8)

    trainXs = _features('features_training')
    trainYs = _labels('labels_training')
    testXs = _features('features_test')
    testYs = _labels('labels_test')

    return trainXs, trainYs, testXs, testYs


def getData2(data, numClasses, trainSize=0.8, underSample=1, 
             featuresName='X', labelsName='Y', xType='float32', yType='uint8'):  
    """
    Build a per-class stratified, shuffled train/test split.

    The rows of X / Y are treated as numClasses contiguous blocks of equal
    size (one block per class, in order). Each block is shuffled, then its
    first `trainSize` fraction goes to the training set and the rest to the
    test set. Finally both sets are shuffled as a whole.

    data:         mapping holding the feature and label arrays
    numClasses:   number of contiguous class blocks in the data
    trainSize:    fraction of each block used for training
    underSample:  column stride applied to the features
    featuresName: key of the feature matrix in `data`
    labelsName:   key of the labels in `data`; may be 1-D integer labels or a
                  2-D (e.g. one-hot) matrix
    xType, yType: dtypes of the returned feature / label arrays

    Returns (trainXs, trainYs, testXs, testYs). Labels are always 2-D, with as
    many columns as the input labels have (1 for a 1-D label vector).
    Rows beyond numClasses * (rows // numClasses) are not used.
    Uses the global NumPy random state.
    """
    #Get data (np.array copies, so the caller's arrays are not shuffled)
    X = np.array(data[featuresName][:, ::underSample])
    Y = np.array(data[labelsName], dtype=np.int8)
    
    #make sure Y has an explicit column dimension (1 when not one-hot encoded)
    if len(Y.shape) == 1:
        Y = Y.reshape(Y.shape[0], 1)

    #Separate datasets equally by fault classes
    samples    = int(X.shape[0] / numClasses)
    splitPoint = int(samples*trainSize)
    
    trainXs = []
    trainYs = []
    testXs  = []
    testYs  = []

    for i in range(numClasses):
        print (i)

        #bounds of this class block and of its train part
        st = i*samples
        sp = st + splitPoint
        end = (i+1)*samples
        
        #shuffle each fault block in place before slicing train/test
        p = np.random.permutation(samples)
        X[st:end, :] = X[st+p, :]
        Y[st:end, :] = Y[st+p, :]
            
        trainXs.append(X[st:sp, :])
        trainYs.append(Y[st:sp, :])
        testXs.append(X[sp:end, :])
        testYs.append(Y[sp:end, :])

    #Stack the per-class pieces row-wise and set the data types.
    #Concatenating keeps the labels' own column count; the previous reshape
    #forced numClasses columns and failed for single-column integer labels.
    trainXs = np.concatenate(trainXs, axis=0).astype(xType)
    trainYs = np.concatenate(trainYs, axis=0).astype(yType)
    testXs  = np.concatenate(testXs,  axis=0).astype(xType)
    testYs  = np.concatenate(testYs,  axis=0).astype(yType)

    #Faults are ordered by classes, this shuffles faults order
    #(not needed if fit is set to shuffle)
    p = np.random.permutation(trainXs.shape[0])
    trainXs = trainXs[p]
    trainYs = trainYs[p]
    
    p = np.random.permutation(testXs.shape[0])
    testXs = testXs[p]
    testYs = testYs[p]

    return trainXs, trainYs, testXs, testYs

  
  
def getData3(data, numClasses, useSize=1, trainSize=0.8, underSample=1, 
             featuresName='X', labelsName='Y', xType='float32', yType='uint8'):  
    """
    Sequential (non-shuffled) train/test split of the first part of the data.

    data:         mapping holding the feature and label arrays
    numClasses:   accepted for signature parity with getData2; not used here
    useSize:      fraction of the rows (from the start) to keep
    trainSize:    fraction of the kept rows used for training
    underSample:  column stride applied to the features
    featuresName: key of the feature matrix in `data`
    labelsName:   key of the labels in `data` (1-D or 2-D)
    xType, yType: dtypes of the returned feature / label arrays

    Returns (trainXs, trainYs, testXs, testYs); labels are always 2-D.
    """
    #Get data
    X = np.array(data[featuresName][:, ::underSample], dtype=xType)
    Y = np.array(data[labelsName], dtype=yType)

    #make sure Y has an explicit column dimension (1 when not one-hot encoded).
    #This must happen before any 2-D slicing of Y, otherwise 1-D labels raise
    #IndexError.
    if len(Y.shape) == 1:
        Y = Y.reshape(Y.shape[0], 1)

    #Discard part
    useSamples = int(X.shape[0]*useSize)
    X = X[:useSamples, :]
    Y = Y[:useSamples, :]

    #Separate datasets
    splitPoint = int(X.shape[0]*trainSize)

    trainXs = X[:splitPoint, :]
    testXs  = X[splitPoint:, :]
    trainYs = Y[:splitPoint, :]
    testYs  = Y[splitPoint:, :]
    
    return trainXs, trainYs, testXs, testYs
  
 

def setupData(trainXs, trainYs, testXs, testYs, numClasses=1, samplesSlice=1, samplesPart=0, windowSlice=1, conv2Drows=1, conv2Dcols=1, underSample=1, useCNN='1D', DTYPE=None):
    """
    Shape the train/test arrays for the selected network type.

    Steps, in order:
      1. If samplesSlice > 1, keep only part of every array: the first
         1/samplesSlice of the rows when samplesPart == 0, otherwise
         everything after that point.
      2. Reshape the feature arrays according to useCNN:
           '1D' -> (rows*windowSlice, cols/conv2Dcols/windowSlice, conv2Dcols)
           '2D' -> (rows*windowSlice, cols/conv2Drows/conv2Dcols/windowSlice,
                    conv2Drows, conv2Dcols)
           None -> (rows*windowSlice, cols/windowSlice)   (dense MLP)
         Any other value leaves the features untouched.
      3. Repeat every label row windowSlice times so each new window keeps
         the label of the sample it came from.
      4. If DTYPE is given, make it the Keras float type (with a larger
         epsilon for 'float16') and cast all four arrays to it.

    numClasses and underSample are accepted but not used by this function.

    Returns (trainXs, trainYs, testXs, testYs).
    """
    #1. optional sample subset (written with halving in mind)
    if samplesSlice > 1:
        keepLower = (samplesPart == 0)

        def _part(arr):
            cut = int(arr.shape[0]/samplesSlice)
            if keepLower:
                return arr[0:cut]
            return arr[cut:]

        trainXs, testXs = _part(trainXs), _part(testXs)
        trainYs, testYs = _part(trainYs), _part(testYs)

    #2. pick the target shape for the feature arrays; CNNs need the extra
    #   trailing dimension(s), which can be used for channels
    if useCNN == '1D':
        def _shape(arr):
            return (int(arr.shape[0]*windowSlice),
                    int(arr.shape[1]/conv2Dcols/windowSlice),
                    conv2Dcols)
    elif useCNN == '2D':
        def _shape(arr):
            return (int(arr.shape[0]*windowSlice),
                    int(arr.shape[1]/conv2Drows/conv2Dcols/windowSlice),
                    conv2Drows,
                    conv2Dcols)
    elif useCNN is None:
        def _shape(arr):
            return (int(arr.shape[0]*windowSlice),
                    int(arr.shape[1]/windowSlice))
    else:
        _shape = None

    if _shape is not None:
        trainXs = trainXs.reshape(*_shape(trainXs))
        testXs  = testXs.reshape(*_shape(testXs))

    #3. one label row per window
    trainYs = np.repeat(trainYs, windowSlice, axis=0)
    testYs  = np.repeat(testYs,  windowSlice, axis=0)

    #4. optional float16/32/64 selection (on supported GPUs)
    if DTYPE is None:
        return trainXs, trainYs, testXs, testYs

    backend = tf.keras.backend
    backend.set_floatx(DTYPE)
    if DTYPE == 'float16':
        #default epsilon is 1e-7; raised to avoid NaN (must be tested)
        backend.set_epsilon(1e-4)

    trainXs = backend.cast_to_floatx(trainXs)
    testXs  = backend.cast_to_floatx(testXs)
    trainYs = backend.cast_to_floatx(trainYs)
    testYs  = backend.cast_to_floatx(testYs)

    return trainXs, trainYs, testXs, testYs

In [None]:
################
# define model #
################
def setupModel(inputLen : int, numClasses=1, windowSlice=1, conv1Dcols=1, dropOutRatio=0.5, filters=32, useCNN=True):
    """
    Build the network used for training.

    inputLen:     length of one input signal before windowing
    numClasses:   number of output classes
    windowSlice:  factor by which each signal is split into windows
    conv1Dcols:   size of the last (channel) dimension for the 1-D CNN
    dropOutRatio: dropout ratio handed to the CNN builder
    filters:      currently not used; the CNN builder is called with the
                  literal values 32 and 20
    useCNN:       exactly True selects the CNN, anything else the dense model

    Returns the model created by cnnModel31 or denseModel (both defined
    outside this notebook section).
    """
    if not (useCNN == True):
        # NOTE(review): the dense builder receives a float length here
        # (true division), unlike the CNN branch which truncates to int --
        # confirm denseModel copes with that.
        return denseModel(inputLen/windowSlice, numClasses)

    windowLen = int(inputLen/conv1Dcols/windowSlice)
    # Other builders tried with the same leading arguments: cnnModel1,
    # cnnModel32, cnnModel4, cnnModel5, cnnModel6, cnnModel7.
    return cnnModel31(windowLen, conv1Dcols, numClasses, dropOutRatio, 32, 20)
  
################
# define model #
################
def setupCNN2D(inputLen, numClasses=1, windowSlice=1, conv2Drows=1, conv2Dcols=1, dropOutRatio=0.5, filters=32):
    """
    Build the 2-D CNN model.

    The first argument handed to the builder is
    inputLen / conv2Drows / conv2Dcols / windowSlice, truncated to int.
    `filters` is currently not used: the builder is called with the literal
    values 32 and 20.

    Returns the model created by cnnModel32_2D (cnnModel31_2D is a drop-in
    alternative with the same arguments).
    """
    firstDim = int(inputLen/conv2Drows/conv2Dcols/windowSlice)
    #use filters (conv2Drows, 20)
    return cnnModel32_2D(firstDim, conv2Drows, conv2Dcols, numClasses, dropOutRatio, 32, 20)

In [None]:
#https://github.com/tensorflow/tensorflow/issues/29798

def getTPU():
    """
    Return the gRPC address of the Colab TPU.

    Reads the COLAB_TPU_ADDR environment variable and returns it prefixed
    with 'grpc://'. When the variable is not set, prints 'TPU not found' and
    calls sys.exit(0), i.e. raises SystemExit.
    """
    # Imported locally because the notebook's import cell does not bring in
    # os / sys, which made this function fail with NameError.
    import os
    import sys

    try:
        deviceName = os.environ['COLAB_TPU_ADDR']
    except KeyError:
        print('TPU not found')
        sys.exit(0)

    TPUaddr = 'grpc://' + deviceName
    print('Found TPU at: {}'.format(TPUaddr))
    return TPUaddr
  

def setupTPUModel114(inputLen, numClasses=1, windowSlice=1, conv1Dcols=1, dropOutRatio=0.5, filters=10, useCNN=True):
    """
    Build the model inside a TPU distribution strategy scope.

    Connects to the TPU returned by getTPU(), initializes the TPU system,
    creates a TPUStrategy and calls setupModel(...) within strategy.scope().

    All parameters are forwarded to setupModel, including useCNN (which was
    previously accepted but silently dropped, so the CNN was always built).

    Returns the model created by setupModel.
    """
    TPUaddr = getTPU()

    # Older tf.contrib equivalent, kept for reference:
    #   resolver = tf.contrib.cluster_resolver.TPUClusterResolver(TPUaddr)
    #   tf.contrib.distribute.initialize_tpu_system(resolver)
    #   strategy = tf.contrib.distribute.TPUStrategy(resolver)
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=TPUaddr)
    tf.config.experimental_connect_to_host(resolver.master())
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.experimental.TPUStrategy(resolver)

    # The model has to be created under the strategy scope.
    with strategy.scope():
        tpuModel = setupModel(inputLen, numClasses, windowSlice, conv1Dcols,
                              dropOutRatio, filters, useCNN)

    return tpuModel
  
def setupTPUModel(model):
    """
    Convert a Keras model to a TPU model through the tf.contrib TPU API.

    model: the Keras model to convert.

    The TPU address comes from getTPU(). Returns the result of
    tf.contrib.tpu.keras_to_tpu_model.
    """
    TPUaddr = getTPU()
    resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu=TPUaddr)
    tpuStrategy = tf.contrib.tpu.TPUDistributionStrategy(resolver)
    return tf.contrib.tpu.keras_to_tpu_model(model, strategy=tpuStrategy)
   
"""
tpuModel = cnnModelTPU(strategy, inputLen/conv1Dcols/windowSlice, conv1Dcols, numClasses, dropOutRatio, filters)
with strategy.scope():
    tpuModel.compile(optimizer=tf.keras.optimizers.Adam(lr=initialLR), loss='mse', metrics=['acc', 'mae', 'mse'])
    #tpuModel.compile(optimizer=tf.keras.optimizers.Adam(lr=initialLR), loss='categorical_crossentropy', metrics=['acc', 'mae', 'mse'])
    '''
    tpuModel.compile(
        optimizer=tf.train.AdamOptimizer(learning_rate=initialLR, ),
        #loss=tf.keras.losses.sparse_categorical_crossentropy,
        #metrics=['sparse_categorical_accuracy']
        #loss=tf.keras.losses.categorical_crossentropy,
        #metrics=['categorical_accuracy'])       
        loss='mse',
        metrics=['acc', 'mae', 'mse']
    )
    '''
"""
    

"\ntpuModel = cnnModelTPU(strategy, inputLen/conv1Dcols/windowSlice, conv1Dcols, numClasses, dropOutRatio, filters)\nwith strategy.scope():\n    tpuModel.compile(optimizer=tf.keras.optimizers.Adam(lr=initialLR), loss='mse', metrics=['acc', 'mae', 'mse'])\n    #tpuModel.compile(optimizer=tf.keras.optimizers.Adam(lr=initialLR), loss='categorical_crossentropy', metrics=['acc', 'mae', 'mse'])\n    '''\n    tpuModel.compile(\n        optimizer=tf.train.AdamOptimizer(learning_rate=initialLR, ),\n        #loss=tf.keras.losses.sparse_categorical_crossentropy,\n        #metrics=['sparse_categorical_accuracy']\n        #loss=tf.keras.losses.categorical_crossentropy,\n        #metrics=['categorical_accuracy'])       \n        loss='mse',\n        metrics=['acc', 'mae', 'mse']\n    )\n    '''\n"

In [None]:
#################################
# train model CPU or GPU or TPU #
#################################

#Rewrite as in classifier

def trainModel(trainXs, trainYs, epochs=100, batchSize=1, callbacks=None, shuffleValData=False, model=None):
    """
    Train a Keras-style model with a 30% validation split.

    trainXs, trainYs: training features and labels (indexable by an index
                      array, e.g. NumPy arrays)
    epochs:           number of epochs
    batchSize:        batch size passed to fit()
    callbacks:        list of callbacks passed to fit(); None means none
    shuffleValData:   False -> a single fit() call over all epochs with
                      shuffle=True (fit shuffles the training part before
                      each epoch).
                      True  -> one fit() call per epoch on a freshly permuted
                      copy of the data with shuffle=False, so the rows that
                      end up in the validation split change every epoch.
    model:            object exposing fit(); when None, the notebook-level
                      global `model` is used, as this function did before it
                      accepted the argument.

    Returns the value of the last fit() call, or None when shuffleValData is
    set and epochs <= 0 (no call is made).
    Raises NameError when no model is passed and no global `model` exists.
    """
    if model is None:
        # Backward-compatible fallback to the hidden global.
        try:
            model = globals()['model']
        except KeyError:
            raise NameError("name 'model' is not defined") from None

    # Avoid sharing one mutable default list between calls.
    if callbacks is None:
        callbacks = []

    fitArgs = dict(
        batch_size=batchSize,
        validation_split=0.3, #use 30% of samples to validate
        callbacks=callbacks,
        verbose=1, #default
    )

    if not shuffleValData:
        return model.fit(trainXs, trainYs, epochs=epochs, shuffle=True, **fitArgs)

    trainResponse = None
    for e in range(epochs):
        print(f'Epoch: {e+1:04d}/{epochs:04d}')
        print("Shuffling train data...")
        p = np.random.permutation(trainXs.shape[0])
        #data is already shuffled by the permutation, so fit() must not reshuffle
        trainResponse = model.fit(trainXs[p], trainYs[p], epochs=1, shuffle=False, **fitArgs)
    return trainResponse

In [None]:
'''
def train_input_fn(trainXs, trainYs, batchSize=1024):
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((trainXs, trainYs))
    # Shuffle, repeat, and batch the examples.
    dataset = dataset.cache()
    dataset = dataset.shuffle(1000, reshuffle_each_iteration=True)
    dataset = dataset.repeat()
    dataset = dataset.batch(batchSize, drop_remainder=True)
    # Return the dataset.
    return dataset
'''

'\ndef train_input_fn(trainXs, trainYs, batchSize=1024):\n    # Convert the inputs to a Dataset.\n    dataset = tf.data.Dataset.from_tensor_slices((trainXs, trainYs))\n    # Shuffle, repeat, and batch the examples.\n    dataset = dataset.cache()\n    dataset = dataset.shuffle(1000, reshuffle_each_iteration=True)\n    dataset = dataset.repeat()\n    dataset = dataset.batch(batchSize, drop_remainder=True)\n    # Return the dataset.\n    return dataset\n'

In [None]:
#Main
'''
import numpy as np

if __name__ == "__main__":
    labels = np.random.randint(36, size=100)
    print(labels)
    labels = joinClasses(labels, [(0, 0), (1, 7), (8, 14), (15, 35)])
    print(labels)
'''

'\nimport numpy as np\n\nif __name__ == "__main__":\n    labels = np.random.randint(36, size=100)\n    print(labels)\n    labels = joinClasses(labels, [(0, 0), (1, 7), (8, 14), (15, 35)])\n    print(labels)\n'