In [None]:
import model_feedinput_pipeline
from model_feedinput_pipeline import CODE_ENV, DATASET_ID


In [None]:
# NOTE(review): `select_input_stepsize` is not referenced by any of the visible
# cells; confirm where it is consumed before changing it.
select_input_stepsize = 3000

# Four column names to keep for each dataset, keyed by dataset identifier.
select_columns = {
    DATASET_ID.First: ['b1_ch1', 'b2_ch3', 'b3_ch5', 'b4_ch7'],
    DATASET_ID.Second: ['b1_ch1', 'b2_ch2', 'b3_ch3', 'b4_ch4'],
    DATASET_ID.Third: ['b1_ch1', 'b2_ch2', 'b3_ch3', 'b4_ch4'],
}

# ---------------------------------------------------------------------------
# IMPORTANT: update `code_env` to the environment this notebook is run in.
# ---------------------------------------------------------------------------
code_env = CODE_ENV.EC2

# Dataset locations are looked up for the environment selected above.
dataset_paths = model_feedinput_pipeline.get_dataset_paths(code_env)


In [None]:
import os
import joblib
from pathlib import Path

import pandas as pd
import numpy as np
from numpy.random import seed

import matplotlib.pyplot as plt
#%matplotlib inline

import seaborn as sns
sns.set(color_codes=True)

from sklearn.preprocessing import MinMaxScaler
from scipy.stats import entropy



import tensorflow as tf
#tf.logging.set_verbosity(tf.logging.ERROR)

from keras.layers import Input, Dropout, Dense, LSTM, TimeDistributed, RepeatVector
from keras.models import Model
from keras import regularizers

# Pin the global NumPy and TensorFlow random seeds to the same fixed value.
np.random.seed(10)
tf.random.set_seed(10)


In [None]:
# Root mean square (RMS) of each column
def calculate_rms(df):
    """Return the root mean square of every column of ``df``.

    For each column the squared samples are summed, divided by the column
    length and square-rooted.

    Parameters
    ----------
    df : pandas.DataFrame
        One signal per column.

    Returns
    -------
    list
        One RMS value per column, in column order.
    """
    return [
        np.sqrt((df[name] ** 2).sum() / len(df[name]))
        for name in df
    ]


# extract peak-to-peak features
def calculate_p2p(df):
    """Return the peak-to-peak amplitude (max - min) of every column.

    The difference ``max - min`` is used rather than ``|max| + |min|``: the two
    only agree when a signal crosses zero, and the latter overstates the range
    of a signal that carries a DC offset (all samples of the same sign).

    Parameters
    ----------
    df : pandas.DataFrame
        One signal per column.

    Returns
    -------
    numpy.ndarray
        One peak-to-peak value per column, in column order.
    """
    return np.array(df.max() - df.min())


# extract shannon entropy (cut signals to 500 bins)
def calculate_entropy(df, bins=500):
    """Return the Shannon entropy of each column's amplitude histogram.

    Every column is discretised with ``pd.cut`` into ``bins`` intervals and the
    resulting bin counts are passed to ``scipy.stats.entropy``.

    Parameters
    ----------
    df : pandas.DataFrame
        One signal per column.
    bins : int, default 500
        Forwarded to ``pd.cut`` as the number of bins.  The default keeps the
        previously hard-coded resolution.

    Returns
    -------
    numpy.ndarray
        One entropy value per column, in column order.
    """
    return np.array([
        entropy(pd.cut(df[col], bins).value_counts())
        for col in df
    ])


# extract clearance factor
def calculate_clearence(df):
    """Return the clearance factor of every column.

    The clearance factor is the peak absolute amplitude divided by the squared
    mean of the square roots of the absolute amplitudes::

        max(|x|) / (mean(sqrt(|x|))) ** 2

    Previously only the denominator was returned, so the peak was never taken
    into account.

    Parameters
    ----------
    df : pandas.DataFrame
        One signal per column.

    Returns
    -------
    list
        One clearance factor per column, in column order.  An all-zero column
        yields NaN (0 / 0).
    """
    result = []
    for col in df:
        magnitude = df[col].abs()
        # Squared mean of the square-rooted magnitudes (the denominator).
        sqrt_mean_squared = (np.sqrt(magnitude).sum() / len(magnitude)) ** 2
        result.append(magnitude.max() / sqrt_mean_squared)
    return result


def time_features(dataset_details, id:model_feedinput_pipeline.DATASET_ID, select_columns:dict):
    """Build one row of time-domain features per recording of a dataset.

    Every file listed in ``dataset_details[id]['paths']`` is loaded with
    ``model_feedinput_pipeline.get_df``, reduced to the columns named in
    ``select_columns[id]`` and summarised per column by twelve statistics:
    mean of absolute values, standard deviation, skewness, kurtosis, histogram
    entropy, RMS, maximum absolute value, peak-to-peak, crest factor
    (max / rms), clearance (from ``calculate_clearence``), shape factor
    (rms / mean) and impulse factor (max / mean).

    Parameters
    ----------
    dataset_details : mapping
        Indexed as ``dataset_details[id]['paths']``; also handed to ``get_df``.
    id : model_feedinput_pipeline.DATASET_ID
        Dataset to process.
    select_columns : dict
        Maps a dataset id to the list of raw column names to keep.

    Returns
    -------
    pandas.DataFrame
        One row per file, sorted by a ``DatetimeIndex`` parsed from the file
        name.  Columns are ``B<k>_<feature>`` where ``k`` is the 1-based
        position of the column in ``select_columns[id]``, ordered with all
        features of B1 first, then B2, and so on.

    Notes
    -----
    * Reads the notebook-level ``code_env`` variable, so the configuration
      cell must have been executed first.
    * Assumes the frame returned by ``get_df`` contains the columns named in
      ``select_columns[id]`` -- TODO confirm against ``get_df``.
    * Assumes the last path component of every entry in ``paths`` is a
      timestamp formatted as ``%Y.%m.%d.%H.%M.%S``; ``pd.to_datetime`` raises
      otherwise -- TODO confirm against the dataset layout.
    """
    feature_names = ['mean','std','skew','kurtosis','entropy','rms','max','p2p', 'crest', 'clearence', 'shape', 'impulse']
    channels = list(select_columns[id])
    bearings = ['B' + str(number) for number in range(1, len(channels) + 1)]
    # Final column order: every feature of B1, then every feature of B2, ...
    feature_columns = [bearing + '_' + name for bearing in bearings for name in feature_names]

    paths = dataset_details[id]['paths']
    rows = []
    row_labels = []
    for fileindex in range(len(paths)):
        raw_data = model_feedinput_pipeline.get_df(dataset_details, id, fileindex, code_env)
        # Keep only the requested columns so values line up with `bearings`.
        raw_data = raw_data[channels]

        # Each entry below is a 1-D array with one value per selected column.
        mean_abs = np.array(raw_data.abs().mean())
        rms = np.array(calculate_rms(raw_data))
        max_abs = np.array(raw_data.abs().max())
        per_feature = {
            'mean': mean_abs,
            'std': np.array(raw_data.std()),
            'skew': np.array(raw_data.skew()),
            'kurtosis': np.array(raw_data.kurtosis()),
            'entropy': np.array(calculate_entropy(raw_data)),
            'rms': rms,
            'max': max_abs,
            'p2p': np.array(calculate_p2p(raw_data)),
            'crest': max_abs / rms,
            'clearence': np.array(calculate_clearence(raw_data)),
            'shape': rms / mean_abs,
            'impulse': max_abs / mean_abs,
        }

        rows.append({
            bearing + '_' + name: per_feature[name][position]
            for position, bearing in enumerate(bearings)
            for name in feature_names
        })
        # The row label is the file name; it is parsed as a timestamp below.
        row_labels.append(Path(str(paths[fileindex])).name)

    # Build the frame once instead of appending inside the loop:
    # DataFrame.append was removed in pandas 2.0 and was quadratic anyway.
    data = pd.DataFrame(rows, index=row_labels, columns=feature_columns)

    data.index = pd.to_datetime(data.index, format='%Y.%m.%d.%H.%M.%S')
    data = data.sort_index()

    return data


In [None]:
# Extract the time-domain features of the first dataset and write them to CSV.
# `select_columns` is a required argument of time_features(); omitting it
# raises a TypeError.
set1 = time_features(dataset_paths, model_feedinput_pipeline.DATASET_ID.First, select_columns)
set1.to_csv('set1_timefeatures.csv')

# Read the file back; the unnamed index written by to_csv comes back as the
# column 'Unnamed: 0', which is renamed to 'time'.
merged_data = pd.read_csv("./set1_timefeatures.csv")
merged_data = merged_data.rename(columns={'Unnamed: 0':'time'})
# set_index returns a new frame, so the result must be assigned to take effect.
merged_data = merged_data.set_index('time')
merged_data.describe()


