In [2]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [113]:
def load_data(file_name):
    """Read one CSV from the SDAHU dataset folder.

    Parameters:
    - file_name: str
        Name of the CSV file, relative to the SDAHU dataset folder.

    Returns:
    - pandas DataFrame with the parsed file contents.
    """
    csv_path = 'dataset/LBNL_FDD_Dataset_SDAHU_all_3/LBNL_FDD_Dataset_SDAHU/' + file_name
    return pd.read_csv(csv_path)

def save_df(df, file_name):
    """Write a DataFrame as CSV into the SDAHU dataset folder.

    Parameters:
    - df: pandas DataFrame
        The frame to persist (its index is written as the first column,
        which is pandas' default for ``to_csv``).
    - file_name: str
        Name of the CSV file to create inside the SDAHU dataset folder.
    """
    target_path = 'dataset/LBNL_FDD_Dataset_SDAHU_all_3/LBNL_FDD_Dataset_SDAHU/' + file_name
    df.to_csv(target_path)

In [12]:
# Load the fault-free baseline and discard the 'Unnamed: 0' column
# (presumably an index that was written out when the CSV was saved — confirm).
df_correct = load_data('correct_data.csv').drop(columns=['Unnamed: 0'])

In [16]:
# Column names of the baseline frame; reused below to select the same
# columns from every fault dataset.
features = df_correct.columns.tolist()

In [109]:
# Display the list of feature column names taken from df_correct.
features

['MA_TEMP', 'RA_TEMP', 'RF_WAT']

In [101]:
from sklearn.feature_selection import VarianceThreshold

def remove_low_variance_features(df, threshold=0.1):
    """
    Prints the variance of each feature and removes the low-variance ones.

    Selection is delegated to scikit-learn's ``VarianceThreshold``, which keeps
    a feature only when its population variance (``ddof=0``) is strictly
    greater than ``threshold``. The surviving column names are read from the
    fitted selector's support mask, so the labels always match the columns the
    selector actually returned.

    Parameters:
    - df: pandas DataFrame
        The input DataFrame with features.
    - threshold: float, optional (default=0.1)
        Features whose variance is at or below this value are removed.

    Returns:
    - df_filtered: pandas DataFrame
        The DataFrame with low variance features removed. It is rebuilt from
        the selector's output, so it carries a fresh default row index.

    Raises:
    - ValueError
        Propagated from ``VarianceThreshold`` when no feature exceeds the
        threshold.
    """
    # Population variance mirrors what VarianceThreshold computes; pandas'
    # default sample variance (ddof=1) is slightly larger and can disagree
    # with the selector for features close to the threshold.
    feature_variances = df.var(ddof=0)

    # Print the variance of each feature
    print("Feature Variances:")
    print(feature_variances)

    # Fit the selector and keep only the columns it retains
    variance_threshold = VarianceThreshold(threshold=threshold)
    filtered_values = variance_threshold.fit_transform(df)

    # Label the result from the selector's own mask rather than re-deriving
    # the mask by hand, which is what previously allowed a mismatch.
    selected_features = df.columns[variance_threshold.get_support()]
    df_filtered = pd.DataFrame(filtered_values, columns=selected_features)

    # Print the selected features
    print(f"\nSelected Features (with variance > {threshold}):")
    print(selected_features)

    return df_filtered

In [99]:
def sum_of_absolute_difference(lst):
    """Return the sum of |lst[k] - lst[k-1]| over all consecutive pairs.

    Sequences with fewer than two elements yield 0.
    """
    total = 0
    # Walk from the second element onward, comparing each value with its
    # predecessor and accumulating left to right.
    for idx in range(1, len(lst)):
        total += abs(lst[idx] - lst[idx - 1])
    return total
def safe_min(arr):
    """
    Safely computes the minimum of an array, handling zero-size arrays and NaN values.

    Parameters:
    - arr: numpy array (or any array-like)
        The input array.

    Returns:
    - minimum: float or numpy.nan
        The minimum value, or numpy.nan if the array is empty or contains NaN values.
    """
    try:
        # np.min propagates NaN on its own, so only the failure case needs handling.
        return np.min(arr)
    except ValueError:
        # Raised for zero-size input; return NaN explicitly instead of
        # silently falling through to an implicit None.
        return np.nan
    
def safe_max(arr):
    """
    Safely computes the maximum of an array, handling zero-size arrays and NaN values.

    Parameters:
    - arr: numpy array (or any array-like)
        The input array.

    Returns:
    - maximum: float or numpy.nan
        The maximum value, or numpy.nan if the array is empty or contains NaN values.
    """
    try:
        # np.max propagates NaN on its own, so only the failure case needs handling.
        return np.max(arr)
    except ValueError:
        # Raised for zero-size input; return NaN explicitly instead of
        # silently falling through to an implicit None.
        return np.nan
    
def interval_feature_transformation(df, window_size, fault_time=None):
    """
    Summarise every column of ``df`` over consecutive fixed-size row windows.

    Rows are split by position into windows of ``window_size`` rows (the last
    window may be shorter). For each column ``<feat>`` and each window, five
    statistics are produced:

    - ``<feat>-mean``: arithmetic mean (``np.mean``)
    - ``<feat>-std``:  population standard deviation (``np.std``)
    - ``<feat>-d``:    sum of absolute differences between consecutive rows
    - ``<feat>-min``:  minimum
    - ``<feat>-peak``: maximum

    Windows are formed from row positions, so every full window holds exactly
    ``window_size`` rows and the result does not depend on the index labels
    of ``df``. Columns are processed by name, in the order they appear.

    Parameters:
    - df: pandas DataFrame
        Numeric feature columns, one row per sample.
    - window_size: int
        Number of rows per window; must be at least 1.
    - fault_time: optional
        Accepted for call compatibility; not used by the transformation.

    Returns:
    - result: pandas DataFrame
        One row per window, with the five statistic columns of each input
        column laid out side by side in input-column order.

    Raises:
    - ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # Positional start offset of every window: 0, w, 2w, ...
    window_starts = range(0, len(df), window_size)

    transformed = {}
    for feat in df.columns:
        values = df[feat].to_numpy()
        # Slicing never yields an empty window because each start is < len(df).
        windows = [values[start:start + window_size] for start in window_starts]

        transformed[f'{feat}-mean'] = [np.mean(win) for win in windows]
        transformed[f'{feat}-std'] = [np.std(win) for win in windows]
        # Differences are taken inside a window only, never across a boundary.
        transformed[f'{feat}-d'] = [np.abs(np.diff(win)).sum() for win in windows]
        transformed[f'{feat}-min'] = [np.min(win) for win in windows]
        transformed[f'{feat}-peak'] = [np.max(win) for win in windows]

    result = pd.DataFrame(transformed)

    return result
    

In [100]:
# Window statistics of the fault-free baseline, 10 rows per window.
df_correct_transformed_10mins = interval_feature_transformation(df_correct, window_size=10)

In [104]:
# Persist the 10-row baseline features directly under dataset/ (not via save_df).
df_correct_transformed_10mins.to_csv(path_or_buf='dataset/ift_transormed_correct_10mins.csv')

In [105]:
# Window statistics of the fault-free baseline, 20 rows per window.
df_correct_transformed_20mins = interval_feature_transformation(df_correct, window_size=20)

In [106]:
# Persist the 20-row baseline features directly under dataset/ (not via save_df).
df_correct_transformed_20mins.to_csv(path_or_buf='dataset/ift_transormed_correct_20mins.csv')

In [108]:
# Load the four damper-stuck fault datasets, in the same order as before.
# Note that the 100 variant is read from the '_short' file.
(
    df_damper_stuck_010,
    df_damper_stuck_025,
    df_damper_stuck_075,
    df_damper_stuck_100,
) = (
    load_data(csv_name)
    for csv_name in (
        'damper_stuck_010_annual.csv',
        'damper_stuck_025_annual.csv',
        'damper_stuck_075_annual.csv',
        'damper_stuck_100_annual_short.csv',
    )
)

In [110]:
# Damper stuck 010: restrict to the baseline feature columns, 10 rows per window.
df_damper_stuck_010_10mins = interval_feature_transformation(
    df_damper_stuck_010[features], window_size=10
)

In [111]:
# Damper stuck 010: restrict to the baseline feature columns, 20 rows per window.
df_damper_stuck_010_20mins = interval_feature_transformation(
    df_damper_stuck_010[features], window_size=20
)

In [114]:
# Write both damper-stuck-010 feature tables into the SDAHU dataset folder.
save_df(df=df_damper_stuck_010_10mins, file_name='ift_transormed_damper_stuck_010_10mins.csv')
save_df(df=df_damper_stuck_010_20mins, file_name='ift_transormed_damper_stuck_010_20mins.csv')

In [116]:
# Damper stuck 025: build and save the 10-row window features ...
df_damper_stuck_025_10mins = interval_feature_transformation(
    df_damper_stuck_025[features], window_size=10
)
save_df(df=df_damper_stuck_025_10mins, file_name='ift_transormed_damper_stuck_025_10mins.csv')

# ... then the 20-row window features.
df_damper_stuck_025_20mins = interval_feature_transformation(
    df_damper_stuck_025[features], window_size=20
)
save_df(df=df_damper_stuck_025_20mins, file_name='ift_transormed_damper_stuck_025_20mins.csv')

In [117]:
# Damper stuck 075: build and save the 10-row window features ...
df_damper_stuck_075_10mins = interval_feature_transformation(
    df_damper_stuck_075[features], window_size=10
)
save_df(df=df_damper_stuck_075_10mins, file_name='ift_transormed_damper_stuck_075_10mins.csv')

# ... then the 20-row window features.
df_damper_stuck_075_20mins = interval_feature_transformation(
    df_damper_stuck_075[features], window_size=20
)
save_df(df=df_damper_stuck_075_20mins, file_name='ift_transormed_damper_stuck_075_20mins.csv')

In [118]:
# Damper stuck 100: build and save the 10-row window features ...
df_damper_stuck_100_10mins = interval_feature_transformation(
    df_damper_stuck_100[features], window_size=10
)
save_df(df=df_damper_stuck_100_10mins, file_name='ift_transormed_damper_stuck_100_10mins.csv')

# ... then the 20-row window features.
df_damper_stuck_100_20mins = interval_feature_transformation(
    df_damper_stuck_100[features], window_size=20
)
save_df(df=df_damper_stuck_100_20mins, file_name='ift_transormed_damper_stuck_100_20mins.csv')