In [8]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from scipy.signal import decimate
from tsfresh import extract_features
from tsfresh.feature_selection import select_features
import xgboost
from xgboost import XGBClassifier

In [23]:
class DataProcessor:
    """Load tab-separated sensor files and expose the valve-condition labels.

    Every name in ``file_names`` is read from ``input_path + name + '.txt'``,
    so ``input_path`` has to end with a path separator (or be empty).
    """

    def __init__(self, input_path, file_names):
        """Store the file locations; nothing is read until ``read_files()``."""
        self.input_path = input_path
        self.file_names = file_names
        # Created up front so print_shape() / create_target_df() behave
        # predictably instead of raising AttributeError when they are called
        # before read_files().
        self.data = {}
        self.valve_condition = None

    def read_files(self):
        """Read every configured file into ``self.data`` and return that dict.

        Returns:
            dict mapping each file name (without extension) to a DataFrame
            parsed without a header row, using a tab separator.
        """
        self.data = {}
        print("Reading files...")
        for file in self.file_names:
            with open(self.input_path + file + '.txt', 'r') as f:
                self.data[file] = pd.read_csv(f, header=None, sep='\t')
        return self.data

    def print_shape(self):
        """Print the shape of every frame currently held in ``self.data``."""
        print("Files read:")
        for name, frame in self.data.items():
            print(f"{name}: {frame.shape}")

    def create_target_df(self):
        """Name the columns of the ``'target'`` frame and return one of them.

        The ``'target'`` frame is renamed in place and stays in ``self.data``.

        Returns:
            The ``'Valve_Condition'`` column, also stored as
            ``self.valve_condition``.

        Raises:
            KeyError: if ``'target'`` has not been loaded.
        """
        if 'target' not in self.data:
            raise KeyError(
                "'target' is not loaded: include it in file_names and call "
                "read_files() first"
            )
        target_columns = ['Cooler_Condition', 'Valve_Condition',
                          'Internal_Pump_Leakage', 'Hydraulic_Accumulator',
                          'Stable_Flag']
        self.data['target'].columns = target_columns
        self.valve_condition = self.data['target']['Valve_Condition']
        return self.valve_condition

def process_data(input_path="input_data/", file_names=None):
    """Read the sensor files and return them with the valve-condition labels.

    Args:
        input_path: Path prefix passed to ``DataProcessor``; it is concatenated
            directly with each file name. Defaults to ``"input_data/"``.
        file_names: File names (without the ``.txt`` extension) to load. When
            omitted, the 17 sensor files plus ``"target"`` are used.

    Returns:
        A ``(data, df_target)`` tuple: the dict returned by
        ``DataProcessor.read_files()`` and the value returned by
        ``DataProcessor.create_target_df()``.
    """
    if file_names is None:
        file_names = [
            "ce", "cp", "eps1", "se", "vs1",
            "fs1", "fs2",
            "ps1", "ps2", "ps3", "ps4", "ps5", "ps6",
            "ts1", "ts2", "ts3", "ts4", "target"
        ]

    processor = DataProcessor(input_path, file_names)
    data = processor.read_files()
    processor.print_shape()
    # create_target_df() already returns the stored valve-condition column,
    # so there is no need to read it back from the processor afterwards.
    df_target = processor.create_target_df()
    return data, df_target

# Load the sensor frames and the valve-condition labels used by the cells below.
data, df_target = process_data()

Reading files...
Files read:
ce: (2205, 60)
cp: (2205, 60)
eps1: (2205, 6000)
se: (2205, 60)
vs1: (2205, 60)
fs1: (2205, 600)
fs2: (2205, 600)
ps1: (2205, 6000)
ps2: (2205, 6000)
ps3: (2205, 6000)
ps4: (2205, 6000)
ps5: (2205, 6000)
ps6: (2205, 6000)
ts1: (2205, 60)
ts2: (2205, 60)
ts3: (2205, 60)
ts4: (2205, 60)
target: (2205, 5)


In [24]:
# Sensors to downsample and stack; every name must be a key of `data`.
df_list = ['eps1', 'ps3']

# Raw columns per row -> decimation factor. Both entries reduce a row to
# 60 values; frames with any other width are passed through unchanged.
DOWNSAMPLE_FACTORS = {6000: 100, 600: 10}


def _downsample_sensor(frame, labels, factors=DOWNSAMPLE_FACTORS):
    """Return a downsampled copy of ``frame`` with 'id' and 'target' columns.

    Args:
        frame: DataFrame with one signal per row.
        labels: Values assigned to the 'target' column (aligned on the index
            when a Series is given).
        factors: Mapping from column count to decimation factor. Frames whose
            width is not listed are copied without decimation.

    Returns:
        A new DataFrame; ``frame`` itself is never modified.
    """
    factor = factors.get(frame.shape[1])
    if factor is None:
        # Copy so that adding 'id'/'target' does not alter the raw frame
        # stored in `data`.
        result = frame.copy()
    else:
        # decimate filters along `axis`, so all rows are processed in one
        # call instead of a Python-level loop over rows.
        result = pd.DataFrame(
            decimate(frame.to_numpy(), factor, ftype='fir', axis=1)
        )
    result["id"] = result.index
    # Every width gets the label column, so the stacked frame below has no
    # missing targets regardless of which sensors are listed in df_list.
    result["target"] = labels
    return result


df_downsampled = {
    name: _downsample_sensor(data[name], df_target) for name in df_list
}

for name, frame in df_downsampled.items():
    print(f"shape of {name}: {frame.shape}")

# Combine all DataFrames
# NOTE(review): rows of different sensors are stacked under the same column
# names and the same 'id' values, with no column recording which sensor a row
# came from - confirm that downstream steps expect this layout.
df_combined = pd.concat(
    [df_downsampled[name] for name in df_list], ignore_index=True
)

df_combined


shape of eps1: (2205, 62)
shape of ps3: (2205, 62)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,52,53,54,55,56,57,58,59,id,target
0,1225.527426,3077.202935,2755.495404,2989.621686,2917.196116,2971.731384,2930.727252,2965.267676,2928.917106,2979.661959,...,2407.023986,2422.396739,2396.041949,2438.080335,2370.738699,2475.851154,2304.707126,2621.339755,0,100
1,1218.617026,3039.869740,2863.079356,3001.596066,2910.140547,2971.551466,2926.468838,2963.055964,2925.841901,2975.982887,...,2394.597318,2409.128850,2384.086467,2424.760139,2358.724890,2463.175189,2293.135516,2608.581853,1,100
2,1212.488943,3026.982322,2852.802636,2992.282090,2901.039818,2961.584530,2916.840879,2953.471652,2916.460171,2966.807044,...,2381.666453,2396.118314,2371.318934,2412.256580,2346.585336,2450.974440,2281.774580,2594.926320,2,100
3,1205.793537,3016.145886,2848.331056,2983.604593,2894.170718,2955.452597,2910.368085,2947.516459,2909.186134,2961.150605,...,2376.156125,2388.823529,2363.327813,2405.211098,2339.019540,2440.600365,2272.692669,2584.702962,3,100
4,1200.132767,3004.750985,2841.304827,2975.350312,2885.809997,2947.384219,2902.759987,2939.945527,2902.688071,2953.640570,...,2366.413519,2380.237983,2356.517277,2398.492486,2331.594296,2436.032556,2267.593267,2579.500908,4,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4405,0.172926,0.011496,-0.018428,0.024396,-0.043644,0.063811,-0.104500,0.168088,-0.308903,0.980114,...,2.344719,2.342248,2.311307,2.359219,2.300111,2.390136,2.225378,2.583649,2200,100
4406,0.174585,0.011612,-0.018216,0.023935,-0.042724,0.062371,-0.102110,0.162070,-0.301379,0.960469,...,2.298340,2.330884,2.312942,2.351669,2.291068,2.402735,2.230432,2.545339,2201,100
4407,0.170944,0.011664,-0.018810,0.025126,-0.044919,0.065827,-0.107497,0.169427,-0.318937,1.016304,...,2.340973,2.351554,2.324610,2.339859,2.286507,2.403170,2.205626,2.521674,2202,100
4408,0.173025,0.011461,-0.018365,0.024314,-0.043512,0.063590,-0.104051,0.163943,-0.308924,0.981163,...,2.354437,2.351759,2.355719,2.375095,2.312541,2.435032,2.259279,2.562707,2203,100
