In [22]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import os

In [23]:
# Make the Cmaps raw-data folder the current working directory.
# NOTE(review): this is an absolute, machine-specific path, so the cell only runs on a
# machine with this exact folder layout. The read_csv calls in this notebook pass full
# absolute paths themselves, so they do not rely on this chdir.
os.chdir('/Users/renatoboemer/code/lewagon/jet-engine/raw_data/Cmaps')

In [42]:
# Column labels for the header-less data files, listed in file order:
# two identifier columns, three operating settings, then the sensor channels.
index_names = [
    "Engine",
    "Cycle",
]

setting_names = [
    "Setting 1",
    "Setting 2",
    "Setting 3",
]

# NOTE(review): the labels are kept exactly as originally written (including the
# unbalanced parenthesis in the pressure-ratio label and the trailing space in the
# bypass-ratio label) because they are used verbatim as DataFrame column names.
sensor_names = [
    # temperatures
    "(Fan Inlet Temperature) (◦R)",
    "(LPC Outlet Temperature) (◦R)",
    "(HPC Outlet Temperature) (◦R)",
    "(LPT Outlet Temperature) (◦R)",
    # pressures
    "(Fan Inlet Pressure) (psia)",
    "(Bypass-Duct Pressure) (psia)",
    "(HPC Outlet Pressure) (psia)",
    # physical speeds
    "(Physical Fan Speed) (rpm)",
    "(Physical Core Speed) (rpm)",
    # ratios and derived quantities
    "(Engine Pressure Ratio(P50/P2)",
    "(HPC Outlet Static Pressure) (psia)",
    "(Ratio of Fuel Flow to Ps30) (pps/psia)",
    "(Corrected Fan Speed) (rpm)",
    "(Corrected Core Speed) (rpm)",
    "(Bypass Ratio) ",
    "(Burner Fuel-Air Ratio)",
    "(Bleed Enthalpy)",
    "(Required Fan Speed)",
    "(Required Fan Conversion Speed)",
    "(High-Pressure Turbines Cool Air Flow)",
    "(Low-Pressure Turbines Cool Air Flow)",
    # two unnamed trailing columns
    "Sensor 26",
    "Sensor 27",
]

# Full header: identifiers first, then settings, then sensors.
col_names = [*index_names, *setting_names, *sensor_names]

In [43]:
# Load the FD001 train and test tables and the RUL file.
# The files are read as whitespace-delimited text without a header row, so the column
# labels are supplied explicitly.
# NOTE(review): DATA_DIR is an absolute, machine-specific location; change it here when
# running the notebook elsewhere.
DATA_DIR = '/Users/renatoboemer/code/lewagon/jet-engine/raw_data/Cmaps'

# `sep` must be a raw string: in a plain literal, '\s' is an invalid escape sequence
# that recent Python versions emit a warning for.
df_train = pd.read_csv(DATA_DIR + '/train_FD001.txt', sep=r'\s+', header=None, names=col_names)
df_test = pd.read_csv(DATA_DIR + '/test_FD001.txt', sep=r'\s+', header=None, names=col_names)
df_test_RUL = pd.read_csv(DATA_DIR + '/RUL_FD001.txt', sep=r'\s+', header=None, names=['RUL'])

In [44]:
def drop_constant_cols(df, keep=('(Physical Core Speed) (rpm)',)):
    """
    Return a new DataFrame without the constant columns of ``df``.

    A column is treated as constant when ``df.nunique()`` reports at most one
    distinct value. ``nunique`` ignores NaN by default, so a column containing
    only NaN is treated as constant as well. The original rationale for
    dropping such columns: a value that never changes shows no change as the
    RUL reaches failure.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    keep : str or iterable of str, optional
        Column label(s) that are never dropped, even if constant. The default
        protects '(Physical Core Speed) (rpm)', matching the previously
        hard-coded exception.

    Returns
    -------
    pandas.DataFrame
        ``df`` with every unprotected constant column removed.
    """
    # Accept a single label as a convenience; Index.isin requires a list-like.
    if isinstance(keep, str):
        keep = [keep]

    # Work with plain boolean arrays so the mask is purely positional.
    is_constant = (df.nunique() <= 1).to_numpy()
    is_protected = df.columns.isin(list(keep))

    cols_to_drop = df.columns[is_constant & ~is_protected]
    return df.drop(columns=cols_to_drop)


In [45]:
# Display the training frame with its constant columns removed.
# The result is not assigned, so df_train itself keeps all of its columns.
drop_constant_cols(df_train)

Unnamed: 0,Engine,Cycle,Setting 1,Setting 2,(LPC Outlet Temperature) (◦R),(HPC Outlet Temperature) (◦R),(LPT Outlet Temperature) (◦R),(Bypass-Duct Pressure) (psia),(HPC Outlet Pressure) (psia),(Physical Fan Speed) (rpm),(Physical Core Speed) (rpm),(HPC Outlet Static Pressure) (psia),(Ratio of Fuel Flow to Ps30) (pps/psia),(Corrected Fan Speed) (rpm),(Corrected Core Speed) (rpm),(Bypass Ratio),(Bleed Enthalpy),(High-Pressure Turbines Cool Air Flow),(Low-Pressure Turbines Cool Air Flow)
0,1,1,-0.0007,-0.0004,641.82,1589.70,1400.60,21.61,554.36,2388.06,9046.19,47.47,521.66,2388.02,8138.62,8.4195,392,39.06,23.4190
1,1,2,0.0019,-0.0003,642.15,1591.82,1403.14,21.61,553.75,2388.04,9044.07,47.49,522.28,2388.07,8131.49,8.4318,392,39.00,23.4236
2,1,3,-0.0043,0.0003,642.35,1587.99,1404.20,21.61,554.26,2388.08,9052.94,47.27,522.42,2388.03,8133.23,8.4178,390,38.95,23.3442
3,1,4,0.0007,0.0000,642.35,1582.79,1401.87,21.61,554.45,2388.11,9049.48,47.13,522.86,2388.08,8133.83,8.3682,392,38.88,23.3739
4,1,5,-0.0019,-0.0002,642.37,1582.85,1406.22,21.61,554.00,2388.06,9055.15,47.28,522.19,2388.04,8133.80,8.4294,393,38.90,23.4044
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,196,-0.0004,-0.0003,643.49,1597.98,1428.63,21.61,551.43,2388.19,9065.52,48.07,519.49,2388.26,8137.60,8.4956,397,38.49,22.9735
20627,100,197,-0.0016,-0.0005,643.54,1604.50,1433.58,21.61,550.86,2388.23,9065.11,48.04,519.68,2388.22,8136.50,8.5139,395,38.30,23.1594
20628,100,198,0.0004,0.0000,643.42,1602.46,1428.18,21.61,550.94,2388.24,9065.90,48.09,520.01,2388.24,8141.05,8.5646,398,38.44,22.9333
20629,100,199,-0.0011,0.0003,643.23,1605.26,1426.53,21.61,550.68,2388.25,9073.72,48.39,519.67,2388.23,8139.29,8.5389,395,38.29,23.0640


In [54]:
def drop_constant_cols(df):
    """
    Return the labels of the columns with at most one distinct value, together
    with '(Physical Core Speed) (rpm)'.

    Despite its name this version drops nothing: it returns a pandas Index of
    column labels, not a DataFrame.

    NOTE(review): this reuses the name of a function defined earlier in the
    notebook, so once this cell has run the earlier definition is replaced.
    """
    has_single_value = df.nunique() <= 1
    is_core_speed = df.columns == '(Physical Core Speed) (rpm)'
    selected = has_single_value | is_core_speed
    return df.columns[selected]

In [55]:
# Display the column labels selected by the redefined drop_constant_cols above
# (an Index of labels rather than a filtered DataFrame).
drop_constant_cols(df_train)

Index(['Setting 3', '(Fan Inlet Temperature) (◦R)',
       '(Fan Inlet Pressure) (psia)', '(Physical Core Speed) (rpm)',
       '(Engine Pressure Ratio(P50/P2)', '(Burner Fuel-Air Ratio)',
       '(Required Fan Speed)', '(Required Fan Conversion Speed)', 'Sensor 26',
       'Sensor 27'],
      dtype='object')

In [56]:
# Two orderings of the same column labels, kept as lists so the order stays visible.
# `original` lists '(Physical Core Speed) (rpm)' third; `new` lists it fourth, in the
# same order as the Index displayed by drop_constant_cols(df_train) in the previous cell.
original = [
    'Setting 3',
    '(Fan Inlet Temperature) (◦R)',
    '(Physical Core Speed) (rpm)',
    '(Fan Inlet Pressure) (psia)',
    '(Engine Pressure Ratio(P50/P2)',
    '(Burner Fuel-Air Ratio)',
    '(Required Fan Speed)',
    '(Required Fan Conversion Speed)',
    'Sensor 26',
    'Sensor 27',
]

new = [
    'Setting 3',
    '(Fan Inlet Temperature) (◦R)',
    '(Fan Inlet Pressure) (psia)',
    '(Physical Core Speed) (rpm)',
    '(Engine Pressure Ratio(P50/P2)',
    '(Burner Fuel-Air Ratio)',
    '(Required Fan Speed)',
    '(Required Fan Conversion Speed)',
    'Sensor 26',
    'Sensor 27',
]

In [57]:
# Compare the two lists as sets, i.e. ignoring order and repeated entries.
original_set, new_set = set(original), set(new)

# Report whether both lists contain the same distinct labels.
verdict = (
    "The lists have the exact same values."
    if original_set == new_set
    else "The lists do not have the exact same values."
)
print(verdict)

The lists have the exact same values.
