# Fault Analysis of AUX converters

### Step 1. Load and prepare the data

In [111]:
import pandas as pd

# Workbook to analyse; a relative path is resolved against the working directory.
file_path = "sample_data.xlsx"

# Skip the first three spreadsheet rows so the row that follows them is used
# as the column header.
df = pd.read_excel(io=file_path, skiprows=3)

df.head()

Unnamed: 0,Field Name,Units,2024-12-26 17:39:40,2024-12-26 17:39:41,2024-12-26 17:39:42,2024-12-26 17:39:43,2024-12-26 17:39:44,2024-12-26 17:39:45,2024-12-26 17:39:45.1,2024-12-26 17:39:46,2024-12-26 17:39:47,2024-12-26 17:39:48
0,Packet_Index,,0.0,0.0,0.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0
1,Loco_speed,KMPH,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Acceleration,m/sec2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,OHE_Volt_KV,KV,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2
4,OHE_Current,Amps,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Step 2. Clean up the data (omit all string-valued rows)

In [112]:
# Keep only rows that hold at least one numeric cell in the reading columns
# (positions 2-11, i.e. spreadsheet columns C-L); rows whose readings are all
# strings are dropped.
# Note: `is_number` is also True for a float NaN, so all-NaN rows are kept.
reading_cells = df.iloc[:, 2:12]
has_numeric_reading = reading_cells.map(pd.api.types.is_number).any(axis=1)
df = df[has_numeric_reading].reset_index(drop=True)

df.head()

Unnamed: 0,Field Name,Units,2024-12-26 17:39:40,2024-12-26 17:39:41,2024-12-26 17:39:42,2024-12-26 17:39:43,2024-12-26 17:39:44,2024-12-26 17:39:45,2024-12-26 17:39:45.1,2024-12-26 17:39:46,2024-12-26 17:39:47,2024-12-26 17:39:48
0,Packet_Index,,0.0,0.0,0.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0
1,Loco_speed,KMPH,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Acceleration,m/sec2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,OHE_Volt_KV,KV,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2
4,OHE_Current,Amps,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Step 3. Drop rows whose 'before' and 'after' values are identical

In [113]:

def before_after_equal(row):
    """Return True when a row's 'before' and 'after' readings match.

    The comparison is positional: the i-th 'before' value is compared with the
    i-th 'after' value.

    Parameters
    ----------
    row : pandas.Series
        One row of the readings frame. Positions 2-6 are taken as the 'before'
        window and positions 7-11 as the 'after' window.

    Returns
    -------
    bool
        True if every 'before' value equals the 'after' value at the same
        offset. A pair of missing values (NaN/None) counts as equal.
    """
    before = row.iloc[2:7].to_numpy()  # 'before' window
    after = row.iloc[7:12].to_numpy()  # 'after' window
    # NaN != NaN under plain ==, so a reading that is missing on both sides
    # would otherwise make an unchanged row look changed and survive the filter.
    both_missing = pd.isna(before) & pd.isna(after)
    return ((before == after) | both_missing).all()


# Flag every row that before_after_equal reports as unchanged, then keep the
# remaining rows and renumber the index from zero.
unchanged_rows = df.apply(before_after_equal, axis=1)
df = df[~unchanged_rows].reset_index(drop=True)
df.head()

Unnamed: 0,Field Name,Units,2024-12-26 17:39:40,2024-12-26 17:39:41,2024-12-26 17:39:42,2024-12-26 17:39:43,2024-12-26 17:39:44,2024-12-26 17:39:45,2024-12-26 17:39:45.1,2024-12-26 17:39:46,2024-12-26 17:39:47,2024-12-26 17:39:48
0,Packet_Index,,0,0,0,0,0,0,24,0,0,0
1,Bogie1_DCLV,Volts,577,573,563,557,560,559,550,554,538,545
2,Bogie2_DCLV,Volts,572,576,554,575,559,551,550,561,546,542
3,Aux_Winding_I,Amps,11,11,12,12,14,11,12,9,11,12
4,Bogie1_T_F_Oil_Temp,Deg c,48,48,48,48,48,48,48,48,49,48


### Step 3.1 Further cleaning: drop rows whose unit is NaN or "NUM"

In [114]:
# Keep only rows whose "Units" entry is present and is not the literal "NUM".
# The filter is skipped entirely when the frame has no "Units" column.
if "Units" in df.columns:
    has_real_unit = df["Units"].notna() & (df["Units"] != "NUM")
    df = df[has_real_unit].reset_index(drop=True)
df.head()

Unnamed: 0,Field Name,Units,2024-12-26 17:39:40,2024-12-26 17:39:41,2024-12-26 17:39:42,2024-12-26 17:39:43,2024-12-26 17:39:44,2024-12-26 17:39:45,2024-12-26 17:39:45.1,2024-12-26 17:39:46,2024-12-26 17:39:47,2024-12-26 17:39:48
0,Bogie1_DCLV,Volts,577,573,563,557,560,559,550,554,538,545
1,Bogie2_DCLV,Volts,572,576,554,575,559,551,550,561,546,542
2,Aux_Winding_I,Amps,11,11,12,12,14,11,12,9,11,12
3,Bogie1_T_F_Oil_Temp,Deg c,48,48,48,48,48,48,48,48,49,48
4,VCU1_DIP_Temp,Deg c,36,36,37,36,37,37,36,36,36,37


### Step 4. Calculate the mean of the 'before' and 'after' values

In [115]:
# Calculate mean of columns C-G as 'before' and H-L as 'after'
