The data downloaded using the script "July_to_Aug23_data_download.py" gives non-uniformly resampled data

For this we first need to linearly interpolate to 8s 

Below given is the procedure on how to accomplish the same

In [1]:
import os
import pandas as pd

In [2]:
# specify the input path
ip_path = "/Users/harleenkaur/Documents/phyFai_thresholds_decision/UPS_download_interpolation/July_to_Aug23_csvs/"
# specify the path where you want to download
out_path = "/Users/harleenkaur/Documents/phyFai_thresholds_decision/UPS_download_interpolation/July_to_Aug23_interpolated_data/"
files = ['b173E8.csv']

In [53]:
def read_and_filter_data(file_path, diagnostic_key):
    df = pd.read_csv(file_path)
    filtered_df = df[df['diagnostic'] == diagnostic_key]
    return filtered_df


def process_and_interpolate(df, new_col_name, scale_factor=1, freq='8s'):
    
    
    df['dateTime'] = pd.to_datetime(df['dateTime'], utc=True)
    df = df.sort_values('dateTime')
    df = df[['dateTime', 'data']]
    df.columns = ['dateTime', new_col_name]

    if scale_factor != 1:
        df[new_col_name] = df[new_col_name] / scale_factor

    df.set_index('dateTime', inplace=True)
    df = df[~df.index.duplicated(keep='first')]
    full_index = df.index.union(pd.date_range(start=df.index[0], end=df.index[-1], freq=freq))
    df_interpolated = df.reindex(full_index.sort_values()).interpolate('linear')
    df_interpolated = df_interpolated.loc[pd.date_range(start=df.index[0], end=df.index[-1], freq=freq)]
    df_interpolated = df_interpolated.reset_index().rename(columns={'index': 'datetime_utc'})

    return df_interpolated

In [None]:
for file_name in files:
    file_path = os.path.join(ip_path, file_name)

    df_ect = read_and_filter_data(file_path, 'DiagnosticEngineCoolantTemperatureId')
    df_oilp = read_and_filter_data(file_path, 'DiagnosticOilPressureId')
    df_rpm = read_and_filter_data(file_path, 'DiagnosticEngineSpeedId')

    if not df_oilp.empty and not df_rpm.empty and not df_ect.empty:
        # pa has to converted to Kapa for oil-pressure
        df_oilp_interpolated = process_and_interpolate(df_oilp, 'OilP', scale_factor=1000)
        df_rpm_interpolated = process_and_interpolate(df_rpm, 'ENGINE RPM')
        df_ect_interpolated = process_and_interpolate(df_ect, 'ECT')

        # Now, df_ect_interpolated, df_oilp_interpolated, df_rpm_interpolated
        # are ready for further processing or merging


In [57]:
# non-uniform signal
df_ect[['dateTime', 'data']].sort_values('dateTime')

Unnamed: 0,dateTime,data
3,2025-07-11 16:13:02.860000+00:00,96.0
2,2025-07-11 16:13:41.480000+00:00,98.0
6,2025-07-11 16:14:01.813000+00:00,98.0
7,2025-07-11 16:17:18.637000+00:00,99.0
11,2025-07-11 16:19:29.060000+00:00,96.0
...,...,...
54104,2025-08-23 15:26:55.423000+00:00,96.0
54105,2025-08-23 15:27:23.273000+00:00,96.0
54102,2025-08-23 15:30:43.750000+00:00,95.0
54096,2025-08-23 15:33:21.917000+00:00,100.0


In [56]:
# 8s uniformly sampled signal
df_ect_interpolated


Unnamed: 0,datetime_utc,ECT
0,2025-07-11 16:13:02.860000+00:00,96.000000
1,2025-07-11 16:13:10.860000+00:00,96.400000
2,2025-07-11 16:13:18.860000+00:00,96.800000
3,2025-07-11 16:13:26.860000+00:00,97.200000
4,2025-07-11 16:13:34.860000+00:00,97.600000
...,...,...
464123,2025-08-23 15:36:06.860000+00:00,95.961538
464124,2025-08-23 15:36:14.860000+00:00,95.769231
464125,2025-08-23 15:36:22.860000+00:00,95.576923
464126,2025-08-23 15:36:30.860000+00:00,95.384615


For oil-pressure algorithm this signal is further smoothed to 1 min sampling frequency

In [91]:
def resample_data(df, sampling_freq):
    df = df.resample(sampling_freq).mean()
    return df


def resample_signals(df, freq='1T'):
    # If datetime is in index, reset it
    if df.index.name == 'datetime_utc':
        df = df.reset_index()

    df.columns = df.columns.str.strip()

    if 'datetime_utc' not in df.columns:
        raise ValueError(f"'datetime_utc' not found in columns: {df.columns.tolist()}")

    df['datetime_utc'] = pd.to_datetime(df['datetime_utc'], utc=True)

    df = df.set_index('datetime_utc')

    df_1T = df.resample(freq).mean().dropna()

    df_1T = df_1T.reset_index()

    return df_1T



In [92]:
df_rpm_1T = resample_signals(df_rpm_interpolated, freq='1T')
df_ect_1T = resample_signals(df_ect_interpolated, freq='1T')
df_oilp_1T = resample_signals(df_oilp_interpolated, freq='1T')
df_rpm_1T

  df_1T = df.resample(freq).mean().dropna()
  df_1T = df.resample(freq).mean().dropna()
  df_1T = df.resample(freq).mean().dropna()


Unnamed: 0,datetime_utc,ENGINE RPM
0,2025-08-04 22:00:00+00:00,641.083964
1,2025-08-04 22:01:00+00:00,733.397814
2,2025-08-04 22:02:00+00:00,734.180415
3,2025-08-04 22:03:00+00:00,734.963016
4,2025-08-04 22:04:00+00:00,735.745617
...,...,...
26969,2025-08-23 15:29:00+00:00,342.844444
26970,2025-08-23 15:30:00+00:00,254.177778
26971,2025-08-23 15:31:00+00:00,165.511111
26972,2025-08-23 15:32:00+00:00,76.844444


In [94]:
# merge the three datafrmes for getting the final dtafrme on which analysis can be performed
df_1T = pd.merge(df_rpm_1T, df_oilp_1T, how = 'inner', on = 'datetime_utc')
df_1T = pd.merge(df_1T, df_ect_1T, how = 'inner', on = 'datetime_utc')


In [95]:
df_1T

Unnamed: 0,datetime_utc,ENGINE RPM,OilP,ECT
0,2025-08-05 13:57:00+00:00,1481.564136,72.800000,76.396947
1,2025-08-05 13:58:00+00:00,1482.346737,69.703081,78.801527
2,2025-08-05 13:59:00+00:00,1483.129338,67.333333,81.206107
3,2025-08-05 14:00:00+00:00,1483.911938,70.274510,83.610687
4,2025-08-05 14:01:00+00:00,1484.694539,73.215686,86.015267
...,...,...,...,...
26012,2025-08-23 15:29:00+00:00,342.844444,75.500000,95.365385
26013,2025-08-23 15:30:00+00:00,254.177778,70.869565,95.184458
26014,2025-08-23 15:31:00+00:00,165.511111,64.521739,96.547619
26015,2025-08-23 15:32:00+00:00,76.844444,69.739130,98.333333


In [96]:
df_1T[['ENGINE RPM','OilP','ECT']].describe()

Unnamed: 0,ENGINE RPM,OilP,ECT
count,26017.0,26017.0,26017.0
mean,363.734713,70.876835,72.055038
std,336.463937,9.18011,24.122804
min,0.0,44.666667,20.078617
25%,91.906977,64.632218,50.822134
50%,306.4426,70.892815,76.83627
75%,522.35625,77.285714,95.612601
max,2080.741722,96.140351,107.703704
