In [2]:
import pandas as pd

# Extract the longest uninterrupted run of rows in which the RFCC flow tag is
# at or above the activity threshold, report its time span, and save it to CSV.

# Load the dataset
file_path = '../data/dataset_reduced-10min-one-target-variable-TI0036.csv'
df = pd.read_csv(file_path, delimiter=',')
# errors='coerce' turns unparseable timestamps into NaT instead of raising.
df['DateTime'] = pd.to_datetime(df['DateTime'], errors='coerce')

# Identify where RFCC is active
ACTIVE_TAG = '530C021D01.FIC0203.MEAS'   # column used as the activity indicator
ACTIVE_THRESHOLD = 95                    # rows with ACTIVE_TAG >= this count as active
OUTPUT_PATH = '../data/rfcc_longest_active_window.csv'
# Missing readings (NaN) compare False, so they end an active run.
rfcc_active = df[ACTIVE_TAG] >= ACTIVE_THRESHOLD

# Mark continuous groups: the group id increments every time the active flag
# flips, so each run of identical consecutive flags shares one id.
# NOTE(review): runs are defined over consecutive rows only; gaps in the
# timestamps are not checked — confirm the rows are evenly spaced.
df['rfcc_active'] = rfcc_active.astype(int)
df['group'] = (df['rfcc_active'] != df['rfcc_active'].shift()).cumsum()

# Fail with an actionable message when nothing is active; otherwise idxmax()
# below raises an opaque "argmax of an empty sequence" ValueError.
if not rfcc_active.any():
    raise ValueError(
        f"No rows with {ACTIVE_TAG} >= {ACTIVE_THRESHOLD}; "
        "cannot determine an active RFCC window."
    )

# Find longest active segment (ties resolve to the earliest group)
active_groups = df[df['rfcc_active'] == 1].groupby('group')
longest_active_segment = active_groups.size().idxmax()

# Filter to longest segment and drop the helper columns; .drop() returns a new
# frame, so df itself keeps 'rfcc_active' and 'group'.
df_active = (
    df.loc[df['group'] == longest_active_segment]
    .drop(columns=['rfcc_active', 'group'])
)

# Output window info (min/max skip NaT timestamps)
start_time = df_active['DateTime'].min()
end_time = df_active['DateTime'].max()
num_rows = len(df_active)
print(f"📅 Longest active RFCC window: {start_time} → {end_time}")
print(f"🔢 Window length: {num_rows} rows")

# Save the reduced dataset
df_active.to_csv(OUTPUT_PATH, index=False)


📅 Longest active RFCC window: 2024-04-17 04:10:00 → 2024-08-24 16:40:00
🔢 Window length: 18652 rows


In [3]:
# Sanity check: (rows, columns) of the extracted longest-active-window frame.
df_active.shape

(18652, 27)