In [1]:
import pandas as pd

# Select the second-longest uninterrupted "RFCC active" window of the dataset
# and report its time span and row count.

# Load the dataset
file_path = '../data/dataset_reduced-10min-one-target-variable-TI0036.csv'
df = pd.read_csv(file_path, delimiter=',')
# errors='coerce' turns unparseable timestamps into NaT instead of raising;
# the min()/max() calls further down skip NaT values.
df['DateTime'] = pd.to_datetime(df['DateTime'], errors='coerce')

# Identify where RFCC is active: measurement at or above the threshold.
# Rows with a missing measurement compare as False, i.e. count as inactive.
ACTIVE_COLUMN = '530C021D01.FIC0203.MEAS'
ACTIVE_THRESHOLD = 95
rfcc_active = df[ACTIVE_COLUMN] >= ACTIVE_THRESHOLD

# Mark continuous groups: the id increases by one every time the active flag
# differs from the previous row, so each unbroken run shares a single id.
# NOTE: these two helper columns stay on `df`; only `df_active` drops them.
df['rfcc_active'] = rfcc_active.astype(int)
df['group'] = (df['rfcc_active'] != df['rfcc_active'].shift()).cumsum()

# Find sizes (row counts) of all active segments
active_groups = df[df['rfcc_active'] == 1].groupby('group')
group_sizes = active_groups.size()

# Find the second longest active segment.
# Fail with a clear message instead of a bare IndexError from `.index[1]`.
if len(group_sizes) < 2:
    raise ValueError(
        "Need at least two active RFCC segments to pick the second longest; "
        f"found {len(group_sizes)}."
    )
# A stable sort keeps equally long segments in their original (chronological)
# order, so the selection is deterministic when lengths tie.
second_longest_group = group_sizes.sort_values(ascending=False, kind='stable').index[1]

# Filter to the second longest segment and drop the helper columns.
# Boolean indexing followed by drop() yields a new frame independent of `df`.
df_active = df[df['group'] == second_longest_group].drop(columns=['rfcc_active', 'group'])

# Output window info
start_time = df_active['DateTime'].min()
end_time = df_active['DateTime'].max()
num_rows = len(df_active)
print(f"📅 Second longest active RFCC window: {start_time} → {end_time}")
print(f"🔢 Window length: {num_rows} rows")


📅 Second longest active RFCC window: 2024-01-23 16:19:30 → 2024-04-16 22:06:00
🔢 Window length: 242614 rows


In [25]:
import pandas as pd

# Select the second-longest uninterrupted "RFCC active" window of the dataset
# and report its time span and row count.
# NOTE(review): this cell repeats the selection logic of the notebook's first
# cell on the same file path; consider keeping a single copy (or a shared
# function) so the two cannot drift apart.

# Load the dataset
file_path = '../data/dataset_reduced-10min-one-target-variable-TI0036.csv'
df = pd.read_csv(file_path, delimiter=',')
# errors='coerce' turns unparseable timestamps into NaT instead of raising;
# the min()/max() calls further down skip NaT values.
df['DateTime'] = pd.to_datetime(df['DateTime'], errors='coerce')

# Identify where RFCC is active: measurement at or above the threshold.
# Rows with a missing measurement compare as False, i.e. count as inactive.
ACTIVE_COLUMN = '530C021D01.FIC0203.MEAS'
ACTIVE_THRESHOLD = 95
rfcc_active = df[ACTIVE_COLUMN] >= ACTIVE_THRESHOLD

# Mark continuous groups: the id increases by one every time the active flag
# differs from the previous row, so each unbroken run shares a single id.
# NOTE: these two helper columns stay on `df`; only `df_active` drops them.
df['rfcc_active'] = rfcc_active.astype(int)
df['group'] = (df['rfcc_active'] != df['rfcc_active'].shift()).cumsum()

# Find sizes (row counts) of all active segments
active_groups = df[df['rfcc_active'] == 1].groupby('group')
group_sizes = active_groups.size()

# Find the second longest active segment.
# Fail with a clear message instead of a bare IndexError from `.index[1]`.
if len(group_sizes) < 2:
    raise ValueError(
        "Need at least two active RFCC segments to pick the second longest; "
        f"found {len(group_sizes)}."
    )
# A stable sort keeps equally long segments in their original (chronological)
# order, so the selection is deterministic when lengths tie.
second_longest_group = group_sizes.sort_values(ascending=False, kind='stable').index[1]

# Filter to the second longest segment and drop the helper columns.
# Boolean indexing followed by drop() yields a new frame independent of `df`.
df_active = df[df['group'] == second_longest_group].drop(columns=['rfcc_active', 'group'])

# Output window info
start_time = df_active['DateTime'].min()
end_time = df_active['DateTime'].max()
num_rows = len(df_active)
print(f"📅 Second longest active RFCC window: {start_time} → {end_time}")
print(f"🔢 Window length: {num_rows} rows")


📅 Second longest active RFCC window: 2024-01-23 16:18:00 → 2024-04-16 22:03:00
🔢 Window length: 40436 rows


In [26]:
# Confirm the dimensions of the extracted window as (rows, columns)
n_rows, n_cols = df_active.shape
(n_rows, n_cols)

(40436, 27)

In [27]:
# Persist the selected window to CSV, leaving the DataFrame index out of the file
output_path = '../data/test_set_window.csv'
df_active.to_csv(output_path, index=False)