In [9]:
import pandas as pd

# Select the single longest uninterrupted run of rows in which the RFCC is
# flagged active, and expose it as `df_active`.

# Load the dataset
file_path = '../data/dataset_reduced-10min-one-target-variable-TI0036.csv'
df = pd.read_csv(file_path, delimiter=',')
# errors='coerce' converts unparseable timestamps to NaT instead of raising.
df['DateTime'] = pd.to_datetime(df['DateTime'], errors='coerce')

# Identify where RFCC is active (measurement at or above the threshold of 95)
rfcc_active = df['530C021D01.FIC0203.MEAS'] >= 95

# Mark continuous groups: the running id increases by one every time the
# active flag differs from the previous row, so each uninterrupted run of
# equal flags shares one id.
df['rfcc_active'] = rfcc_active.astype(int)
df['group'] = (df['rfcc_active'] != df['rfcc_active'].shift()).cumsum()

# Find sizes of all active segments
active_groups = df[df['rfcc_active'] == 1].groupby('group')
group_sizes = active_groups.size()
if group_sizes.empty:
    # Without this guard, `.index[0]` below fails with an opaque IndexError.
    raise ValueError("No active RFCC segment found: no row has '530C021D01.FIC0203.MEAS' >= 95")

# Find the longest active segment.
# Position 0 of a descending sort is the LONGEST segment (position 1 would be
# the second longest); the previous "second longest" wording did not match
# the code, nor the output file name used when this frame is saved.
longest_group = group_sizes.sort_values(ascending=False).index[0]

# Filter to the longest segment and drop the helper columns.
# Boolean indexing followed by drop() yields a new frame, so `df` keeps its
# helper columns and `df_active` is independent of it.
df_active = df[df['group'] == longest_group].drop(columns=['rfcc_active', 'group'])

# Output window info
start_time = df_active['DateTime'].min()
end_time = df_active['DateTime'].max()
num_rows = len(df_active)
print(f"📅 Longest active RFCC window: {start_time} → {end_time}")
print(f"🔢 Window length: {num_rows} rows")


📅 Second longest active RFCC window: 2024-04-17 04:12:00 → 2024-08-24 16:51:00
🔢 Window length: 62174 rows


In [9]:
import pandas as pd

# Build `df_active_concat`: the longest and the third-longest uninterrupted
# active RFCC segments, stacked together and ordered by timestamp.

# Load the dataset
file_path = '../data/dataset_reduced-10min-one-target-variable-TI0036.csv'
df = pd.read_csv(file_path, delimiter=',')
# errors='coerce' converts unparseable timestamps to NaT instead of raising.
df['DateTime'] = pd.to_datetime(df['DateTime'], errors='coerce')

# Identify where RFCC is active (measurement at or above the threshold of 95)
rfcc_active = df['530C021D01.FIC0203.MEAS'] >= 95

# Mark continuous groups: the running id increases by one every time the
# active flag differs from the previous row.
df['rfcc_active'] = rfcc_active.astype(int)
df['group'] = (df['rfcc_active'] != df['rfcc_active'].shift()).cumsum()

# Find sizes of all active segments
active_groups = df[df['rfcc_active'] == 1].groupby('group')
group_sizes = active_groups.size()

# Positions 0 and 2 are both needed below; fail with a clear message instead
# of an opaque IndexError when the data holds fewer than three segments.
if len(group_sizes) < 3:
    raise ValueError(
        f"Need at least 3 active RFCC segments to select the 1st and 3rd longest, "
        f"found {len(group_sizes)}"
    )

# Get the first and third longest active segments
# (positions 0 and 2 of the descending size ranking).
top_groups = group_sizes.sort_values(ascending=False).index[[0, 2]]

# Concatenate the corresponding windows.
# NOTE: top_groups[1] is the THIRD longest segment (ranking position 2),
# hence the `_2` suffix.
df_active_0 = df[df['group'] == top_groups[0]].copy()
df_active_2 = df[df['group'] == top_groups[1]].copy()

# Stack, order by time, and drop the helper columns in one chain so no frame
# is mutated in place.
df_active_concat = (
    pd.concat([df_active_0, df_active_2])
    .sort_values('DateTime')
    .drop(columns=['rfcc_active', 'group'])
)

# Output info
# NOTE: the two segments are distinct runs, so the start → end span printed
# below also covers the rows between them that were left out.
start_time = df_active_concat['DateTime'].min()
end_time = df_active_concat['DateTime'].max()
num_rows = len(df_active_concat)
print(f"📅 Concatenated active RFCC windows: {start_time} → {end_time}")
print(f"🔢 Total concatenated window length: {num_rows} rows")


📅 Concatenated active RFCC windows: 2024-04-17 04:12:00 → 2024-12-06 17:21:00
🔢 Total concatenated window length: 92277 rows


In [6]:
# Show (rows, columns) of the single-window frame.
# Requires the cell that assigns `df_active` to have been run in the current
# kernel first; otherwise this raises NameError.
df_active.shape

NameError: name 'df_active' is not defined

In [10]:
# Show (rows, columns) of the concatenated-windows frame.
# Requires the cell that assigns `df_active_concat` to have been run first.
df_active_concat.shape

(92277, 26)

In [11]:
# Persist the single-window frame as CSV; the DataFrame index is not written.
df_active.to_csv(path_or_buf='../data/rfcc_longest_active_window.csv', index=False)

In [11]:
# Save the concatenated (1st + 3rd longest) active windows.
# This frame used to be written to '../data/rfcc_longest_active_window.csv',
# the same path the single-window `df_active` export uses, so whichever cell
# ran last silently overwrote the other and the file name did not describe
# its content. It now has its own file.
# NOTE(review): any downstream reader that expected the concatenated data
# under the old path must be pointed at this new file.
df_active_concat.to_csv('../data/rfcc_first_and_third_longest_active_windows.csv', index=False)