In [1]:
# 1. IMPORTS
import pandas as pd
import numpy as np
from pathlib import Path
from constant import TempoTime as TempoTimeConstant

# (output-file prefix, source CSV path) pairs; each pair is passed to process().
# NOTE(review): 'memory-working' points at the same CSV as 'memory-usage', so
# both prefixes would produce identical per-pod data. This looks like a
# copy-paste slip -- confirm the intended source file for 'memory-working'.
inputs = [
    ('cpu-usage', '../thesis-data/tabla-tempo-cpu.csv'),
    ('memory-usage', '../thesis-data/tempo-memory-usage.csv'),
    ('memory-working', '../thesis-data/tempo-memory-usage.csv'),
]

# 2. PER-POD PROCESSING
# The source CSV locations are configured in `inputs` above; adjust them there.
def process(prefix: str, file: str) -> None:
    """Split a metrics CSV into one annotated CSV per pod.

    The input must provide the columns ``pod``, ``Time`` (parsed as epoch
    milliseconds, UTC) and ``Value #A``. For every pod the rows are written
    to ``./separated-data/<prefix>-<pod lowercased>.csv`` with the columns:

    * ``date``       -- ``Time`` converted to a timezone-aware timestamp.
    * ``value``      -- the original ``Value #A`` column.
    * ``repetion``   -- number (1-4) of the ``TempoTimeConstant`` window
      ``[utc_start_N, utc_end_N)`` containing ``date``; 0 if none does.
      (The spelling is kept because it is the output column name.)
    * ``measure_#``  -- 1-based position of the row within its repetition,
      in file order; 0 for rows outside every window.
    * ``delta_time`` -- seconds elapsed since the start of the row's
      repetition; no start is mapped for repetition 0, so it is left
      missing there.

    Args:
        prefix: Leading part of every output file name.
        file: Path of the CSV file to read.
    """
    df = pd.read_csv(file)

    output_dir = Path("./separated-data")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Loop-invariant lookup tables: half-open [start, end) window per
    # repetition number, and the start instant used for delta_time.
    windows = {
        1: (TempoTimeConstant.utc_start_1, TempoTimeConstant.utc_end_1),
        2: (TempoTimeConstant.utc_start_2, TempoTimeConstant.utc_end_2),
        3: (TempoTimeConstant.utc_start_3, TempoTimeConstant.utc_end_3),
        4: (TempoTimeConstant.utc_start_4, TempoTimeConstant.utc_end_4),
    }
    repetition_starts = {rep: start for rep, (start, _end) in windows.items()}
    choices = list(windows)

    # One groupby pass replaces a full-frame boolean filter per pod.
    # sort=False keeps pods in order of first appearance, and groupby skips
    # rows whose pod is missing (a NaN pod would otherwise crash on .lower()).
    for pod, pod_rows in df.groupby('pod', sort=False):
        # --- Data Preparation ---
        data = pod_rows.copy()
        data['date'] = pd.to_datetime(data['Time'], unit='ms', utc=True)
        data = data.rename(columns={'Value #A': 'value'})

        conditions = [
            (data['date'] >= start) & (data['date'] < end)
            for start, end in windows.values()
        ]
        data['repetion'] = np.select(conditions, choices, default=0)

        # Repetition 0 has no entry in repetition_starts, so those rows get
        # no start time and therefore no delta_time.
        start_times_col = data['repetion'].map(repetition_starts)
        data['delta_time'] = (data['date'] - start_times_col).dt.total_seconds()

        # Number the measurements inside each repetition; rows outside every
        # window are not real measurements and are numbered 0.
        data['measure_#'] = data.groupby('repetion').cumcount() + 1
        data.loc[data['repetion'] == 0, 'measure_#'] = 0

        # --- Save Processed Data ---
        out_df = data[['date', 'value', 'repetion', 'measure_#', 'delta_time']]
        file_path = output_dir / f"{prefix}-{str(pod).lower()}.csv"
        out_df.to_csv(file_path, index=False)

        print(f"Saved data for pod {pod} to {file_path}")
        print("\nSample of data with the new 'delta_time' column:")
        # Display rows from a repetition to see the new column in action
        print(out_df[out_df['repetion'] == 1].head())


# Run the per-pod split for every configured (prefix, file) pair.
# The pair is unpacked into named variables instead of binding `input`,
# which would shadow the builtin for the rest of the notebook session.
for prefix, file in inputs:
    process(prefix, file)

Saved data for pod sample-service-59d7988dd4-2r57f to separated-data/cpu-usage-sample-service-59d7988dd4-2r57f.csv

Sample of data with the new 'delta_time' column:
                        date     value  repetion  measure_#  delta_time
36 2025-09-21 05:37:00+00:00  0.000528         1          1         0.0
37 2025-09-21 05:37:15+00:00  0.002695         1          2        15.0
38 2025-09-21 05:37:30+00:00  0.003427         1          3        30.0
39 2025-09-21 05:37:45+00:00  0.003973         1          4        45.0
40 2025-09-21 05:38:00+00:00  0.010302         1          5        60.0
Saved data for pod tempo-0 to separated-data/cpu-usage-tempo-0.csv

Sample of data with the new 'delta_time' column:
                         date     value  repetion  measure_#  delta_time
153 2025-09-21 05:37:00+00:00  0.000564         1          1         0.0
154 2025-09-21 05:37:15+00:00  0.000608         1          2        15.0
155 2025-09-21 05:37:30+00:00  0.000725         1          3       