In [2]:
import pandas as pd
import ast
from pathlib import Path

# 1) Location of the CSV that is loaded below.
# NOTE(review): previously described as the CSV of "dropped uneven-x/y rows";
# the file name only says "le120s" and nothing in this cell filters rows, so
# confirm what the file actually contains.
# The path is an absolute, machine-specific Windows location.
input_path = Path(r"C:\store\git\km-stat-activity\data\real\20250414\km_stat_acer_20250414_le120s.csv")

def parse_list(s: object) -> list:
    """Parse a Python list literal from a CSV cell, falling back to [].

    Args:
        s: Raw cell value. Anything that is not a non-blank string
            (None, NaN, "", whitespace) is treated as "no data".

    Returns:
        The parsed elements as a list. An empty list is returned when the
        cell is blank, when the text is not a valid Python literal, or when
        the literal is not a list/tuple (e.g. a bare number), so that callers
        can always take ``len()`` of the result.
    """
    if not isinstance(s, str) or not s.strip():
        return []
    try:
        parsed = ast.literal_eval(s)
    except (ValueError, SyntaxError):
        # Malformed cell text: honour the "else return []" contract instead
        # of aborting the whole read_csv call on one bad row.
        return []
    if isinstance(parsed, (list, tuple)):
        return list(parsed)
    # A valid literal that is not a sequence (number, string, dict, ...).
    return []

# Load the CSV; the "x" and "y" cells are run through parse_list while reading
df = pd.read_csv(input_path, converters={"x": parse_list, "y": parse_list})

# 2) Store the number of elements of every parsed x and y value
df['len_x'] = df['x'].map(len)
df['len_y'] = df['y'].map(len)

# 3) Tally the rows for each (len_x, len_y) pair, most frequent pair first
pair_sizes = df.groupby(['len_x', 'len_y']).size()
counts = pair_sizes.reset_index(name='count')
counts = counts.sort_values('count', ascending=False).reset_index(drop=True)

# 4) Print one line per length pair
print(" array length for x and y    count")
print("--------------------------   -----")
for n_x, n_y, n_rows in counts[['len_x', 'len_y', 'count']].itertuples(index=False, name=None):
    print(f"       {n_x:3d},{n_y:3d}            {n_rows}")


 array length for x and y    count
--------------------------   -----
         0,  0            27323
         2,  2            455
         3,  3            282
         4,  4            220
         5,  5            172
         6,  6            138
         7,  7            113
         9,  9            108
         8,  8            100
         1,  2            86
         3,  2            69
        13, 13            68
         2,  1            67
        11, 11            65
        10, 10            65
         5,  4            58
         4,  3            57
        12, 12            57
        16, 16            56
         6,  7            55
         6,  5            52
        14, 14            52
        11, 10            52
        12, 13            51
        10,  9            50
        10, 11            50
         8,  9            49
         7,  6            49
        13, 12            49
         7,  8            48
         3,  4            48
         2,  3      

In [6]:
import pandas as pd
import ast
from pathlib import Path

# 1) Location of the CSV that is loaded below.
# NOTE(review): previously described as the CSV of "dropped uneven-x/y rows";
# the file name contains "intervals_even_xy", which does not match that
# description — confirm what the file actually contains.
# The path is an absolute, machine-specific Windows location.
input_path = Path(r"C:\store\git\km-stat-activity\data\real\20250415\km_stat_acer_20250415_le120s_intervals_even_xy.csv")

def parse_list(s: object) -> list:
    """Parse a Python list literal from a CSV cell, falling back to [].

    Args:
        s: Raw cell value. Anything that is not a non-blank string
            (None, NaN, "", whitespace) is treated as "no data".

    Returns:
        The parsed elements as a list. An empty list is returned when the
        cell is blank, when the text is not a valid Python literal, or when
        the literal is not a list/tuple (e.g. a bare number), so that callers
        can always take ``len()`` of the result.
    """
    if not isinstance(s, str) or not s.strip():
        return []
    try:
        parsed = ast.literal_eval(s)
    except (ValueError, SyntaxError):
        # Malformed cell text: honour the "else return []" contract instead
        # of aborting the whole read_csv call on one bad row.
        return []
    if isinstance(parsed, (list, tuple)):
        return list(parsed)
    # A valid literal that is not a sequence (number, string, dict, ...).
    return []

# Load the CSV; the "x" and "y" cells are run through parse_list while reading
df = pd.read_csv(input_path, converters={"x": parse_list, "y": parse_list})

# 2) Store the number of elements of every parsed x and y value
df['len_x'] = df['x'].map(len)
df['len_y'] = df['y'].map(len)

# 3) Tally the rows for each (len_x, len_y) pair, most frequent pair first
pair_sizes = df.groupby(['len_x', 'len_y']).size()
counts = pair_sizes.reset_index(name='count')
counts = counts.sort_values('count', ascending=False).reset_index(drop=True)

# 4) Print one line per length pair
print(" array length for x and y    count")
print("--------------------------   -----")
for n_x, n_y, n_rows in counts[['len_x', 'len_y', 'count']].itertuples(index=False, name=None):
    print(f"       {n_x:3d},{n_y:3d}            {n_rows}")


 array length for x and y    count
--------------------------   -----
         0,  0            32073
         2,  2            508
         3,  3            316
         4,  4            233
         5,  5            182
         6,  6            131
         8,  8            113
         7,  7            105
         9,  9            91
        10, 10            89
        12, 12            68
        13, 13            63
        15, 15            58
        11, 11            51
        16, 16            44
        14, 14            41
        18, 18            36
        17, 17            36
        21, 21            32
        26, 26            31
        19, 19            27
        22, 22            27
        23, 23            26
        38, 38            26
        20, 20            24
        25, 25            24
        29, 29            24
        28, 28            23
        24, 24            22
        48, 48            22
        35, 35            21
        34, 34       