MIT License

Copyright (c) 2026 Eric Ahlqvist and Richard Pink

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

In [None]:
import pandas as pd
import json
import numpy as np

# Load the D -> IdSmallGroup table from JSON and print a quick overview of it.

# Configuration - modify these as needed
json_file = "../3-SmallGroupIds/D-and-small-group-id.json"  # Change to your JSON file path
# json_file = "D-and-small-group-id.json"
# json_file = "D-IdSmallGroup-Kap-type.json"

# Load JSON file (JSON text is UTF-8, so do not rely on the platform default encoding)
with open(json_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Convert to DataFrame
df = pd.DataFrame(data)

# Display basic information
print("=" * 60)
print("Dataset Overview")
print("=" * 60)
print(f"Number of rows: {len(df)}")
print(f"Number of columns: {len(df.columns)}")
print(f"\nColumn names: {list(df.columns)}")
print("\n" + "=" * 60)
print("First few rows:")
print("=" * 60)
print(df.head())

print("\n" + "=" * 60)
print("Data types and non-null counts:")
print("=" * 60)
# DataFrame.info() writes its report to stdout itself and returns None;
# wrapping it in print() only appended a stray "None" line to the output.
df.info()

print("\n" + "=" * 60)
print("Basic statistics (for numeric columns):")
print("=" * 60)
# describe() summarises numeric columns when there are any; for a frame with
# only object columns it reports count/unique/top/freq instead.
print(df.describe())

# Display the DataFrame
df

Dataset Overview
Number of rows: 461925
Number of columns: 2

Column names: ['D', 'IdSmallGroup']

First few rows:
       D IdSmallGroup
0   3299  [ 243, 18 ]
1   4027   [ 243, 5 ]
2  11651  [ 243, 17 ]
3  12067   [ 243, 9 ]
4  12131   [ 243, 7 ]

Data types and non-null counts:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 461925 entries, 0 to 461924
Data columns (total 2 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   D             461925 non-null  object
 1   IdSmallGroup  461925 non-null  object
dtypes: object(2)
memory usage: 7.0+ MB
None

Basic statistics (for numeric columns):
             D IdSmallGroup
count   461925       461925
unique  461925           19
top       3299   [ 243, 5 ]
freq         1        83353


Unnamed: 0,D,IdSmallGroup
0,3299,"[ 243, 18 ]"
1,4027,"[ 243, 5 ]"
2,11651,"[ 243, 17 ]"
3,12067,"[ 243, 9 ]"
4,12131,"[ 243, 7 ]"
...,...,...
461920,80471663,"[ 243, 17 ]"
461921,83650879,"[ 243, 15 ]"
461922,83656319,"[ 243, 18 ]"
461923,86710719,"[ 243, 3 ]"


In [2]:
# Set pandas display options to show full width
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_colwidth', None)  # Show full content of each column (no truncation)
pd.set_option('display.width', 10000)  # Set width to large number
pd.set_option('display.max_seq_items', None)  # Show full sequences in lists
pd.set_option('display.colheader_justify', 'right')  # Right-align column headers




In [None]:
import json

# Join IPADs.json with the D -> IdSmallGroup table on "D" and write the merged
# records (D, IdSmallGroup, IPAD) to a new JSON file.
# NOTE(review): ids_file has no directory prefix here, while other cells read
# "../3-SmallGroupIds/D-and-small-group-id.json", and the output is later loaded
# from "gap-files/IPAD-and-ID-3.json" - confirm the intended working directory.
ipads_file = "../IPADs/IPADs.json"
ids_file = "D-and-small-group-id.json"
output_file = "IPAD-and-ID-3.json"

print("Loading IPADs.json...")
with open(ipads_file, 'r', encoding='utf-8') as f:
    ipads_data = json.load(f)
print(f"Loaded {len(ipads_data)} entries from IPADs.json")

print("\nLoading D-and-small-group-id.json...")
with open(ids_file, 'r', encoding='utf-8') as f:
    ids_data = json.load(f)
print(f"Loaded {len(ids_data)} entries from D-and-small-group-id.json")

# Create a dictionary mapping D to IdSmallGroup for fast lookup
print("\nCreating lookup dictionary...")
d_to_id = {entry["D"]: entry["IdSmallGroup"] for entry in ids_data}
print(f"Created lookup dictionary with {len(d_to_id)} entries")

# Merge the data
print("\nMerging data...")
merged_data = []
not_found = []
for entry in ipads_data:
    d_value = entry["D"]
    # Membership is tested explicitly (rather than checking for a None result)
    # so that a D whose stored IdSmallGroup is null is not reported as missing.
    if d_value not in d_to_id:
        not_found.append(d_value)
    # Entries without a known id are still kept, with IdSmallGroup set to None.
    merged_entry = {
        "D": d_value,
        "IdSmallGroup": d_to_id.get(d_value),
        "IPAD": entry["IPAD"]
    }
    merged_data.append(merged_entry)

print(f"Merged {len(merged_data)} entries")
if not_found:
    print(f"Warning: {len(not_found)} D values not found in lookup file")
    print(f"First 10 not found: {not_found[:10]}")

# Write the merged data to output file.
# ensure_ascii=False emits non-ASCII characters verbatim, so the file must be
# opened as UTF-8 explicitly; the platform default encoding may not be able to
# represent them.
print(f"\nWriting to {output_file}...")
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(merged_data, f, indent=2, ensure_ascii=False)
print(f"Successfully wrote {len(merged_data)} entries to {output_file}")

# Show a sample of the merged data
print("\nSample merged entry:")
if merged_data:
    print(json.dumps(merged_data[0], indent=2))

Loading IPADs.json...
Loaded 461925 entries from IPADs.json

Loading D-and-small-group-id.json...
Loaded 461925 entries from D-and-small-group-id.json

Creating lookup dictionary...
Created lookup dictionary with 461925 entries

Merging data...
Merged 461925 entries

Writing to IPAD-and-ID-3.json...
Successfully wrote 461925 entries to IPAD-and-ID-3.json

Sample merged entry:
{
  "D": "3299",
  "IdSmallGroup": "[ 243, 18 ]",
  "IPAD": [
    [
      9,
      3
    ],
    [
      [
        [
          9,
          3,
          3
        ],
        2
      ],
      [
        [
          27,
          3
        ],
        2
      ]
    ]
  ]
}


In [None]:
# Reload the relations-with-ids table and show the first 15 rows whose
# IdSmallGroup is '[ 243, 4 ]'.
json_file = "gap-files/all_rels_20260106a_with_ids.json"
with open(json_file) as f:
    data = json.loads(f.read())

# One row per JSON record
id_rels = pd.DataFrame(data)
filtered_df = id_rels.loc[id_rels['IdSmallGroup'].eq('[ 243, 4 ]')]
print(filtered_df.iloc[:15])

          D       Rel_B                                           Rel_M IdSmallGroup
16    27355  [a^6, b^3]                           [a^3, (a_b_a)^-1*b^6]   [ 243, 4 ]
24    37219  [a^6, b^6]                           [a^3*(a_b_a)^-2, b^3]   [ 243, 4 ]
25    39819  [a^3, b^6]                            [a^6*(a_b_b)^1, b^3]   [ 243, 4 ]
31    43827  [a^3, b^6]             [a^6*(a_b_a)^-1, a^6*(a_b_b)^1*b^3]   [ 243, 4 ]
40    54251  [a^6, b^6]                            [a^3*(a_b_b)^2, b^3]   [ 243, 4 ]
70    98347  [a^6, b^3]      [a^3*(a_b_a)^-2, (a_b_a)^-1*(a_b_b)^2*b^6]   [ 243, 4 ]
83   107491  [a^6, b^3]             [a^3*(a_b_a)^-1, a^6*(a_b_b)^1*b^6]   [ 243, 4 ]
100  121403  [a^6, b^6]             [a^3*(a_b_a)^-2, a^6*(a_b_b)^1*b^3]   [ 243, 4 ]
109  130523  [a^6, b^6]                 [a^3*(a_b_a)^-1, (a_b_b)^1*b^3]   [ 243, 4 ]
139  160547  [a^6, b^6]                            [a^3*(a_b_b)^2, b^3]   [ 243, 4 ]
140  160899  [a^6, b^6]             [a^3*(a_b_a)^-1, a^3*(a_b_b)^

In [12]:
# Reload the relations-with-ids table and show the first 15 rows whose
# IdSmallGroup is '[ 243, 6 ]'.
json_file = "gap-files/all_rels_20260106a_with_ids.json"
with open(json_file) as f:
    data = json.loads(f.read())

# One row per JSON record
id_rels = pd.DataFrame(data)
filtered_df = id_rels.loc[id_rels['IdSmallGroup'].eq('[ 243, 6 ]')]
print(filtered_df.iloc[:15])

          D       Rel_B                                       Rel_M IdSmallGroup
5     16627  [a^6, b^3]             [a^3*(a_b_a)^-2*(a_b_b)^2, b^6]   [ 243, 6 ]
8     18555  [a^6, b^6]         [a^3*(a_b_b)^2, a^6*(a_b_a)^-1*b^3]   [ 243, 6 ]
14    23683  [a^3, b^3]             [a^6*(a_b_a)^-2*(a_b_b)^1, b^6]   [ 243, 6 ]
18    34027  [a^3, b^3]         [a^6*(a_b_b)^1, a^6*(a_b_a)^-2*b^6]   [ 243, 6 ]
28    42859  [a^3, b^6]         [a^6*(a_b_a)^-1*(a_b_b)^1, a^6*b^3]   [ 243, 6 ]
38    53843  [a^3, b^3]                        [a^6*(a_b_b)^2, b^6]   [ 243, 6 ]
49    65203  [a^6, b^6]                       [a^3, (a_b_a)^-1*b^3]   [ 243, 6 ]
54    79163  [a^3, b^6]             [a^6*(a_b_a)^-2*(a_b_b)^1, b^3]   [ 243, 6 ]
60    90163  [a^3, b^3]         [a^6*(a_b_a)^-1*(a_b_b)^1, a^3*b^6]   [ 243, 6 ]
76   103379  [a^3, b^6]                       [a^6, (a_b_a)^-1*b^3]   [ 243, 6 ]
89   114403  [a^6, b^6]  [a^3*(a_b_a)^-1*(a_b_b)^1, (a_b_a)^-2*b^3]   [ 243, 6 ]
96   119019  [a^6, b^3]     

In [10]:
# Reload the relations-with-ids table and show the first 15 rows whose
# IdSmallGroup is '[ 243, 8 ]'.
json_file = "gap-files/all_rels_20260106a_with_ids.json"
with open(json_file) as f:
    data = json.loads(f.read())

# One row per JSON record
id_rels = pd.DataFrame(data)
filtered_df = id_rels.loc[id_rels['IdSmallGroup'].eq('[ 243, 8 ]')]
print(filtered_df.iloc[:15])

          D       Rel_B                                                 Rel_M IdSmallGroup
6     17131  [a^3, b^3]                   [a^6*(a_b_a)^-2*(a_b_b)^2, a^3*b^6]   [ 243, 8 ]
12    22395  [a^6, b^6]                   [a^3*(a_b_a)^-2*(a_b_b)^1, a^6*b^3]   [ 243, 8 ]
13    22443  [a^3, b^6]  [a^6*(a_b_a)^-2*(a_b_b)^2, (a_b_a)^-1*(a_b_b)^1*b^3]   [ 243, 8 ]
21    34867  [a^3, b^6]  [a^6*(a_b_a)^-2*(a_b_b)^2, (a_b_a)^-1*(a_b_b)^1*b^3]   [ 243, 8 ]
23    35539  [a^6, b^6]                   [a^3*(a_b_b)^1, a^3*(a_b_a)^-1*b^3]   [ 243, 8 ]
27    42619  [a^6, b^6]            [a^3*(a_b_a)^-1*(a_b_b)^1, (a_b_a)^-1*b^3]   [ 243, 8 ]
35    48667  [a^3, b^6]            [a^6*(a_b_a)^-1*(a_b_b)^2, (a_b_a)^-1*b^3]   [ 243, 8 ]
48    65051  [a^6, b^6]                   [a^3*(a_b_b)^1, a^3*(a_b_a)^-1*b^3]   [ 243, 8 ]
63    92827  [a^3, b^6]            [a^6*(a_b_a)^-1*(a_b_b)^2, (a_b_a)^-1*b^3]   [ 243, 8 ]
92   116763  [a^3, b^6]            [a^6*(a_b_a)^-2*(a_b_b)^2, (a_b_a)^-1*b^3]   [ 243, 8 ]

In [13]:
# Reload the relations-with-ids table and show the first 15 rows whose
# IdSmallGroup is '[ 243, 14 ]'.
json_file = "gap-files/all_rels_20260106a_with_ids.json"
with open(json_file) as f:
    data = json.loads(f.read())

# One row per JSON record
id_rels = pd.DataFrame(data)
filtered_df = id_rels.loc[id_rels['IdSmallGroup'].eq('[ 243, 14 ]')]
print(filtered_df.iloc[:15])

          D  Rel_B                                   Rel_M IdSmallGroup
10    19427  [a^6]             [(a_b_a)^-1, a^3*(a_b_b)^2]  [ 243, 14 ]
17    27635  [b^6]             [(a_b_b)^2, (a_b_a)^-2*b^3]  [ 243, 14 ]
45    61771  [b^6]   [(a_b_b)^2, (a_b_a)^-2*(a_b_b)^1*b^3]  [ 243, 14 ]
47    64571  [a^6]             [(a_b_a)^-2, a^3*(a_b_b)^2]  [ 243, 14 ]
136  158035  [a^3]             [(a_b_a)^-1, a^6*(a_b_b)^1]  [ 243, 14 ]
173  193339  [a^3]             [(a_b_a)^-1, a^6*(a_b_b)^1]  [ 243, 14 ]
206  229819  [b^3]   [(a_b_b)^2, (a_b_a)^-1*(a_b_b)^1*b^6]  [ 243, 14 ]
218  245435  [a^6]  [(a_b_a)^-2, a^3*(a_b_a)^-1*(a_b_b)^2]  [ 243, 14 ]
226  256787  [b^6]             [(a_b_b)^2, (a_b_a)^-2*b^3]  [ 243, 14 ]
242  274035  [b^3]             [(a_b_b)^1, (a_b_a)^-1*b^6]  [ 243, 14 ]
249  279427  [b^6]   [(a_b_b)^2, (a_b_a)^-2*(a_b_b)^1*b^3]  [ 243, 14 ]
274  310051  [b^3]             [(a_b_b)^1, (a_b_a)^-1*b^6]  [ 243, 14 ]
291  323755  [a^6]             [(a_b_a)^-1, a^3*(a_b_b)^2]  [ 24

In [5]:
# Reload the relations-with-ids table and show the first 15 rows whose
# IdSmallGroup is '[ 729, 9 ]'.
json_file = "gap-files/all_rels_20260106a_with_ids.json"
with open(json_file) as f:
    data = json.loads(f.read())

# One row per JSON record
id_rels = pd.DataFrame(data)
filtered_df = id_rels.loc[id_rels['IdSmallGroup'].eq('[ 729, 9 ]')]
print(filtered_df.iloc[:15])

            D  Rel_B  Rel_M IdSmallGroup
404    425131  [b^3]  [b^6]   [ 729, 9 ]
1100  1027379  [a^6]  [a^3]   [ 729, 9 ]
1366  1223931  [b^6]  [b^3]   [ 729, 9 ]
1383  1239763  [a^6]  [a^3]   [ 729, 9 ]
1457  1294771  [a^6]  [a^3]   [ 729, 9 ]
2358  1987939  [a^3]  [a^6]   [ 729, 9 ]
2621  2169499  [b^3]  [b^6]   [ 729, 9 ]
2740  2264843  [b^3]  [b^6]   [ 729, 9 ]
2945  2428771  [b^3]  [b^6]   [ 729, 9 ]
2950  2431211  [a^6]  [a^3]   [ 729, 9 ]
3015  2473451  [a^6]  [a^3]   [ 729, 9 ]
3046  2501691  [a^3]  [a^6]   [ 729, 9 ]
3341  2703547  [a^3]  [a^6]   [ 729, 9 ]
3542  2848883  [a^3]  [a^6]   [ 729, 9 ]
4315  3410659  [b^6]  [b^3]   [ 729, 9 ]


In [4]:
# Reload the relations-with-ids table and show the first 15 rows whose
# IdSmallGroup is '[ 729, 12 ]'.
json_file = "gap-files/all_rels_20260106a_with_ids.json"
with open(json_file) as f:
    data = json.loads(f.read())

# One row per JSON record
id_rels = pd.DataFrame(data)
filtered_df = id_rels.loc[id_rels['IdSmallGroup'].eq('[ 729, 12 ]')]
print(filtered_df.iloc[:15])

            D  Rel_B             Rel_M IdSmallGroup
559    566947  [a^6]  [a^3*(a_b_a)^-2]  [ 729, 12 ]
567    570883  [b^3]   [(a_b_b)^2*b^6]  [ 729, 12 ]
568    572179  [b^3]   [(a_b_b)^2*b^6]  [ 729, 12 ]
720    697811  [a^3]  [a^6*(a_b_a)^-1]  [ 729, 12 ]
738    720619  [b^3]   [(a_b_b)^2*b^6]  [ 729, 12 ]
1066   997363  [a^6]  [a^3*(a_b_a)^-1]  [ 729, 12 ]
1518  1345011  [b^3]   [(a_b_b)^2*b^6]  [ 729, 12 ]
1582  1391851  [b^3]   [(a_b_b)^1*b^6]  [ 729, 12 ]
1663  1454043  [b^3]   [(a_b_b)^1*b^6]  [ 729, 12 ]
1850  1591467  [a^3]  [a^6*(a_b_a)^-1]  [ 729, 12 ]
1910  1647203  [a^6]  [a^3*(a_b_a)^-2]  [ 729, 12 ]
1986  1697987  [a^6]  [a^3*(a_b_a)^-1]  [ 729, 12 ]
2272  1912835  [a^6]  [a^3*(a_b_a)^-2]  [ 729, 12 ]
2400  2019763  [a^6]  [a^3*(a_b_a)^-1]  [ 729, 12 ]
2412  2029499  [a^3]  [a^6*(a_b_a)^-2]  [ 729, 12 ]


In [23]:
# Input selection - point json_file at the merged D / IdSmallGroup / IPAD file
json_file = "gap-files/IPAD-and-ID-3.json"  # Change to your JSON file path
# json_file = "D-and-small-group-id.json"
# json_file = "D-IdSmallGroup-Kap-type.json"

# Read the whole file and parse it as JSON
with open(json_file) as f:
    data = json.loads(f.read())

# Tabulate the parsed records; later cells query this frame as ipad_id_df
ipad_id_df = pd.DataFrame(data)

In [24]:
# Number of rows in the merged table
ipad_id_df.shape[0]

461925

In [None]:
# FIXED VERSION: Find IdSmallGroup for IPADs with normalization (ignoring order of second component)

# Helper function to normalize IPAD by sorting the second component
def normalize_ipad(ipad):
    """
    Return ``ipad`` with its second component put into a canonical order.

    As used in this notebook an IPAD is a two-element list
    ``[K_cyc, [[cyc1, count1], [cyc2, count2], ...]]`` where each ``cyc`` is
    itself a list, e.g. ``[[3, 3], [[[9, 3], 3], [[3, 3, 3], 1]]]``.
    Two IPADs that differ only in the order of the ``[cyc, count]`` pairs
    normalize to the same value.

    Parameters:
        ipad: the value to normalize. Anything that is not a two-element list,
            or whose second element is not a list, is returned unchanged.

    Returns:
        A new list ``[K_cyc, sorted_pairs]``; the input list is not modified
        and the first component is passed through as is.
    """
    if not isinstance(ipad, list) or len(ipad) != 2:
        return ipad

    K_cyc, second_component = ipad
    if not isinstance(second_component, list):
        return ipad

    def sort_key(item):
        # Every key has the same (rank, tuple, count) shape so that unusual
        # entries (a scalar instead of a list for cyc, or an element that is
        # not a pair at all) can still be compared with ordinary ones instead
        # of raising TypeError. For well-formed input the resulting order is
        # the same as sorting by (tuple(cyc), count).
        if isinstance(item, list) and len(item) >= 2:
            cyc = item[0]
            cyc_key = tuple(cyc) if isinstance(cyc, list) else (cyc,)
            return (0, cyc_key, item[1])
        # Non-pair elements sort after all pairs, ordered by their string form.
        return (1, (str(item),), 0)

    return [K_cyc, sorted(second_component, key=sort_key)]

# Helper: turn nested lists into nested tuples so they can be compared and hashed
def to_tuple(obj):
    """Return ``obj`` with every (nested) list replaced by a tuple; non-list values pass through unchanged."""
    if not isinstance(obj, list):
        return obj
    return tuple(map(to_tuple, obj))

# Use the same target_ipads from the previous cell
# Find IdSmallGroup for each specified IPAD (with normalization)
# results_normalized = []

# for i, target_ipad in enumerate(target_ipads, 1):
#     # Normalize the target IPAD by sorting the second component
#     normalized_target = normalize_ipad(target_ipad)
#     target_ipad_tuple = to_tuple(normalized_target)
    
#     # Filter DataFrame to find matching IPAD (normalize each IPAD before comparison)
#     matching_rows = ipad_id_df[ipad_id_df['IPAD'].apply(lambda x: to_tuple(normalize_ipad(x)) == target_ipad_tuple)]
    
#     if len(matching_rows) > 0:
#         # Get the unique IdSmallGroup (should be only one based on our analysis)
#         id_small_group = matching_rows.iloc[0]['IdSmallGroup']
#         d_value = matching_rows.iloc[0]['D']  # Also get a D value as example
#         count = len(matching_rows)  # How many entries have this IPAD
        
#         results_normalized.append({
#             'IPAD_index': i,
#             'IPAD': target_ipad,
#             'Normalized_IPAD': normalized_target,
#             'IdSmallGroup': id_small_group,
#             'Example_D': d_value,
#             'Count': count
#         })
#     else:
#         results_normalized.append({
#             'IPAD_index': i,
#             'IPAD': target_ipad,
#             'Normalized_IPAD': normalized_target,
#             'IdSmallGroup': None,
#             'Example_D': None,
#             'Count': 0,
#             'Note': 'IPAD not found in dataset'
#         })

# # Display results
# print("=" * 80)
# print(f"Results: IdSmallGroup for {len(target_ipads)} specified IPADs")
# print("(Using normalized comparison - second component order ignored)")
# print("=" * 80)

# for result in results_normalized:
#     print(f"\nIPAD #{result['IPAD_index']}:")
#     print(f"  Original IPAD: {result['IPAD']}")
#     print(f"  Normalized IPAD: {result['Normalized_IPAD']}")
#     if result['IdSmallGroup']:
#         print(f"  IdSmallGroup: {result['IdSmallGroup']}")
#         print(f"  Example D value: {result['Example_D']}")
#         print(f"  Number of entries with this (normalized) IPAD: {result['Count']}")
#     else:
#         print(f"  ⚠️  {result.get('Note', 'Not found')}")

# # Summary table
# print("\n" + "=" * 80)
# print("Summary Table:")
# print("=" * 80)
# results_df_normalized = pd.DataFrame(results_normalized)
# print(results_df_normalized[['IPAD_index', 'IdSmallGroup', 'Example_D', 'Count']].to_string(index=False))

The following cell counts how many entries have each normalized IPAD type and lists the small group ID associated with each type.

In [None]:
# Count normalized IPAD types in IPADs.json and find IdSmallGroup for each type
import json
from collections import Counter, defaultdict

# Count how often each normalized IPAD occurs in IPADs.json and print a table
# of the counts together with the IdSmallGroup of one example D per type.
# Requires normalize_ipad() and to_tuple() from the helper cell.
json_file = "../IPADs/IPADs.json"
ids_file = "../3-SmallGroupIds/D-and-small-group-id.json"

# JSON text is UTF-8, so the encoding is pinned rather than left to the platform default.
print(f"Loading {json_file}...")
with open(json_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

print(f"Loaded {len(data)} entries")

print(f"\nLoading {ids_file}...")
with open(ids_file, 'r', encoding='utf-8') as f:
    ids_data = json.load(f)

# Create mapping from D to IdSmallGroup
d_to_id = {entry["D"]: entry["IdSmallGroup"] for entry in ids_data}
print(f"Loaded {len(d_to_id)} D-to-IdSmallGroup mappings")

# Normalize all IPADs and track examples (D values) for each normalized type
print("\nNormalizing IPADs and counting types...")
normalized_ipads = []
ipad_to_example_d = defaultdict(list)  # Map normalized IPAD to list of D values

for entry in data:
    ipad = entry.get("IPAD", [])
    d_value = entry.get("D", "")
    normalized = normalize_ipad(ipad)
    # Convert to tuple for hashing (needed for Counter)
    normalized_tuple = to_tuple(normalized)
    normalized_ipads.append(normalized_tuple)
    ipad_to_example_d[normalized_tuple].append(d_value)

# Count occurrences of each normalized IPAD type
ipad_counts = Counter(normalized_ipads)

print(f"\n{'=' * 80}")
print(f"Results: Normalized IPAD Type Counts")
print(f"{'=' * 80}")
print(f"Total entries: {len(data)}")
print(f"Unique normalized IPAD types: {len(ipad_counts)}")

# Sort by count (descending) for easier viewing
sorted_counts = sorted(ipad_counts.items(), key=lambda x: x[1], reverse=True)

print(f"\n{'=' * 80}")
print("IPAD Type Counts with IdSmallGroup (sorted by frequency, descending):")
print(f"{'=' * 80}")
print(f"{'Rank':<6} {'Count':<8} {'IdSmallGroup':<20} {'Normalized IPAD'}")
print("-" * 100)


def tuple_to_list(obj):
    """Inverse of to_tuple(): turn nested tuples back into nested lists for display."""
    if isinstance(obj, tuple):
        return [tuple_to_list(item) for item in obj]
    return obj


# The helper above is defined once, outside the loop, instead of being
# re-created on every iteration.
for rank, (ipad_tuple, count) in enumerate(sorted_counts, 1):
    ipad_list = tuple_to_list(ipad_tuple)
    ipad_str = str(ipad_list)
    # Truncate if too long
    if len(ipad_str) > 100:
        ipad_str = ipad_str[:97] + "..."

    # Get IdSmallGroup for this normalized IPAD type.
    # Only the first example D is looked up.
    # NOTE(review): this assumes all D values sharing a normalized IPAD have
    # the same IdSmallGroup - confirm, otherwise other ids are silently hidden.
    example_d_values = ipad_to_example_d[ipad_tuple]
    id_small_group = "N/A"
    if example_d_values:
        example_d = example_d_values[0]
        id_small_group = d_to_id.get(example_d, "Not found")

    # str() keeps the width spec valid even if the stored id is not a string
    # (for example a JSON null).
    print(f"{rank:<6} {count:<8} {str(id_small_group):<20} {ipad_str}")

# Summary statistics
print(f"\n{'=' * 80}")
print("Summary Statistics:")
print(f"{'=' * 80}")
if sorted_counts:
    print(f"  - Most common IPAD type: {sorted_counts[0][1]} occurrences")
    print(f"  - Least common IPAD type: {sorted_counts[-1][1]} occurrence(s)")
    print(f"  - Average occurrences per type: {len(data) / len(ipad_counts):.2f}")
else:
    # An empty input would otherwise fail with IndexError / ZeroDivisionError.
    print("  - No IPAD entries found")

# Show distribution
count_values = list(ipad_counts.values())
unique_counts = Counter(count_values)
print(f"\nDistribution of counts:")
print(f"  (How many IPAD types appear N times)")
for count_val in sorted(unique_counts.keys()):
    print(f"  {count_val} occurrence(s): {unique_counts[count_val]} IPAD type(s)")

Loading gap-files/IPADs-3.json...
Loaded 461925 entries

Loading D-and-small-group-id.json...
Loaded 461925 D-to-IdSmallGroup mappings

Normalizing IPADs and counting types...

Results: Normalized IPAD Type Counts
Total entries: 461925
Unique normalized IPAD types: 345

IPAD Type Counts with IdSmallGroup (sorted by frequency, descending):
Rank   Count    IdSmallGroup         Normalized IPAD
----------------------------------------------------------------------------------------------------
1      83353    [ 243, 5 ]           [[3, 3], [[[3, 3, 3], 1], [[9, 3], 3]]]
2      55310    [ 243, 18 ]          [[9, 3], [[[9, 3, 3], 2], [[27, 3], 2]]]
3      41398    [ 243, 7 ]           [[3, 3], [[[3, 3, 3], 2], [[9, 3], 2]]]
4      40968    [ 243, 4 ]           [[3, 3], [[[3, 3, 3], 3], [[9, 3], 1]]]
5      36458    [ 243, 8 ]           [[3, 3], [[[9, 3], 3], [[27, 9], 1]]]
6      35923    [ 243, 6 ]           [[3, 3], [[[3, 3, 3], 1], [[9, 3], 2], [[27, 9], 1]]]
7      18422    [ 243, 18 ]   

In [6]:
# Print the first 10 rows of the merged table
print(ipad_id_df.iloc[:10])

       D IdSmallGroup                                                       IPAD
0   3299  [ 243, 18 ]                   [[9, 3], [[[9, 3, 3], 2], [[27, 3], 2]]]
1   4027   [ 243, 5 ]                    [[3, 3], [[[9, 3], 3], [[3, 3, 3], 1]]]
2  11651  [ 243, 17 ]  [[9, 3], [[[27, 3], 2], [[27, 9, 3], 1], [[9, 3, 3], 1]]]
3  12067   [ 243, 9 ]                                    [[3, 3], [[[9, 3], 4]]]
4  12131   [ 243, 7 ]                    [[3, 3], [[[3, 3, 3], 2], [[9, 3], 2]]]
5  16627   [ 243, 6 ]      [[3, 3], [[[9, 3], 2], [[27, 9], 1], [[3, 3, 3], 1]]]
6  17131   [ 243, 8 ]                      [[3, 3], [[[27, 9], 1], [[9, 3], 3]]]
7  17723  [ 243, 17 ]  [[9, 3], [[[27, 3], 2], [[27, 9, 3], 1], [[9, 3, 3], 1]]]
8  18555   [ 243, 6 ]      [[3, 3], [[[3, 3, 3], 1], [[9, 3], 2], [[27, 9], 1]]]
9  19187   [ 243, 7 ]                    [[3, 3], [[[3, 3, 3], 2], [[9, 3], 2]]]


In [None]:
# Show the first 46 rows whose IdSmallGroup is '[ 2187, 33 ]'.
# Ids inspected by the neighbouring cells: [729,9] [729,10] [729,11] [729,12] [729,26]
filtered_df = ipad_id_df.loc[ipad_id_df['IdSmallGroup'].eq('[ 2187, 33 ]')]
print(filtered_df.iloc[:46])

               D  IdSmallGroup                                                                                              IPAD
4855     3826859  [ 2187, 33 ]                                                 [[9, 9], [[[27, 9, 3, 3], 2], [[9, 9, 9, 3], 2]]]
11063    8187139  [ 2187, 33 ]                                                 [[9, 9], [[[27, 9, 3, 3], 2], [[9, 9, 9, 3], 2]]]
18065   13014563  [ 2187, 33 ]                                                 [[9, 9], [[[27, 9, 3, 3], 2], [[9, 9, 9, 3], 2]]]
41032   28175891  [ 2187, 33 ]                                                 [[9, 9], [[[27, 9, 3, 3], 2], [[9, 9, 9, 3], 2]]]
46524   31768867  [ 2187, 33 ]                                                 [[9, 9], [[[27, 9, 3, 3], 3], [[9, 9, 9, 3], 1]]]
56679   38365507  [ 2187, 33 ]                                                 [[9, 9], [[[27, 9, 3, 3], 2], [[9, 9, 9, 3], 2]]]
59856   40411115  [ 2187, 33 ]  [[81, 9], [[[243, 27, 9, 3], 1], [[243, 9, 3, 3], 1], [[81, 27, 2

In [15]:
# Ids of interest: [729,9] [729,10] [729,11] [729,12] [729,26]
# This cell lists D and IPAD for every row with IdSmallGroup '[ 729, 26 ]'.
filtered_df = ipad_id_df.loc[ipad_id_df['IdSmallGroup'].eq('[ 729, 26 ]')]
print(filtered_df.loc[:, ['D', 'IPAD']])

               D                                                               IPAD
444       460523                    [[9, 9], [[[27, 3, 3], 3], [[27, 9, 3, 3], 1]]]
925       878387  [[27, 9], [[[81, 3, 3], 2], [[27, 9, 9, 3], 1], [[27, 9, 3], 1]]]
1093     1023347    [[9, 9], [[[27, 3, 3], 2], [[27, 9, 3, 3], 1], [[9, 9, 3], 1]]]
1885     1622491    [[9, 9], [[[27, 9, 3, 3], 1], [[27, 3, 3], 2], [[9, 9, 3], 1]]]
2453     2053411     [[9, 9], [[[27, 3, 3], 2], [[9, 9, 9, 3], 1], [[9, 9, 3], 1]]]
...          ...                                                                ...
460513  98588568   [[9, 9], [[[27, 3, 3], 2], [[81, 27, 3, 3], 1], [[9, 9, 3], 1]]]
460827  98975496  [[27, 9], [[[81, 3, 3], 2], [[81, 9, 3, 3], 1], [[27, 9, 3], 1]]]
461190  99473352    [[9, 9], [[[27, 3, 3], 2], [[9, 9, 3], 1], [[27, 9, 3, 3], 1]]]
461493  99863688                  [[27, 9], [[[27, 9, 3], 3], [[81, 27, 9, 3], 1]]]
461712  50885959     [[9, 9], [[[9, 9, 3], 1], [[27, 3, 3], 2], [[9, 9, 9, 3

In [16]:
# Ids of interest: [729,9] [729,10] [729,11] [729,12] [729,26]
# This cell lists D and IPAD for every row with IdSmallGroup '[ 729, 9 ]'.
filtered_df = ipad_id_df.loc[ipad_id_df['IdSmallGroup'].eq('[ 729, 9 ]')]
print(filtered_df.loc[:, ['D', 'IPAD']])

               D                                                                IPAD
404       425131                      [[9, 3], [[[27, 9, 3], 3], [[3, 3, 3, 3], 1]]]
1101     1027379      [[9, 3], [[[9, 9, 9], 1], [[27, 9, 3], 2], [[3, 3, 3, 3], 1]]]
1367     1223931                      [[9, 3], [[[27, 9, 3], 3], [[3, 3, 3, 3], 1]]]
1384     1239763    [[9, 3], [[[27, 9, 3], 2], [[81, 27, 3], 1], [[3, 3, 3, 3], 1]]]
1458     1294771    [[27, 3], [[[27, 9, 9], 1], [[81, 9, 3], 2], [[9, 3, 3, 3], 1]]]
...          ...                                                                 ...
460628  98741704     [[9, 3], [[[81, 27, 3], 2], [[9, 9, 9], 1], [[3, 3, 3, 3], 1]]]
460876  99045640    [[27, 3], [[[81, 9, 3], 2], [[27, 9, 9], 1], [[9, 3, 3, 3], 1]]]
461295  99593416  [[81, 3], [[[81, 9, 9], 2], [[243, 9, 3], 1], [[27, 3, 3, 3], 1]]]
461536  99906648                      [[9, 3], [[[27, 9, 3], 3], [[3, 3, 3, 3], 1]]]
461629  99200779   [[27, 3], [[[27, 9, 9], 2], [[81, 27, 9], 1], 

In [17]:
# Ids of interest: [729,9] [729,10] [729,11] [729,12] [729,26]
# This cell lists D and IPAD for every row with IdSmallGroup '[ 729, 10 ]'.
filtered_df = ipad_id_df.loc[ipad_id_df['IdSmallGroup'].eq('[ 729, 10 ]')]
print(filtered_df.loc[:, ['D', 'IPAD']])

               D                                                             IPAD
194       218123    [[27, 3], [[[27, 3, 3], 2], [[81, 9, 3], 1], [[9, 9, 3], 1]]]
236       269187                      [[9, 3], [[[9, 3, 3], 3], [[27, 9, 3], 1]]]
263       293115                      [[9, 3], [[[9, 3, 3], 3], [[27, 9, 3], 1]]]
322       347891                      [[9, 3], [[[27, 9, 3], 1], [[9, 3, 3], 3]]]
359       384139                     [[9, 3], [[[9, 3, 3], 3], [[81, 27, 3], 1]]]
...          ...                                                              ...
461257  99542216                      [[9, 3], [[[9, 3, 3], 3], [[27, 9, 3], 1]]]
461435  99778920    [[27, 3], [[[27, 3, 3], 2], [[81, 9, 3], 1], [[9, 9, 3], 1]]]
461458  99817672                       [[9, 3], [[[9, 3, 3], 3], [[9, 9, 9], 1]]]
461692  40505511  [[27, 3], [[[27, 3, 3], 2], [[27, 27, 27], 1], [[9, 9, 3], 1]]]
461825  51721684                      [[9, 3], [[[27, 9, 3], 1], [[9, 3, 3], 3]]]

[6555 rows x 2 

In [18]:
# Ids of interest: [729,9] [729,10] [729,11] [729,12] [729,26]
# This cell lists D and IPAD for every row with IdSmallGroup '[ 729, 11 ]'.
filtered_df = ipad_id_df.loc[ipad_id_df['IdSmallGroup'].eq('[ 729, 11 ]')]
print(filtered_df.loc[:, ['D', 'IPAD']])

               D                                                              IPAD
58         87979       [[9, 3], [[[9, 3, 3], 2], [[9, 9, 9], 1], [[27, 9, 3], 1]]]
59         89923       [[9, 3], [[[9, 9, 9], 1], [[9, 3, 3], 2], [[27, 9, 3], 1]]]
310       340251                     [[27, 3], [[[27, 9, 9], 2], [[27, 3, 3], 2]]]
488       497859                       [[9, 3], [[[9, 3, 3], 2], [[27, 9, 3], 2]]]
570       572899   [[27, 3], [[[27, 3, 3], 2], [[81, 27, 9], 1], [[27, 9, 9], 1]]]
...          ...                                                               ...
461725  58363279  [[27, 3], [[[27, 3, 3], 2], [[27, 27, 27], 1], [[27, 9, 9], 1]]]
461759  73164367   [[81, 3], [[[243, 9, 3], 1], [[81, 3, 3], 2], [[81, 9, 9], 1]]]
461760  73693471       [[9, 3], [[[9, 3, 3], 2], [[9, 9, 9], 1], [[27, 9, 3], 1]]]
461863  96279556                     [[27, 3], [[[27, 3, 3], 2], [[27, 9, 9], 2]]]
461912  81759784    [[27, 3], [[[81, 9, 3], 1], [[27, 3, 3], 2], [[27, 9, 9], 1]]]

[61

In [19]:
# Ids of interest: [729,9] [729,10] [729,11] [729,12] [729,26]
# This cell lists D and IPAD for every row with IdSmallGroup '[ 729, 12 ]'.
filtered_df = ipad_id_df.loc[ipad_id_df['IdSmallGroup'].eq('[ 729, 12 ]')]
print(filtered_df.loc[:, ['D', 'IPAD']])

               D                                           IPAD
560       566947  [[9, 3], [[[9, 3, 3], 3], [[3, 3, 3, 3], 1]]]
568       570883  [[9, 3], [[[9, 3, 3], 3], [[3, 3, 3, 3], 1]]]
569       572179  [[9, 3], [[[9, 3, 3], 3], [[3, 3, 3, 3], 1]]]
721       697811  [[9, 3], [[[9, 3, 3], 3], [[3, 3, 3, 3], 1]]]
739       720619  [[9, 3], [[[9, 3, 3], 3], [[3, 3, 3, 3], 1]]]
...          ...                                            ...
461068  99302856  [[9, 3], [[[9, 3, 3], 3], [[3, 3, 3, 3], 1]]]
461172  99456056  [[9, 3], [[[9, 3, 3], 3], [[3, 3, 3, 3], 1]]]
461249  99536456  [[9, 3], [[[9, 3, 3], 3], [[3, 3, 3, 3], 1]]]
461721  55664503  [[9, 3], [[[9, 3, 3], 3], [[3, 3, 3, 3], 1]]]
461763  74334407  [[9, 3], [[[9, 3, 3], 3], [[3, 3, 3, 3], 1]]]

[4299 rows x 2 columns]


In [None]:
# Show the first 46 rows whose IdSmallGroup is '[ 2187, 33 ]'.
filtered_df = ipad_id_df.loc[ipad_id_df['IdSmallGroup'].eq('[ 2187, 33 ]')]
print(filtered_df.iloc[:46])

[[3,243];[3,3,3,81],[3,729]3]

In [34]:
# Look up a single IPAD in ipad_id_df by exact structural equality.
# The IPAD column holds nested lists, so values are compared as nested
# structures rather than as strings. The comparison is order-sensitive.

# Method 1: compare as nested tuples
#target_ipad = [[9, 3], [[[9, 3, 3], 2], [[27, 3], 2]]]
target_ipad = [[243, 3], [[[81, 3, 3, 3], 1], [[243, 9, 9], 1], [[243, 243, 243], 1], [[729, 9, 3], 1]]]

def to_tuple(obj):
    """Return ``obj`` with every (nested) list replaced by a tuple; non-list values pass through unchanged."""
    if not isinstance(obj, list):
        return obj
    return tuple(map(to_tuple, obj))

target_ipad_tuple = to_tuple(target_ipad)
# Boolean mask: True where the row's IPAD equals the target exactly
exact_match = ipad_id_df['IPAD'].apply(lambda ipad: to_tuple(ipad) == target_ipad_tuple)
filtered_df = ipad_id_df.loc[exact_match]

print(f"Found {len(filtered_df)} entries with IPAD = {target_ipad}")
print(filtered_df.iloc[:5])

# Alternative Method 2: Use JSON serialization for comparison
# import json
# target_ipad_str = json.dumps(target_ipad, sort_keys=True)
# filtered_df2 = ipad_id_df[ipad_id_df['IPAD'].apply(lambda x: json.dumps(x, sort_keys=True) == target_ipad_str)]
# print(f"Method 2 found {len(filtered_df2)} entries")

Found 0 entries with IPAD = [[243, 3], [[[81, 3, 3, 3], 1], [[243, 9, 9], 1], [[243, 243, 243], 1], [[729, 9, 3], 1]]]
Empty DataFrame
Columns: [D, IdSmallGroup, IPAD, IPAD_normalized]
Index: []


In [29]:
# Specify 14 IPADs and find the corresponding IdSmallGroup for each
# Since we know each IPAD maps to exactly one IdSmallGroup, this should work well

# Helper function to convert lists to tuples for comparison (reuse from earlier)
def to_tuple(obj):
    """Freeze nested lists into nested tuples; anything that is not a list is returned as-is."""
    return tuple(to_tuple(item) for item in obj) if isinstance(obj, list) else obj

# The IPADs to look up. Each entry has the form [K_cyc, [[cyc, count], ...]].
# The trailing "#n" is the 1-based position, i.e. the "IPAD #n" label printed in the results.
target_ipads = [
    [[3, 3], [[[3, 3, 3], 1], [[9, 3], 3]]],                        # 1
    [[9, 3], [[[9, 3, 3], 2], [[27, 3], 2]]],                       # 2
    [[3, 3], [[[3, 3, 3], 2], [[9, 3], 2]]],                        # 3
    [[3, 3], [[[3, 3, 3], 3], [[9, 3], 1]]],                        # 4
    [[3, 3], [[[9, 3], 3], [[27, 9], 1]]],                          # 5
    [[3, 3], [[[3, 3, 3], 1], [[9, 3], 2], [[27, 9], 1]]],          # 6
    [[27, 3], [[[27, 3, 3], 2], [[81, 3], 2]]],                     # 7
    [[3, 3], [[[3, 3, 3], 2], [[27, 9], 2]]],                       # 8
    [[9, 3], [[[27, 3], 2], [[27, 9, 3], 1], [[9, 3, 3], 1]]],      # 9
    [[3, 3], [[[9, 3], 4]]],                                        # 10
    [[9, 3], [[[27, 3], 3], [[9, 3, 3], 1]]],                       # 11
    [[9, 3], [[[27, 3], 3], [[3, 3, 3, 3], 1]]],                    # 12
    [[9, 3], [[[27, 3], 2], [[9, 9, 9], 1], [[9, 3, 3], 1]]],       # 13
    [[9, 3], [[[27, 3], 3], [[27, 9, 3], 1]]],                      # 14
]

# If you want to get the 14 IPADs from a specific IdSmallGroup first:
# For example, to get the 14 IPADs from IdSmallGroup [243, 14] (which has 4 IPADs):
# Uncomment and modify the following:
"""
target_id = '[ 243, 14 ]'
target_group_data = id_to_ipads[id_to_ipads['IdSmallGroup'] == target_id]
if len(target_group_data) > 0:
    ipads_list = target_group_data.iloc[0]['IPADs']
    # Convert string IPADs back to actual list structures
    import ast
    target_ipads = [ast.literal_eval(ipad_str) for ipad_str in ipads_list]
    print(f"Found {len(target_ipads)} IPADs for IdSmallGroup {target_id}")
"""

# Find IdSmallGroup for each specified IPAD (match and count by NORMALIZED IPAD)
# Run the cell that defines normalize_ipad() first.

# Normalize every dataset IPAD exactly once. This value does not depend on the target,
# so computing it inside the loop would repeat the same full pass over the frame for
# every entry of target_ipads.
dataset_ipad_keys = ipad_id_df['IPAD'].apply(lambda x: to_tuple(normalize_ipad(x)))

results = []

for i, target_ipad in enumerate(target_ipads, 1):
    normalized_target = normalize_ipad(target_ipad)
    target_normalized_tuple = to_tuple(normalized_target)

    # Match by normalized IPAD (ignore order of second component).
    # The comparison goes through a lambda on purpose: `series == some_tuple` would make
    # pandas treat the tuple as an array-like to align against the rows.
    is_match = dataset_ipad_keys.apply(lambda key: key == target_normalized_tuple)
    matching_rows = ipad_id_df[is_match]

    if len(matching_rows) > 0:
        # Report the first matching row; the analysis cell found a single IdSmallGroup
        # per normalized IPAD, so the first row is taken as representative.
        first_match = matching_rows.iloc[0]
        results.append({
            'IPAD_index': i,
            'IPAD': target_ipad,
            'IdSmallGroup': first_match['IdSmallGroup'],
            'Example_D': first_match['D'],  # one D value as an example
            'Count': len(matching_rows)  # number of entries whose normalized IPAD matches
        })
    else:
        results.append({
            'IPAD_index': i,
            'IPAD': target_ipad,
            'IdSmallGroup': None,
            'Example_D': None,
            'Count': 0,
            'Note': 'IPAD not found in dataset'
        })

# Report: one paragraph per requested IPAD, followed by a compact summary table.
banner = "=" * 80

print(banner)
print(f"Results: IdSmallGroup for {len(target_ipads)} specified IPADs")
print(banner)

for entry in results:
    print(f"\nIPAD #{entry['IPAD_index']}:")
    print(f"  IPAD: {entry['IPAD']}")
    if not entry['IdSmallGroup']:
        # No group recorded for this IPAD: show the stored note (or a default) and move on.
        print(f"  ⚠️  {entry.get('Note', 'Not found')}")
        continue
    print(f"  IdSmallGroup: {entry['IdSmallGroup']}")
    print(f"  Example D value: {entry['Example_D']}")
    print(f"  Number of entries with this (normalized) IPAD: {entry['Count']}")

# Summary table
print("\n" + banner)
print("Summary Table:")
print(banner)
results_df = pd.DataFrame(results)
summary_columns = ['IPAD_index', 'IdSmallGroup', 'Example_D', 'Count']
print(results_df[summary_columns].to_string(index=False))

Results: IdSmallGroup for 14 specified IPADs

IPAD #1:
  IPAD: [[3, 3], [[[3, 3, 3], 1], [[9, 3], 3]]]
  IdSmallGroup: [ 243, 5 ]
  Example D value: 19651
  Number of entries with this IPAD: 20799

IPAD #2:
  IPAD: [[9, 3], [[[9, 3, 3], 2], [[27, 3], 2]]]
  IdSmallGroup: [ 243, 18 ]
  Example D value: 3299
  Number of entries with this IPAD: 18495

IPAD #3:
  IPAD: [[3, 3], [[[3, 3, 3], 2], [[9, 3], 2]]]
  IdSmallGroup: [ 243, 7 ]
  Example D value: 12131
  Number of entries with this IPAD: 20665

IPAD #4:
  IPAD: [[3, 3], [[[3, 3, 3], 3], [[9, 3], 1]]]
  IdSmallGroup: [ 243, 4 ]
  Example D value: 27355
  Number of entries with this IPAD: 30721

IPAD #5:
  IPAD: [[3, 3], [[[9, 3], 3], [[27, 9], 1]]]
  IdSmallGroup: [ 243, 8 ]
  Example D value: 22443
  Number of entries with this IPAD: 27390

IPAD #6:
  IPAD: [[3, 3], [[[3, 3, 3], 1], [[9, 3], 2], [[27, 9], 1]]]
  IdSmallGroup: [ 243, 6 ]
  Example D value: 18555
  Number of entries with this IPAD: 5922

IPAD #7:
  IPAD: [[27, 3], [[[

In [27]:
# Check if different IdSmallGroup values can have the same IPAD
# Normalize IPADs by sorting the second component (to ignore ordering)

def normalize_ipad(ipad):
    """
    Normalize an IPAD by sorting its second component into a canonical order.

    IPAD format: [K_cyc, [[cyc1, count1], [cyc2, count2], ...]], where each
    ``cyc`` is itself a list, e.g.
    [[9, 3], [[[9, 3, 3], 3], [[3, 3, 3, 3], 1]]].

    Parameters
    ----------
    ipad : object
        The IPAD to normalize. Anything that is not a two-element list, or
        whose second element is not a list, is returned unchanged.

    Returns
    -------
    object
        A new list ``[K_cyc, sorted_pairs]`` whose pairs are ordered by
        ``(tuple(cyc), count)``. The input is not modified; ``K_cyc`` and the
        individual pairs are the same objects as in the input.
    """
    if not isinstance(ipad, list) or len(ipad) != 2:
        return ipad

    K_cyc, second_component = ipad
    if not isinstance(second_component, list):
        return ipad

    def sort_key(entry):
        # The leading rank keeps differently shaped entries from ever being compared
        # with each other (a tuple key against a str key raises TypeError). Within one
        # rank the ordering is the plain (cyc, count) / str(entry) ordering, so
        # well-formed IPADs sort exactly as they would without the rank.
        if isinstance(entry, list) and len(entry) >= 2:
            if isinstance(entry[0], (list, tuple)):
                return (0, (tuple(entry[0]), entry[1]))
            return (1, (entry[0], entry[1]))
        # Malformed entries are kept (never dropped) and placed last, ordered by their text.
        return (2, str(entry))

    return [K_cyc, sorted(second_component, key=sort_key)]

# Store the normalized IPAD as text so it can serve as a groupby key
# (this adds/overwrites the 'IPAD_normalized' column on ipad_id_df).
ipad_id_df['IPAD_normalized'] = ipad_id_df['IPAD'].apply(lambda x: str(normalize_ipad(x)))

# For every normalized IPAD: the distinct IdSmallGroup labels seen with it, and how many.
ipad_to_ids = (
    ipad_id_df
    .groupby('IPAD_normalized')['IdSmallGroup']
    .agg(['unique', 'nunique'])
    .reset_index()
)
ipad_to_ids.columns = ['IPAD_normalized', 'IdSmallGroups', 'Count']

# IPADs shared by more than one IdSmallGroup, most-shared first.
shared_mask = ipad_to_ids['Count'] > 1
multiple_ids = ipad_to_ids[shared_mask].sort_values('Count', ascending=False)

print("=" * 80)
print("Analysis: Can different IdSmallGroup values have the same IPAD?")
print("(IPADs are normalized - second component sorted to ignore ordering)")
print("=" * 80)
print(f"\nTotal unique IPADs (normalized): {len(ipad_to_ids)}")
print(f"IPADs with multiple IdSmallGroup values: {len(multiple_ids)}")
print(f"IPADs with unique IdSmallGroup values: {(ipad_to_ids['Count'] == 1).sum()}")

if multiple_ids.empty:
    print("\n✓ No IPADs found with multiple IdSmallGroup values.")
    print("  Each IPAD appears with only one unique IdSmallGroup value.")
else:
    print(f"\n{'=' * 80}")
    print(f"Found {len(multiple_ids)} IPADs that appear with multiple IdSmallGroup values:")
    print(f"{'=' * 80}")
    print("\nFirst 20 examples:")
    for _, row in multiple_ids.head(20).iterrows():
        # Only the first 150 characters of the normalized IPAD text are shown.
        print(f"\nIPAD (normalized): {row['IPAD_normalized'][:150]}...")
        print(f"  Number of different IdSmallGroups: {row['Count']}")
        print(f"  IdSmallGroups: {row['IdSmallGroups']}")

# Reverse view: for each IdSmallGroup, the distinct normalized IPADs and how many there are.
print(f"\n{'=' * 80}")
print("Reverse analysis: How many different IPADs does each IdSmallGroup have?")
print("(Using normalized IPADs - second component sorted)")
print(f"{'=' * 80}")

id_to_ipads = (
    ipad_id_df
    .groupby('IdSmallGroup')['IPAD_normalized']
    .agg(['unique', 'nunique'])
    .reset_index()
)
id_to_ipads.columns = ['IdSmallGroup', 'IPADs', 'Count']
# Groups with the most distinct IPADs come first.
id_to_ipads = id_to_ipads.sort_values('Count', ascending=False)

print("\nIdSmallGroups and their IPAD counts (top 20):")
print(id_to_ipads.head(20).to_string(index=False))

Analysis: Can different IdSmallGroup values have the same IPAD?
(IPADs are normalized - second component sorted to ignore ordering)

Total unique IPADs (normalized): 345
IPADs with multiple IdSmallGroup values: 0
IPADs with unique IdSmallGroup values: 345

✓ No IPADs found with multiple IdSmallGroup values.
  Each IPAD appears with only one unique IdSmallGroup value.

Reverse analysis: How many different IPADs does each IdSmallGroup have?
(Using normalized IPADs - second component sorted)

IdSmallGroups and their IPAD counts (top 20):
IdSmallGroup                                                                                                                                                                                                                                                                                                                                                                                                                                                               

In [26]:
# Print all IPADs for each IdSmallGroup
import ast  # imported once for the cell instead of on every inner-loop iteration

# Group by IdSmallGroup and get unique normalized IPADs.
# The IPADs are strings here, so sorted() orders them as text.
id_to_ipads_list = ipad_id_df.groupby('IdSmallGroup')['IPAD_normalized'].apply(lambda x: sorted(set(x))).reset_index()
id_to_ipads_list.columns = ['IdSmallGroup', 'IPADs_list']

# Sort by IdSmallGroup for consistent output
id_to_ipads_list = id_to_ipads_list.sort_values('IdSmallGroup')

print("=" * 80)
print("IPADs for each IdSmallGroup:")
print("=" * 80)

for _, row in id_to_ipads_list.iterrows():
    id_small_group = row['IdSmallGroup']
    ipads = row['IPADs_list']

    print(f"\n{id_small_group} --")
    print("          |")
    for ipad in ipads:
        # Parse the stored string back into a list for printing; fall back to the raw
        # string when it is not a valid Python literal.
        try:
            ipad_list = ast.literal_eval(ipad)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Only the errors ast.literal_eval is documented to raise on malformed input
            # are handled, so KeyboardInterrupt / SystemExit still stop the cell.
            print(f"          {ipad}")
        else:
            print(f"          {ipad_list}")

    print(f"          ({len(ipads)} unique IPAD(s))")

print(f"\n{'=' * 80}")
print(f"Total IdSmallGroups: {len(id_to_ipads_list)}")
print(f"{'=' * 80}")

IPADs for each IdSmallGroup:

[ 2187, 33 ] --
          |
          [[243, 9], [[[81, 27, 9, 3], 1], [[243, 9, 9, 3], 1], [[729, 9, 3, 3], 2]]]
          [[27, 9], [[[27, 9, 9, 3], 1], [[27, 27, 27, 3], 1], [[81, 9, 3, 3], 2]]]
          [[27, 9], [[[27, 9, 9, 3], 1], [[81, 9, 3, 3], 3]]]
          [[27, 9], [[[27, 9, 9, 3], 2], [[81, 9, 3, 3], 1], [[81, 27, 9, 3], 1]]]
          [[27, 9], [[[27, 9, 9, 3], 2], [[81, 9, 3, 3], 2]]]
          [[27, 9], [[[81, 9, 3, 3], 3], [[81, 27, 9, 3], 1]]]
          [[81, 9], [[[81, 9, 9, 3], 1], [[243, 9, 3, 3], 3]]]
          [[81, 9], [[[81, 9, 9, 3], 1], [[81, 27, 27, 3], 1], [[243, 9, 3, 3], 1], [[243, 27, 9, 3], 1]]]
          [[81, 9], [[[81, 9, 9, 3], 1], [[81, 27, 27, 3], 1], [[243, 9, 3, 3], 2]]]
          [[81, 9], [[[81, 9, 9, 3], 2], [[243, 9, 3, 3], 2]]]
          [[9, 9], [[[27, 9, 3, 3], 2], [[81, 27, 3, 3], 2]]]
          [[9, 9], [[[27, 9, 3, 3], 3], [[81, 27, 3, 3], 1]]]
          [[9, 9], [[[27, 9, 3, 3], 4]]]
          [[9, 9], 

In [35]:
# Order-insensitive IPAD filter: the target and every dataset IPAD are normalized
# (second component sorted) before they are compared.

# Set your target IPAD here
target_ipad = [
    [243, 3],
    [
        [[81, 3, 3, 3], 1],
        [[243, 9, 9], 1],
        [[243, 243, 243], 1],
        [[729, 9, 3], 1],
    ],
]
# Or use: target_ipad = [[27, 3], [[[27, 3], 3], [[9, 3, 3, 3], 1]]]

# Canonical, tuple form of the target used for the comparison
normalized_target = normalize_ipad(target_ipad)
target_ipad_tuple = to_tuple(normalized_target)

print(f"Target IPAD (original): {target_ipad}")
print(f"Target IPAD (normalized): {normalized_target}")

# Row mask: True where the row's normalized IPAD equals the normalized target
normalized_match_mask = ipad_id_df['IPAD'].apply(
    lambda x: to_tuple(normalize_ipad(x)) == target_ipad_tuple
)
filtered_df = ipad_id_df[normalized_match_mask]

print(f"\nFound {len(filtered_df)} entries with normalized IPAD matching the target")
if filtered_df.empty:
    print("No matches found.")
else:
    print(filtered_df.head())

Target IPAD (original): [[243, 3], [[[81, 3, 3, 3], 1], [[243, 9, 9], 1], [[243, 243, 243], 1], [[729, 9, 3], 1]]]
Target IPAD (normalized): [[243, 3], [[[81, 3, 3, 3], 1], [[243, 9, 9], 1], [[243, 243, 243], 1], [[729, 9, 3], 1]]]

Found 1 entries with normalized IPAD matching the target
               D IdSmallGroup                                                                                        IPAD                                                                             IPAD_normalized
439251  71699080   [ 729, 9 ]  [[243, 3], [[[243, 243, 243], 1], [[729, 9, 3], 1], [[243, 9, 9], 1], [[81, 3, 3, 3], 1]]]  [[243, 3], [[[81, 3, 3, 3], 1], [[243, 9, 9], 1], [[243, 243, 243], 1], [[729, 9, 3], 1]]]
