In [11]:
import numpy as np
from itertools import combinations

def calculate_pooled_variance(group):
    """Combine the second element of every pair in *group* into one number.

    The second element of each pair is treated as that pair's variance.
    Every term is weighted by the same ``len(group) - 1`` factor and the
    weighted total is divided by that same factor, so for two or more pairs
    the result is algebraically the plain sum of the second elements.

    Args:
        group: Sized sequence of indexable items; only ``item[1]`` is read.

    Returns:
        ``0`` for an empty group, the lone second element for a group of
        one, otherwise the weighted total divided by ``len(group) - 1``.
    """
    size = len(group)
    if size == 0:
        return 0

    second_terms = np.array([item[1] for item in group])
    if size == 1:
        # A single pair has no degrees of freedom to weight by.
        return second_terms[0]

    dof = size - 1
    return np.sum(dof * second_terms) / dof


def find_all_splits(pairs, num_groups):
    """Enumerate every way to cut *pairs* into *num_groups* consecutive runs.

    The order of *pairs* is preserved; a split is defined purely by
    ``num_groups - 1`` distinct interior cut positions chosen from
    ``1 .. len(pairs) - 1``.

    Args:
        pairs: Sliceable sequence to partition.
        num_groups: Number of consecutive groups in each split.

    Returns:
        A list of splits, each split being a list of ``num_groups`` slices
        of *pairs*. The list is empty when there are fewer interior
        positions than cuts required.
    """
    total = len(pairs)
    interior_cuts = combinations(range(1, total), num_groups - 1)

    def carve(cuts):
        # Bracket the interior cuts with the two outer edges, then slice
        # between each pair of neighbouring boundaries.
        bounds = (0, *cuts, total)
        return [pairs[lo:hi] for lo, hi in zip(bounds, bounds[1:])]

    return [carve(cuts) for cuts in interior_cuts]

def evaluate_splits(splits):
    """Pick the split with the lowest combined imbalance score.

    For each split the score is the sum of two terms:

    * the gap between the largest and smallest per-group total of first
      elements (``pair[0]`` summed over the group), and
    * the sum over groups of ``calculate_pooled_variance(group)``.

    Both terms carry equal weight. Ties keep the earliest split seen.

    Args:
        splits: Iterable of splits; each split is a sequence of groups and
            each group a sequence of indexable pairs.

    Returns:
        ``(best_split, best_score)``. If no split scores below infinity
        (for example when *splits* is empty) this is ``(None, inf)``.
    """
    winner = None
    lowest = float('inf')

    for candidate in splits:
        totals = [sum(item[0] for item in members) for members in candidate]
        variance_terms = [calculate_pooled_variance(members) for members in candidate]

        spread = max(totals) - min(totals)
        current = spread + sum(variance_terms)

        # Strict comparison: only a genuinely smaller score replaces the
        # current winner.
        if not current < lowest:
            continue
        lowest = current
        winner = candidate

    return winner, lowest

# Input data: a list of 2-tuples.
# - The first element of each tuple is what evaluate_splits and the display
#   loop sum per group (reported as "Total Mean").
# - The second element is what calculate_pooled_variance collects as the
#   pair's variance.
# List order matters: find_all_splits only cuts the list into consecutive runs.
# NOTE(review): the units and origin of these numbers are not stated here.
pairs = [
    (4.84, 0.6292),
    (8.77, 1.3155),
    (3.54, 0.354),
    (27.08, 3.5204),
    (4.86, 0.486),
    (4, 0.4),
    (10.56, 1.3728),
    (25.16, 3.2708),
    (3.22, 0.483),
    (19.02, 2.4726),
    (4.3, 0.559),
    (3.54, 0.354),
    (4, 0.4),
    (4.73, 0.7095),
    (12.1, 1.815),
    (9.92, 1.488),
    (2.93, 0.4395),
    (19.02, 2.4726),
    (6.35, 0.635),
    (12.1, 1.573),
    (6.35, 0.635),
    (15.86, 2.0618),
    (62.26, 8.0938),
    (6.35, 0.635),
    (3.22, 0.322),
    (47.42, 6.1646),
    (8.85, 0.885),
    (4, 0.4),
    (62.26, 8.0938),
    (3.98, 0.398),
    (6.7, 1.005),
    (8.85, 0.885),
    (4, 0.4),
    (12.1, 1.815),
    (24.2, 3.63),
    (54.84, 7.1292),
    (6.35, 0.635),
    (3.98, 0.5174),
    (6.1132, 0.91698),
    (6.35, 0.635),
    (5.63, 0.8445),
]

# Enumerate every consecutive 4-way partition of the input list.
num_groups = 4
all_splits = find_all_splits(pairs, num_groups)

# Score each partition and keep the lowest-scoring one.
best_split, best_score = evaluate_splits(all_splits)

# Report each group of the winning partition: the sum of first elements,
# the combined variance figure, and the raw pairs it contains.
for position, group in enumerate(best_split, start=1):
    first_element_total = sum(item[0] for item in group)
    variance_figure = calculate_pooled_variance(group)
    print(f"Group {position}:")
    print(f"  Total Mean: {first_element_total:.2f}")
    print(f"  Pooled Variance: {variance_figure:.2f}")
    print(f"  Pairs: {group}\n")

Group 1:
  Total Mean: 139.72
  Pooled Variance: 18.14
  Pairs: [(4.84, 0.6292), (8.77, 1.3155), (3.54, 0.354), (27.08, 3.5204), (4.86, 0.486), (4, 0.4), (10.56, 1.3728), (25.16, 3.2708), (3.22, 0.483), (19.02, 2.4726), (4.3, 0.559), (3.54, 0.354), (4, 0.4), (4.73, 0.7095), (12.1, 1.815)]

Group 2:
  Total Mean: 134.79
  Pooled Variance: 17.40
  Pairs: [(9.92, 1.488), (2.93, 0.4395), (19.02, 2.4726), (6.35, 0.635), (12.1, 1.573), (6.35, 0.635), (15.86, 2.0618), (62.26, 8.0938)]

Group 3:
  Total Mean: 136.08
  Pooled Variance: 16.90
  Pairs: [(6.35, 0.635), (3.22, 0.322), (47.42, 6.1646), (8.85, 0.885), (4, 0.4), (62.26, 8.0938), (3.98, 0.398)]

Group 4:
  Total Mean: 139.11
  Pooled Variance: 18.41
  Pairs: [(6.7, 1.005), (8.85, 0.885), (4, 0.4), (12.1, 1.815), (24.2, 3.63), (54.84, 7.1292), (6.35, 0.635), (3.98, 0.5174), (6.1132, 0.91698), (6.35, 0.635), (5.63, 0.8445)]

