In [3]:
# CCDM example: small-block enumerative CCDM mapping (brute-force enumeration)
# This code demonstrates a simple CCDM that maps k bits -> a sequence of amplitudes
# with *constant composition* (multiset permutations). It's suitable for small block sizes.
#
# We:
#  - define an amplitude alphabet (e.g., [1, 3, 5])
#  - choose a composition (counts per amplitude) summing to block length n
#  - enumerate all distinct sequences with that composition, sort lexicographically
#  - map the first 2^k sequences to k-bit input words (k = floor(log2(num_sequences)))
#  - show mapping table, probabilities, average energy comparison with uniform QAM-like mapping
#
# NOTE: This is a pedagogical example; practical CCDM implementations use efficient enumerative
# algorithms for larger block lengths (e.g., arithmetic coding / enumerative coding).

from itertools import permutations
import pandas as pd
from math import log2, floor, factorial
from collections import Counter

# Parameters for the toy example
alphabet = [1, 3, 5]           # amplitude levels
composition = [2, 1, 1]        # counts for each amplitude, sums to n = 4
n = sum(composition)

# Generate all distinct permutations of the multiset defined by (alphabet, composition)
multiset = []
for a, c in zip(alphabet, composition):
    multiset += [a] * c

# Use set(permutations(...)) to get distinct arrangements
all_seqs = sorted(set(permutations(multiset, n)))

num_sequences = len(all_seqs)
k = floor(log2(num_sequences))  # number of input bits we can map losslessly (integer)
num_mapped = 2**k                # number of sequences we will use (power of two)
unused = num_sequences - num_mapped

# Prepare mapping: map binary words 0..2^k-1 to the first 2^k sequences
mapping = []
for idx in range(num_mapped):
    bits = format(idx, f'0{k}b')
    seq = all_seqs[idx]
    mapping.append({'bits': bits, 'sequence': seq})

df = pd.DataFrame(mapping)
# compute amplitude distribution from mapped sequences (each sequence equally likely)
amp_counts = Counter()
for row in df['sequence']:
    amp_counts.update(row)
amp_prob = {a: amp_counts[a] / (num_mapped * n) for a in alphabet}

# For comparison: uniform distribution over the same alphabet (naive uniform over amplitudes)
uniform_prob = {a: 1/len(alphabet) for a in alphabet}

# Compute average symbol energy (E = amplitude^2) for shaped vs uniform
E_shaped = sum((a**2) * p for a, p in amp_prob.items())
E_uniform = sum((a**2) * p for a, p in uniform_prob.items())

# Entropy per amplitude for shaped distribution (bits of information per amplitude symbol)
import math
H_shaped = -sum(p*math.log2(p) for p in amp_prob.values() if p>0)
H_uniform = -sum(p*math.log2(p) for p in uniform_prob.values() if p>0)

# Create a summary table
summary = pd.DataFrame([
    {'Distribution': 'CCDM-shaped (mapped)', 'num_sequences': num_sequences, 'mapped_sequences': num_mapped,
     'k_bits': k, 'unused_sequences': unused, 
     'E_avg': E_shaped, 'H_per_amp_bits': H_shaped},
    {'Distribution': 'Uniform (naive)', 'num_sequences': 'N/A', 'mapped_sequences': 'N/A',
     'k_bits': 'N/A', 'unused_sequences': 'N/A', 
     'E_avg': E_uniform, 'H_per_amp_bits': H_uniform},
])

# Display mapping and summary
# from caas_jupyter_tools import display_dataframe_to_user
# display_dataframe_to_user("CCDM Mapping Table (first mapped sequences)", df)
# display_dataframe_to_user("Summary: energies and entropies", summary)

# Also print the amplitude probabilities
amp_table = pd.DataFrame([{'amplitude': a, 'P_shaped': amp_prob[a], 'P_uniform': uniform_prob[a]} for a in alphabet])
print("Amplitude probabilities (shaped vs uniform)", amp_table)

# Finally show how to invert: recover bits from a given sequence by lookup
def sequence_to_bits(sequence):
    try:
        idx = all_seqs.index(tuple(sequence))
    except ValueError:
        return None  # sequence not in the enumerated set
    if idx < num_mapped:
        return format(idx, f'0{k}b')
    else:
        return None  # this sequence isn't used by our mapping (unused sequences)

# Demonstrate inversion on mapped sequences
inversion_demo = []
for _, row in df.iterrows():
    inversion_demo.append({'sequence': row['sequence'], 'bits_from_inverse': sequence_to_bits(row['sequence'])})
print(pd.DataFrame(inversion_demo))

# Print a small explanatory note for the user
print(
    f"Toy CCDM example: alphabet={alphabet}, composition={composition}, block length n={n}\n"
    f"Total distinct sequences with that composition: {num_sequences}\n"
    f"We map k={k} bits -> {num_mapped} sequences (first {num_mapped} lexicographically) and leave {unused} sequences unused.\n"
    f"Average symbol energy (shaped) = {E_shaped:.3f}, (uniform) = {E_uniform:.3f}  -> shaped reduces average energy by {E_uniform - E_shaped:.3f}.\n"
    f"Entropy per amplitude: shaped = {H_shaped:.3f} bits, uniform = {H_uniform:.3f} bits.\n\n"
    "Notes:\n"
    "- This mapping is invertible for the used sequences (we showed sequence->bits inversion for mapped sequences).\n"
    "- Practical CCDM uses large blocks & efficient enumerative coding to approach the target distribution with smaller rate loss.\n"
)


Amplitude probabilities (shaped vs uniform)    amplitude  P_shaped  P_uniform
0          1      0.50   0.333333
1          3      0.25   0.333333
2          5      0.25   0.333333
       sequence bits_from_inverse
0  (1, 1, 3, 5)               000
1  (1, 1, 5, 3)               001
2  (1, 3, 1, 5)               010
3  (1, 3, 5, 1)               011
4  (1, 5, 1, 3)               100
5  (1, 5, 3, 1)               101
6  (3, 1, 1, 5)               110
7  (3, 1, 5, 1)               111
Toy CCDM example: alphabet=[1, 3, 5], composition=[2, 1, 1], block length n=4
Total distinct sequences with that composition: 12
We map k=3 bits -> 8 sequences (first 8 lexicographically) and leave 4 sequences unused.
Average symbol energy (shaped) = 9.000, (uniform) = 11.667  -> shaped reduces average energy by 2.667.
Entropy per amplitude: shaped = 1.500 bits, uniform = 1.585 bits.

Notes:
- This mapping is invertible for the used sequences (we showed sequence->bits inversion for mapped sequences).
- Practi

In [4]:
from itertools import permutations
from math import exp, floor, log2
import pandas as pd
from collections import Counter

# --- 1. Amplitudes e distribuição MB ---
alphabet = [1, 3, 5]
lambda_param = 0.1  # controla "força" do shaping

# Maxwell-Boltzmann: P(a) ∝ e^(-lambda * a^2)
mb_weights = [exp(-lambda_param * a**2) for a in alphabet]
Z = sum(mb_weights)
mb_probs = [w / Z for w in mb_weights]

print("Distribuição MB alvo:")
for a, p in zip(alphabet, mb_probs):
    print(f"  A={a}, P={p:.3f}")

# --- 2. Escolha da composição constante ---
n = 6  # tamanho do bloco (pequeno para visualização)
target_counts = [round(p * n) for p in mb_probs]
# Ajuste para garantir que soma seja n
while sum(target_counts) != n:
    diff = n - sum(target_counts)
    target_counts[target_counts.index(max(target_counts))] += diff

print(f"\nComposição escolhida para n={n}: {target_counts}")

# --- 3. Gerar todas as permutações com essa composição ---
multiset = []
for a, c in zip(alphabet, target_counts):
    multiset += [a] * c

all_seqs = sorted(set(permutations(multiset, n)))
num_sequences = len(all_seqs)
k = floor(log2(num_sequences))
num_mapped = 2**k
unused = num_sequences - num_mapped

print(f"\nTotal de sequências possíveis: {num_sequences}")
print(f"Bits mapeados por bloco: k={k} ({num_mapped} sequências usadas, {unused} não usadas)")

# --- 4. Mapear bits -> sequências ---
mapping = []
for idx in range(num_mapped):
    bits = format(idx, f'0{k}b')
    seq = all_seqs[idx]
    mapping.append({'bits': bits, 'sequence': seq})

df = pd.DataFrame(mapping)
print("\nTabela de mapeamento (primeiros bits -> sequência):")
print(df)

# --- 5. Comparar distribuição e energia ---
amp_counts = Counter()
for seq in df['sequence']:
    amp_counts.update(seq)
amp_prob_shaped = {a: amp_counts[a] / (num_mapped * n) for a in alphabet}
amp_prob_uniform = {a: 1/len(alphabet) for a in alphabet}

E_shaped = sum((a**2) * p for a, p in amp_prob_shaped.items())
E_uniform = sum((a**2) * p for a, p in amp_prob_uniform.items())

print("\nProbabilidades obtidas (shaped vs uniforme):")
for a in alphabet:
    print(f"A={a}: shaped={amp_prob_shaped[a]:.3f}, uniform={amp_prob_uniform[a]:.3f}")

print(f"\nEnergia média shaped = {E_shaped:.3f}")
print(f"Energia média uniforme = {E_uniform:.3f}")
print(f"Redução de energia = {E_uniform - E_shaped:.3f}")


Distribuição MB alvo:
  A=1, P=0.649
  A=3, P=0.292
  A=5, P=0.059

Composição escolhida para n=6: [4, 2, 0]

Total de sequências possíveis: 15
Bits mapeados por bloco: k=3 (8 sequências usadas, 7 não usadas)

Tabela de mapeamento (primeiros bits -> sequência):
  bits            sequence
0  000  (1, 1, 1, 1, 3, 3)
1  001  (1, 1, 1, 3, 1, 3)
2  010  (1, 1, 1, 3, 3, 1)
3  011  (1, 1, 3, 1, 1, 3)
4  100  (1, 1, 3, 1, 3, 1)
5  101  (1, 1, 3, 3, 1, 1)
6  110  (1, 3, 1, 1, 1, 3)
7  111  (1, 3, 1, 1, 3, 1)

Probabilidades obtidas (shaped vs uniforme):
A=1: shaped=0.667, uniform=0.333
A=3: shaped=0.333, uniform=0.333
A=5: shaped=0.000, uniform=0.333

Energia média shaped = 3.667
Energia média uniforme = 11.667
Redução de energia = 8.000
