<a href="https://colab.research.google.com/github/gimenopea/CachedHE/blob/main/Praxis_Benchmark_2_MPI_Diabetes_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!apt-get install -y openmpi-bin openmpi-common libopenmpi-dev
!pip install mpi4py tenseal phe

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libopenmpi-dev is already the newest version (4.1.2-2ubuntu1).
openmpi-bin is already the newest version (4.1.2-2ubuntu1).
openmpi-bin set to manually installed.
openmpi-common is already the newest version (4.1.2-2ubuntu1).
openmpi-common set to manually installed.
0 upgraded, 0 newly installed, 0 to remove and 2 not upgraded.
Collecting mpi4py
  Downloading mpi4py-4.0.3.tar.gz (466 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m466.3/466.3 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tenseal
  Downloading tenseal-0.3.16-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.4 kB)
Collecting phe
  Downloading phe-

# Paillier Uncached

In [None]:
%%writefile benchmark2_diabetes_paillier_uncached.py
from mpi4py import MPI
import numpy as np
import time
import pandas as pd
from phe import paillier
import os
import csv

VERSION = 'Paillier PHE Uncached'
CSV_PATH = '/content/drive/MyDrive/praxisfiles/Benchmark/benchmark2_diabetes_paillier_uncached.csv'


def init_csv(path):
    """Start a fresh results file at ``path`` holding only the header row.

    The file is opened in write mode, so any previous content is discarded.
    """
    header = (
        'Version',
        'Cores',
        'Input Encryption Time (sec)',
        'Model Inference Time (sec)',
        'Inference Result (Dot Product)',
    )
    with open(path, 'w', newline='') as out:
        csv.writer(out).writerow(header)

def write_csv(path, version, cores, enc_time, inf_time, result):
    """Add one experiment record to the end of the CSV at ``path``.

    Both timings are rendered with six decimal places; the other fields are
    handed to the CSV writer unchanged.
    """
    record = [
        version,
        cores,
        format(enc_time, '.6f'),
        format(inf_time, '.6f'),
        result,
    ]
    with open(path, 'a', newline='') as out:
        csv.writer(out).writerow(record)


def input_encryption_experiment(sub_comm, input_vector, public_key):
    """
    Encrypt ``input_vector`` cooperatively across the ranks of ``sub_comm``.

    Feature ``i`` is handled by the rank equal to ``i % size``. Every value is
    truncated with ``int()`` before being encrypted. Only the local encryption loop
    is timed; the gather and broadcast that follow are outside the timed region.

    Returns ``(local_encryption_time, encrypted_vector)`` where ``encrypted_vector``
    is the complete, index-ordered ciphertext list broadcast from rank 0.
    """
    my_rank = sub_comm.Get_rank()
    n_ranks = sub_comm.Get_size()

    # Strided walk over the indices: my_rank, my_rank + n_ranks, ...
    my_indices = range(my_rank, len(input_vector), n_ranks)

    t0 = MPI.Wtime()
    local_pairs = [(i, public_key.encrypt(int(input_vector[i]))) for i in my_indices]
    elapsed = MPI.Wtime() - t0

    # Collect every rank's (index, ciphertext) pairs on the root.
    per_rank = sub_comm.gather(local_pairs, root=0)

    ordered = None
    if my_rank == 0:
        merged = sorted(
            (pair for chunk in per_rank for pair in chunk),
            key=lambda pair: pair[0],
        )
        ordered = [cipher for _, cipher in merged]

    # Every rank leaves with the same full encrypted vector.
    return elapsed, sub_comm.bcast(ordered, root=0)

def model_inference_experiment(sub_comm, encrypted_vector, weights, private_key):
    """
    Compute the encrypted dot product of ``encrypted_vector`` and ``weights``.

    Each rank scales the ciphertexts at indices ``i`` with ``i % size == rank`` by
    ``weights[i]``. The scaled ciphertexts are gathered on rank 0, put back in index
    order, added one after another and decrypted with ``private_key``.

    The timed region covers the local multiplications and the gather; the
    summation and decryption on rank 0 are not included.

    Returns ``(inference_time, result)``; ``result`` is ``None`` on non-root ranks.
    """
    my_rank = sub_comm.Get_rank()
    n_ranks = sub_comm.Get_size()
    my_indices = range(my_rank, len(encrypted_vector), n_ranks)

    t0 = MPI.Wtime()
    local_pairs = [(i, encrypted_vector[i] * weights[i]) for i in my_indices]
    per_rank = sub_comm.gather(local_pairs, root=0)
    elapsed = MPI.Wtime() - t0

    if my_rank != 0:
        return elapsed, None

    merged = sorted(
        (pair for chunk in per_rank for pair in chunk),
        key=lambda pair: pair[0],
    )
    terms = [cipher for _, cipher in merged]

    # Plain left-to-right accumulation (no tree reduction in this variant).
    running = terms[0]
    for term in terms[1:]:
        running = running + term

    return elapsed, private_key.decrypt(running)

def run_experiment(exp_cores, input_vector, weights, public_key, private_key, world_comm):
    """
    Run the encryption and inference phases on the first ``exp_cores`` ranks.

    When the world communicator already has exactly ``exp_cores`` ranks it is used
    directly; otherwise it is split so that ranks below ``exp_cores`` form the
    working group and the remaining ranks return immediately. The group's rank 0
    prints the timings and result and appends them to ``CSV_PATH``.
    """
    world_rank = world_comm.Get_rank()

    if world_comm.Get_size() != exp_cores:
        # Ranks outside the experiment pass MPI.UNDEFINED and receive COMM_NULL.
        color = MPI.UNDEFINED if world_rank >= exp_cores else 0
        sub_comm = world_comm.Split(color, world_rank)
    else:
        sub_comm = world_comm
    if sub_comm == MPI.COMM_NULL:
        return

    enc_time, encrypted_vector = input_encryption_experiment(sub_comm, input_vector, public_key)
    inf_time, inference_result = model_inference_experiment(sub_comm, encrypted_vector, weights, private_key)

    if sub_comm.Get_rank() == 0:
        report = (
            f"Experiment with {exp_cores} cores:",
            f"  Input Encryption Time: {enc_time:.6f} sec",
            f"  Model Inference Time:  {inf_time:.6f} sec",
            f"  Inference Result (Dot Product): {inference_result}",
        )
        for line in report:
            print(line, flush=True)
        write_csv(CSV_PATH, VERSION, exp_cores, enc_time, inf_time, inference_result)

    # Only communicators created by Split are released here.
    if sub_comm != world_comm:
        sub_comm.Free()

def main():
    """
    Drive the uncached Paillier benchmark.

    Rank 0 resets the CSV, downloads the dataset, takes the first eight columns of
    the first row as the input vector and generates a Paillier key pair with
    ``n_length=1024``. Input and both keys are broadcast to all ranks, then the
    experiment is run for each core count that fits in the world size.
    """
    world_comm = MPI.COMM_WORLD
    rank = world_comm.Get_rank()
    world_size = world_comm.Get_size()
    is_root = rank == 0

    input_vector = None
    public_key = private_key = None
    if is_root:
        # Fresh CSV (header only) for this run.
        init_csv(CSV_PATH)
        print(f"== {VERSION} ==", flush=True)

        url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
        df = pd.read_csv(url, header=None)
        input_vector = df.iloc[0, :8].tolist()
        public_key, private_key = paillier.generate_paillier_keypair(n_length=1024)

    input_vector = world_comm.bcast(input_vector, root=0)
    public_key = world_comm.bcast(public_key, root=0)
    private_key = world_comm.bcast(private_key, root=0)

    weights = [2, 1, 3, 1, 1, 2, 1, 1]

    for exp_cores in (4, 8, 16, 32):
        if world_size < exp_cores:
            if is_root:
                print(f"Skipping experiment with {exp_cores} cores (world size {world_size} < {exp_cores}).", flush=True)
            continue
        run_experiment(exp_cores, input_vector, weights, public_key, private_key, world_comm)

if __name__ == '__main__':
    main()

Overwriting benchmark2_diabetes_paillier_uncached.py


# Paillier Cached

In [None]:
%%writefile benchmark2_diabetes_paillier_cached.py
from mpi4py import MPI
import numpy as np
import time
import pandas as pd
from phe import paillier
import os
import csv

VERSION = 'Paillier PHE Cached'
CSV_PATH = '/content/drive/MyDrive/praxisfiles/Benchmark/benchmark2_diabetes_paillier_cached.csv'


def init_csv(path):
    """Create (or truncate) the CSV at ``path`` and write just the column headers."""
    columns = [
        'Version',
        'Cores',
        'Input Encryption Time (sec)',
        'Model Inference Time (sec)',
        'Inference Result (Dot Product)',
    ]
    with open(path, 'w', newline='') as handle:
        csv.writer(handle).writerows([columns])


def write_to_csv(path, data):
    """Rewrite the CSV at ``path`` from scratch: header first, then every row of ``data``."""
    columns = [
        'Version',
        'Cores',
        'Input Encryption Time (sec)',
        'Model Inference Time (sec)',
        'Inference Result (Dot Product)',
    ]
    with open(path, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(columns)
        out.writerows(data)


def radix_sum(ciphertexts, radix=2):
    """
    Sum a sequence of ciphertexts using a tree-based reduction.

    The items are split into consecutive groups of ``radix`` elements, each group is
    added left to right with ``+``, and the process repeats on the group sums until a
    single value remains. The input sequence itself is not modified.

    Args:
        ciphertexts: non-empty iterable of objects supporting ``+``.
        radix: group size per reduction level; must be at least 2.

    Returns:
        The sum of all items.

    Raises:
        ValueError: if ``radix`` is below 2 or ``ciphertexts`` is empty.
    """
    if radix < 2:
        # A group size of 1 never shrinks the list (endless loop); 0 or less is meaningless.
        raise ValueError(f"radix must be at least 2, got {radix}")

    level = list(ciphertexts)
    if not level:
        raise ValueError("radix_sum() requires at least one ciphertext")

    while len(level) > 1:
        next_level = []
        for start in range(0, len(level), radix):
            group_sum = level[start]
            for ct in level[start + 1:start + radix]:
                group_sum = group_sum + ct
            next_level.append(group_sum)
        level = next_level
    return level[0]


def input_encryption_experiment(sub_comm, input_vector, public_key):
    """
    Split the encryption of ``input_vector`` over the ranks of ``sub_comm``.

    Rank ``r`` encrypts the features at indices ``r, r + size, r + 2*size, ...``
    after truncating each value with ``int()``. Only that local loop is timed. The
    (index, ciphertext) pairs are then gathered on rank 0, ordered by index and the
    full ciphertext list is broadcast back to every rank.

    Returns ``(local_encryption_time, encrypted_vector)``.
    """
    my_rank = sub_comm.Get_rank()
    n_ranks = sub_comm.Get_size()
    my_indices = range(my_rank, len(input_vector), n_ranks)

    t0 = MPI.Wtime()
    local_pairs = []
    for feature_idx in my_indices:
        cipher = public_key.encrypt(int(input_vector[feature_idx]))
        local_pairs.append((feature_idx, cipher))
    elapsed = MPI.Wtime() - t0

    per_rank = sub_comm.gather(local_pairs, root=0)

    ordered = None
    if my_rank == 0:
        merged = [pair for chunk in per_rank for pair in chunk]
        merged = sorted(merged, key=lambda pair: pair[0])
        ordered = [cipher for _, cipher in merged]

    ordered = sub_comm.bcast(ordered, root=0)
    return elapsed, ordered


def model_inference_experiment(sub_comm, encrypted_vector, weights, private_key):
    """
    Compute the encrypted dot product of ``encrypted_vector`` and ``weights``.

    Each rank multiplies the ciphertexts at its round-robin indices by the matching
    weight. The products are gathered on rank 0, restored to index order, combined
    with ``radix_sum(..., radix=2)`` and decrypted with ``private_key``.

    The timed region ends right after the gather, so the tree reduction and the
    decryption on rank 0 are not part of the reported time.

    Returns ``(inference_time, result)``; ``result`` is ``None`` on non-root ranks.
    """
    my_rank = sub_comm.Get_rank()
    n_ranks = sub_comm.Get_size()
    my_indices = range(my_rank, len(encrypted_vector), n_ranks)

    t0 = MPI.Wtime()
    local_pairs = [(i, encrypted_vector[i] * weights[i]) for i in my_indices]
    per_rank = sub_comm.gather(local_pairs, root=0)
    elapsed = MPI.Wtime() - t0

    inference_result = None
    if my_rank == 0:
        merged = sorted(
            (pair for chunk in per_rank for pair in chunk),
            key=lambda pair: pair[0],
        )
        total_ct = radix_sum([cipher for _, cipher in merged], radix=2)
        inference_result = private_key.decrypt(total_ct)
    return elapsed, inference_result


def run_experiment(exp_cores, input_vector, weights, public_key, private_key, world_comm, results):
    """
    Run both benchmark phases on the first ``exp_cores`` ranks of ``world_comm``.

    Ranks at or above ``exp_cores`` receive a null communicator from the split and
    return immediately. The working group's rank 0 prints the measurements and
    appends one formatted row to ``results`` (nothing is written to disk here).
    """
    world_rank = world_comm.Get_rank()

    if world_comm.Get_size() != exp_cores:
        color = MPI.UNDEFINED if world_rank >= exp_cores else 0
        sub_comm = world_comm.Split(color, world_rank)
    else:
        sub_comm = world_comm
    if sub_comm == MPI.COMM_NULL:
        return

    enc_time, encrypted_vector = input_encryption_experiment(sub_comm, input_vector, public_key)
    inf_time, inference_result = model_inference_experiment(sub_comm, encrypted_vector, weights, private_key)

    if sub_comm.Get_rank() == 0:
        report = (
            f"Experiment with {exp_cores} cores:",
            f"  Input Encryption Time: {enc_time:.6f} sec",
            f"  Model Inference Time:  {inf_time:.6f} sec",
            f"  Inference Result (Dot Product): {inference_result}",
        )
        for line in report:
            print(line, flush=True)

        row = [VERSION, exp_cores, f"{enc_time:.6f}", f"{inf_time:.6f}", inference_result]
        results.append(row)

    # Release the communicator only if it came from Split.
    if sub_comm != world_comm:
        sub_comm.Free()


def main():
    """
    Drive the cached Paillier benchmark.

    Rank 0 downloads the dataset, takes the first eight columns of the first row as
    the input vector and generates a Paillier key pair with ``n_length=1024``; these
    are broadcast to every rank. Result rows are accumulated in a list and written
    to ``CSV_PATH`` by rank 0 once all core counts have been processed.
    """
    world_comm = MPI.COMM_WORLD
    rank = world_comm.Get_rank()
    world_size = world_comm.Get_size()
    is_root = rank == 0

    # Rows collected across experiments; only rank 0 ever appends to it.
    results = []

    input_vector = None
    public_key = private_key = None
    if is_root:
        print(f"== {VERSION} ==", flush=True)

        url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
        df = pd.read_csv(url, header=None)
        input_vector = df.iloc[0, :8].tolist()
        public_key, private_key = paillier.generate_paillier_keypair(n_length=1024)

    input_vector = world_comm.bcast(input_vector, root=0)
    public_key = world_comm.bcast(public_key, root=0)
    private_key = world_comm.bcast(private_key, root=0)

    weights = [2, 1, 3, 1, 1, 2, 1, 1]

    for exp_cores in (4, 8, 16, 32):
        if world_size < exp_cores:
            if is_root:
                print(f"Skipping experiment with {exp_cores} cores (world size {world_size} < {exp_cores}).", flush=True)
            continue
        run_experiment(exp_cores, input_vector, weights, public_key, private_key, world_comm, results)

    # Single write at the end, header included.
    if is_root:
        write_to_csv(CSV_PATH, results)


if __name__ == '__main__':
    main()

Overwriting benchmark2_diabetes_paillier_cached.py


# BFV Uncached

In [None]:
%%writefile benchmark2_diabetes_bfv_uncached.py
from mpi4py import MPI
import pandas as pd
import numpy as np
import time
import tenseal as ts
import os
import csv

# Constants
VERSION = 'BFV TenSEAL Uncached'
CSV_PATH = '/content/drive/MyDrive/praxisfiles/Benchmark/benchmark2_diabetes_bfv_uncached.csv'

# CSV helpers
def write_csv_header(path):
    """Truncate/create the CSV at ``path`` and write the column header row."""
    header = (
        'Version',
        'Cores',
        'Input Encryption Time (sec)',
        'Model Inference Time (sec)',
        'Inference Result (Dot Product)',
    )
    with open(path, 'w', newline='') as out:
        csv.writer(out).writerow(header)

def write_csv_row(path, version, cores, enc_time, inf_time, result):
    """Append one result row to ``path``; the two timings use six decimal places."""
    enc_text = format(enc_time, '.6f')
    inf_text = format(inf_time, '.6f')
    with open(path, 'a', newline='') as out:
        csv.writer(out).writerow([version, cores, enc_text, inf_text, result])

# Existing functions
def sequential_sum(ciphertexts):
    """Add the items of ``ciphertexts`` left to right and return the total.

    The sequence must contain at least one item.
    """
    total = ciphertexts[0]
    position = 1
    while position < len(ciphertexts):
        total = total + ciphertexts[position]
        position += 1
    return total

def input_encryption_experiment(sub_comm, input_vector, ctx):
    """
    Encrypt ``input_vector`` as single-element BFV vectors across ``sub_comm``.

    Rank ``r`` handles indices ``r, r + size, ...``; each value is truncated with
    ``int()``, encrypted under ``ctx`` and serialized. The timed region covers that
    local encrypt-and-serialize loop only. Serialized ciphertexts are gathered on
    rank 0, ordered by index, broadcast, and deserialized on every rank.

    Returns ``(local_encryption_time, encrypted_vector)``.
    """
    my_rank = sub_comm.Get_rank()
    n_ranks = sub_comm.Get_size()
    my_indices = range(my_rank, len(input_vector), n_ranks)

    t0 = MPI.Wtime()
    local_blobs = []
    for feature_idx in my_indices:
        encrypted = ts.bfv_vector(ctx, [int(input_vector[feature_idx])])
        local_blobs.append((feature_idx, encrypted.serialize()))
    elapsed = MPI.Wtime() - t0

    per_rank = sub_comm.gather(local_blobs, root=0)

    ordered_blobs = None
    if my_rank == 0:
        merged = sorted(
            (pair for chunk in per_rank for pair in chunk),
            key=lambda pair: pair[0],
        )
        ordered_blobs = [blob for _, blob in merged]
    ordered_blobs = sub_comm.bcast(ordered_blobs, root=0)

    return elapsed, [ts.bfv_vector_from(ctx, blob) for blob in ordered_blobs]

def model_inference_experiment(sub_comm, encrypted_vector, weights, ctx):
    """
    Compute the encrypted dot product of ``encrypted_vector`` and ``weights``.

    Each rank multiplies its round-robin share of ciphertexts by the matching weight
    and serializes the products, which are gathered on rank 0. The timed region
    covers the multiply/serialize loop and the gather. Rank 0 then deserializes the
    products in index order, adds them with ``sequential_sum`` and decrypts.

    Returns ``(inference_time, result)``; ``result`` is the first decrypted slot on
    rank 0 and ``None`` elsewhere.
    """
    my_rank = sub_comm.Get_rank()
    n_ranks = sub_comm.Get_size()
    my_indices = range(my_rank, len(encrypted_vector), n_ranks)

    t0 = MPI.Wtime()
    local_blobs = []
    for feature_idx in my_indices:
        product = encrypted_vector[feature_idx] * weights[feature_idx]
        local_blobs.append((feature_idx, product.serialize()))
    per_rank = sub_comm.gather(local_blobs, root=0)
    elapsed = MPI.Wtime() - t0

    if my_rank != 0:
        return elapsed, None

    merged = sorted(
        (pair for chunk in per_rank for pair in chunk),
        key=lambda pair: pair[0],
    )
    products = [ts.bfv_vector_from(ctx, blob) for _, blob in merged]
    total_ct = sequential_sum(products)
    return elapsed, total_ct.decrypt()[0]

# Modified run_experiment to use new CSV writing functions
def run_experiment(exp_cores, input_vector, weights, ctx, world_comm, is_first_run):
    """
    Run the encryption and inference phases on the first ``exp_cores`` ranks.

    The world communicator is used directly when it already has ``exp_cores`` ranks;
    otherwise it is split and ranks at or above ``exp_cores`` sit the experiment out.
    The working group's rank 0 prints the measurements and appends them to
    ``CSV_PATH``, first (re)writing the header when ``is_first_run`` is true.

    Args:
        exp_cores: number of ranks taking part in this experiment.
        input_vector: plaintext feature values.
        weights: per-feature plaintext weights.
        ctx: TenSEAL context used for encryption and decryption.
        world_comm: communicator the working group is carved from.
        is_first_run: when true, the header is written before the first row.

    Returns:
        The updated flag: ``False`` on a rank that wrote the header, otherwise the
        value passed in. Excluded ranks get the flag back unchanged.
    """
    rank = world_comm.Get_rank()
    if world_comm.Get_size() == exp_cores:
        sub_comm = world_comm
    else:
        color = 0 if rank < exp_cores else MPI.UNDEFINED
        sub_comm = world_comm.Split(color, rank)
    if sub_comm == MPI.COMM_NULL:
        # Excluded ranks wrote nothing; return the flag as-is rather than an
        # implicit None so every rank sees the same return type.
        return is_first_run

    enc_time, encrypted_vector = input_encryption_experiment(sub_comm, input_vector, ctx)
    inf_time, inference_result = model_inference_experiment(sub_comm, encrypted_vector, weights, ctx)

    if sub_comm.Get_rank() == 0:
        print(f"Experiment with {exp_cores} cores:")
        print(f"  Input Encryption Time: {enc_time:.6f} sec")
        print(f"  Model Inference Time:  {inf_time:.6f} sec")
        print(f"  Inference Result (Dot Product): {inference_result}")

        # On the first run the header is written (truncating the file) before
        # the row; afterwards rows are only appended.
        if is_first_run:
            write_csv_header(CSV_PATH)
            is_first_run = False
        write_csv_row(CSV_PATH, VERSION, exp_cores, enc_time, inf_time, inference_result)

    # Only communicators created by Split are released.
    if sub_comm != world_comm:
        sub_comm.Free()

    return is_first_run

# Main function modified to create new CSV each run
def main():
    """
    Drive the uncached BFV benchmark.

    Rank 0 writes a fresh CSV header, downloads the dataset, takes the first eight
    columns of the first row as the input vector and creates the BFV context, which
    is serialized together with its secret key. Input, weights and the serialized
    context are broadcast so every rank can rebuild the context, then the experiment
    runs for each core count that fits in the world size.
    """
    world_comm = MPI.COMM_WORLD
    rank = world_comm.Get_rank()
    world_size = world_comm.Get_size()

    if rank == 0:
        # Always create a new CSV file with headers at the start of each run
        write_csv_header(CSV_PATH)
        print(f"== {VERSION} ==")

    if rank == 0:
        url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
        df = pd.read_csv(url, header=None)
        input_vector = df.iloc[0, :8].tolist()
        weights = [2, 1, 3, 1, 1, 2, 1, 1]
        ctx = ts.context(
            ts.SCHEME_TYPE.BFV,
            poly_modulus_degree=8192,
            plain_modulus=1032193
        )
        # The secret key travels with the context so decryption works after
        # the context is rebuilt from the broadcast bytes.
        serialized_ctx = ctx.serialize(save_secret_key=True)
    else:
        input_vector = None
        weights = None
        serialized_ctx = None

    input_vector = world_comm.bcast(input_vector, root=0)
    weights = world_comm.bcast(weights, root=0)
    serialized_ctx = world_comm.bcast(serialized_ctx, root=0)
    ctx = ts.context_from(serialized_ctx)

    for exp_cores in [4, 8, 16, 32]:
        if world_size >= exp_cores:
            # The header was already written above, so run_experiment must
            # only append rows (is_first_run=False).
            run_experiment(exp_cores, input_vector, weights, ctx, world_comm, is_first_run=False)
        else:
            if rank == 0:
                print(f"Skipping experiment with {exp_cores} cores (world size {world_size} < {exp_cores}).")

if __name__ == '__main__':
    main()

Overwriting benchmark2_diabetes_bfv_uncached.py


# BFV Cached

In [None]:
%%writefile benchmark2_diabetes_bfv_cached.py
from mpi4py import MPI
import pandas as pd
import numpy as np
import time
import tenseal as ts
import os
import csv
import math

# Constants
VERSION = 'BFV TenSEAL Cached'
CSV_PATH = '/content/drive/MyDrive/praxisfiles/Benchmark/benchmark2_diabetes_bfv_cached.csv'
RADIX = 2  # binary decomposition

# CSV helpers
def write_csv_header(path):
    """Replace whatever is at ``path`` with a CSV containing only the header row."""
    columns = [
        'Version',
        'Cores',
        'Input Encryption Time (sec)',
        'Model Inference Time (sec)',
        'Inference Result (Dot Product)',
    ]
    with open(path, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(columns)

def append_csv(path, version, cores, enc_time, inf_time, result):
    """Append a single result row to ``path``, formatting both timings to six decimals."""
    timings = [format(seconds, '.6f') for seconds in (enc_time, inf_time)]
    row = [version, cores, *timings, result]
    with open(path, 'a', newline='') as handle:
        csv.writer(handle).writerow(row)

def radix_encrypt(plaintext, cache, radix=None):
    """
    Build a ciphertext for ``plaintext`` from cached encryptions of radix powers.

    The integer part of ``plaintext`` is decomposed into base-``radix`` digits; for
    every non-zero digit at position ``k`` the cached term ``cache[k]`` is scaled by
    that digit and the scaled terms are added together homomorphically.

    Args:
        plaintext: value to encrypt; converted with ``int()``, so fractions are dropped.
        cache: sequence in which ``cache[k]`` is expected to encrypt ``radix**k``.
        radix: base of the decomposition; defaults to the module-level ``RADIX``.

    Returns:
        A ciphertext for ``int(plaintext)``.

    Raises:
        ValueError: if ``radix`` is below 2, or ``cache`` holds too few powers to
            represent the value.
    """
    if radix is None:
        radix = RADIX
    if radix < 2:
        # With radix 1 the value would never shrink and the loop would not end.
        raise ValueError(f"radix must be at least 2, got {radix}")

    value = int(plaintext)

    if value == 0:
        # Multiplying by 0 causes a "transparent ciphertext" error, so encrypt a
        # fresh zero under the cache's own context instead.
        return ts.bfv_vector(cache[0].context(), [0])

    negative = value < 0
    magnitude = abs(value)

    ct = None
    k = 0
    while magnitude > 0:
        magnitude, digit = divmod(magnitude, radix)
        if digit:  # zero digits contribute nothing
            if k >= len(cache):
                raise ValueError(
                    f"cache holds {len(cache)} power(s) of {radix}, "
                    f"too few to encode {value}"
                )
            term = cache[k] * digit
            ct = term if ct is None else ct + term
        k += 1

    # value != 0 guarantees at least one non-zero digit, so ct is set here.
    if negative:
        ct = ct * (-1)

    return ct

def radix_sum(ciphertexts, radix=2):
    """
    Sum a sequence of BFVVector objects using a tree-based reduction.

    Consecutive groups of ``radix`` items are added left to right with ``+``; the
    group sums form the next level, and this repeats until one value is left. The
    caller's sequence is not modified.

    Args:
        ciphertexts: non-empty iterable of objects supporting ``+``.
        radix: group size per reduction level; must be at least 2.

    Returns:
        The sum of all items.

    Raises:
        ValueError: if ``radix`` is below 2 or ``ciphertexts`` is empty.
    """
    if radix < 2:
        # Groups of one element never reduce the list, which would loop forever.
        raise ValueError(f"radix must be at least 2, got {radix}")

    current = list(ciphertexts)
    if not current:
        raise ValueError("radix_sum() requires at least one ciphertext")

    while len(current) > 1:
        reduced = []
        for start in range(0, len(current), radix):
            partial = current[start]
            for ct in current[start + 1:start + radix]:
                partial = partial + ct
            reduced.append(partial)
        current = reduced
    return current[0]

def input_encryption_experiment(sub_comm, input_vector, ctx, cache):
    """
    Encrypt ``input_vector`` across ``sub_comm`` using the radix cache.

    Rank ``r`` handles indices ``r, r + size, ...``; each value goes through
    ``radix_encrypt`` with ``cache`` and the resulting ciphertext is serialized.
    Only this local loop is timed. Serialized ciphertexts are gathered on rank 0,
    ordered by index, broadcast, and deserialized under ``ctx`` on every rank.

    Returns ``(local_encryption_time, encrypted_vector)``.
    """
    my_rank = sub_comm.Get_rank()
    n_ranks = sub_comm.Get_size()
    my_indices = range(my_rank, len(input_vector), n_ranks)

    t0 = MPI.Wtime()
    local_blobs = []
    for feature_idx in my_indices:
        encrypted = radix_encrypt(input_vector[feature_idx], cache)
        # The serialized form is what gets gathered and broadcast.
        local_blobs.append((feature_idx, encrypted.serialize()))
    elapsed = MPI.Wtime() - t0

    per_rank = sub_comm.gather(local_blobs, root=0)

    ordered_blobs = None
    if my_rank == 0:
        merged = sorted(
            (pair for chunk in per_rank for pair in chunk),
            key=lambda pair: pair[0],
        )
        ordered_blobs = [blob for _, blob in merged]
    ordered_blobs = sub_comm.bcast(ordered_blobs, root=0)

    # Rebuild BFVVector objects from the broadcast bytes.
    return elapsed, [ts.bfv_vector_from(ctx, blob) for blob in ordered_blobs]

def model_inference_experiment(sub_comm, encrypted_vector, weights, ctx):
    """
    Compute the encrypted dot product of ``encrypted_vector`` and ``weights``.

    Each rank multiplies its round-robin share of ciphertexts by the matching weight
    and serializes the products, which are gathered on rank 0. The timed region
    covers that loop and the gather. Rank 0 then deserializes the products in index
    order, combines them with ``radix_sum(..., radix=RADIX)`` and decrypts.

    Returns ``(inference_time, result)``; ``result`` is the first decrypted slot on
    rank 0 and ``None`` elsewhere.
    """
    my_rank = sub_comm.Get_rank()
    n_ranks = sub_comm.Get_size()
    my_indices = range(my_rank, len(encrypted_vector), n_ranks)

    t0 = MPI.Wtime()
    local_blobs = []
    for feature_idx in my_indices:
        product = encrypted_vector[feature_idx] * weights[feature_idx]
        local_blobs.append((feature_idx, product.serialize()))
    per_rank = sub_comm.gather(local_blobs, root=0)
    elapsed = MPI.Wtime() - t0

    inference_result = None
    if my_rank == 0:
        merged = sorted(
            (pair for chunk in per_rank for pair in chunk),
            key=lambda pair: pair[0],
        )
        products = [ts.bfv_vector_from(ctx, blob) for _, blob in merged]
        total_ct = radix_sum(products, radix=RADIX)
        inference_result = total_ct.decrypt()[0]
    return elapsed, inference_result

def run_experiment(exp_cores, input_vector, weights, ctx, cache, world_comm, results):
    """
    Run one benchmark round on a group of ``exp_cores`` ranks.

    The group is the world communicator itself when the sizes match, otherwise a
    split containing ranks below ``exp_cores``; other ranks return immediately.
    The group runs the cache-based input encryption followed by the weighted
    dot-product inference. Its rank 0 prints both timings and the result, and
    appends a ``(VERSION, cores, enc_time, inf_time, result)`` tuple to ``results``.
    """
    world_rank = world_comm.Get_rank()
    if world_comm.Get_size() != exp_cores:
        color = MPI.UNDEFINED if world_rank >= exp_cores else 0
        sub_comm = world_comm.Split(color, world_rank)
    else:
        sub_comm = world_comm
    if sub_comm == MPI.COMM_NULL:
        return

    enc_time, encrypted_vector = input_encryption_experiment(sub_comm, input_vector, ctx, cache)
    inf_time, inference_result = model_inference_experiment(sub_comm, encrypted_vector, weights, ctx)

    if sub_comm.Get_rank() == 0:
        report = (
            f"Experiment with {exp_cores} cores:",
            f"  Input Encryption Time: {enc_time:.6f} sec",
            f"  Model Inference Time:  {inf_time:.6f} sec",
            f"  Inference Result (Dot Product): {inference_result}",
        )
        for line in report:
            print(line, flush=True)
        # Raw values are stored; formatting happens when the CSV is written.
        results.append((VERSION, exp_cores, enc_time, inf_time, inference_result))

    if sub_comm != world_comm:
        sub_comm.Free()

def main():
    """
    Drive the cached BFV benchmark.

    Rank 0 downloads the dataset, takes the first eight columns of the first row as
    the input vector, and sizes the radix cache from the largest magnitude in it.
    It then creates the BFV context, encrypts the powers of ``RADIX`` into the
    cache, and broadcasts the serialized context (with secret key) and cache so
    every rank can rebuild them. Experiments run for each core count that fits in
    the world size; rank 0 writes all collected rows to ``CSV_PATH`` at the end.
    """
    world_comm = MPI.COMM_WORLD
    rank = world_comm.Get_rank()
    world_size = world_comm.Get_size()

    # Store results in a list instead of writing immediately
    results = []

    if rank == 0:
        print(f"== {VERSION} ==", flush=True)

    # Load the Pima Indian Diabetes dataset on the root using pandas.
    if rank == 0:
        url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
        df = pd.read_csv(url, header=None)
        # Extract the first sample's features (columns 0 to 7).
        input_vector = df.iloc[0, :8].tolist()

        # Size the cache from the largest magnitude: radix_encrypt decomposes
        # abs(value), so a large negative feature needs as many powers as a
        # large positive one.
        max_abs = max(abs(int(x)) for x in input_vector)
        # Exact count of base-RADIX digits, computed with integers to avoid
        # floating-point rounding in a logarithm.
        bits = 0
        while max_abs > 0:
            max_abs //= RADIX
            bits += 1
    else:
        input_vector = None
        bits = None

    # Broadcast the input vector and bits to all processes.
    input_vector = world_comm.bcast(input_vector, root=0)
    bits = world_comm.bcast(bits, root=0)

    # Define predetermined model weights (8 weights).
    weights = [2, 1, 3, 1, 1, 2, 1, 1]

    # On the root, create a TenSEAL BFV context.
    if rank == 0:
        ctx = ts.context(
            ts.SCHEME_TYPE.BFV,
            poly_modulus_degree=8192,
            plain_modulus=1032193
        )

        # Create cache of encrypted powers of RADIX (one more than strictly needed).
        cache = [ts.bfv_vector(ctx, [RADIX**k]) for k in range(bits+1)]

        # Serialize the context with the secret key so that decryption works.
        serialized_ctx = ctx.serialize(save_secret_key=True)

        # Serialize cache for broadcast
        serialized_cache = [c.serialize() for c in cache]
    else:
        serialized_ctx = None
        serialized_cache = None

    # Broadcast the serialized context and reconstruct it on all processes.
    serialized_ctx = world_comm.bcast(serialized_ctx, root=0)
    ctx = ts.context_from(serialized_ctx)

    # Broadcast the serialized cache and reconstruct it on all processes
    serialized_cache = world_comm.bcast(serialized_cache, root=0)
    cache = [ts.bfv_vector_from(ctx, s) for s in serialized_cache]

    # Run experiments for subcommunicator sizes: 4, 8, 16, and 32 cores.
    experiment_core_counts = [4, 8, 16, 32]
    for exp_cores in experiment_core_counts:
        if world_size >= exp_cores:
            run_experiment(exp_cores, input_vector, weights, ctx, cache, world_comm, results)
        else:
            if rank == 0:
                print(f"Skipping experiment with {exp_cores} cores (world size {world_size} < {exp_cores}).", flush=True)

    # Write all results to CSV at once, overwriting the file
    if rank == 0:
        write_csv_header(CSV_PATH)
        for result in results:
            append_csv(CSV_PATH, *result)

if __name__ == '__main__':
    main()

Overwriting benchmark2_diabetes_bfv_cached.py


# CKKS Uncached

In [None]:
%%writefile benchmark2_diabetes_ckks_uncached.py
from mpi4py import MPI
import pandas as pd
import numpy as np
import time
import tenseal as ts
import math
import os
import csv

# Label for this benchmark variant; printed as a banner in main() and written
# to the 'Version' column of every result row.
VERSION = 'CKKS TenSEAL Uncached'
# Destination of the results CSV (header written in main(), rows appended in
# run_experiment()).
CSV_PATH = '/content/drive/MyDrive/praxisfiles/Benchmark/benchmark2_diabetes_ckks_uncached.csv'

def write_csv_header(path):
    """Create (or truncate) the results CSV at ``path`` and write its header row.

    Any file already present at ``path`` is overwritten, so afterwards the
    file contains the column titles and nothing else.
    """
    column_titles = (
        'Version',
        'Cores',
        'Input Encryption Time (sec)',
        'Model Inference Time (sec)',
        'Inference Result (Dot Product)',
    )
    with open(path, 'w', newline='') as handle:
        csv.writer(handle).writerow(column_titles)

def write_csv_row(path, version, cores, enc_time, inf_time, result):
    """Append one experiment record to the CSV at ``path``.

    Args:
        path: CSV file to append to (created if it does not exist).
        version: Label stored in the 'Version' column.
        cores: Number of cores used for the experiment.
        enc_time: Input encryption time in seconds; stored with 6 decimals.
        inf_time: Model inference time in seconds; stored with 6 decimals.
        result: Inference result, written as-is.
    """
    with open(path, 'a', newline='') as handle:
        record = (
            version,
            cores,
            format(enc_time, ".6f"),
            format(inf_time, ".6f"),
            result,
        )
        csv.writer(handle).writerow(record)

def direct_encrypt(plaintext, ctx):
    """
    Encrypt one scalar as a single-element CKKS vector under ``ctx``.

    The value is converted to ``float`` and encrypted in one call; there is
    no radix decomposition and no ciphertext cache involved.
    """
    return ts.ckks_vector(ctx, [float(plaintext)])

def sequential_sum(ciphertexts):
    """
    Add up a list of CKKSVector objects left to right.

    Returns ``None`` for an empty (or falsy) input; otherwise the running
    total obtained by folding ``+`` over the elements in order. This is the
    plain linear alternative to a tree-based reduction.
    """
    if not ciphertexts:
        return None

    remaining = iter(ciphertexts)
    total = next(remaining)
    for addend in remaining:
        total = total + addend
    return total

def input_encryption_experiment(sub_comm, input_vector, ctx):
    """
    Encrypt ``input_vector`` cooperatively across the ranks of ``sub_comm``.

    Each rank directly encrypts (no cache) the features it owns, the
    serialized ciphertexts are gathered on rank 0, ordered by feature index,
    and broadcast back so that every rank ends up with the full encrypted
    vector.

    Returns:
        (elapsed, encrypted_vector): ``elapsed`` is this rank's local time
        spent encrypting and serializing its own features (the gather and
        broadcast are outside the timed region); ``encrypted_vector`` is the
        list of deserialized CKKS vectors in feature order.
    """
    my_rank = sub_comm.Get_rank()
    n_procs = sub_comm.Get_size()

    # Round-robin ownership: this rank handles features
    # my_rank, my_rank + n_procs, my_rank + 2 * n_procs, ...
    owned = range(my_rank, len(input_vector), n_procs)

    t0 = MPI.Wtime()
    # Ciphertexts are serialized so they can be pickled by the MPI layer.
    local_pairs = [
        (idx, direct_encrypt(input_vector[idx], ctx).serialize())
        for idx in owned
    ]
    elapsed = MPI.Wtime() - t0

    # Collect every rank's (index, serialized ciphertext) pairs on the root.
    all_pairs = sub_comm.gather(local_pairs, root=0)

    serialized_vector = None
    if my_rank == 0:
        ordered = sorted(
            (pair for chunk in all_pairs for pair in chunk),
            key=lambda pair: pair[0],
        )
        serialized_vector = [blob for _, blob in ordered]

    # Share the ordered serialized ciphertexts with every rank and rebuild
    # CKKSVector objects locally.
    serialized_vector = sub_comm.bcast(serialized_vector, root=0)
    encrypted_vector = [ts.ckks_vector_from(ctx, blob) for blob in serialized_vector]
    return elapsed, encrypted_vector

def model_inference_experiment(sub_comm, encrypted_vector, weights, ctx):
    """
    Compute the weighted dot product of the encrypted features homomorphically.

    Every rank multiplies the ciphertexts it owns by the matching plaintext
    weight; rank 0 gathers the products, adds them sequentially and decrypts.

    Returns:
        (elapsed, inference_result): ``elapsed`` covers the local
        multiply/serialize work plus the gather. ``inference_result`` is the
        first slot of the decrypted sum on rank 0 and ``None`` elsewhere.
    """
    my_rank = sub_comm.Get_rank()
    n_procs = sub_comm.Get_size()

    # Same round-robin ownership as in the encryption phase.
    owned = range(my_rank, len(encrypted_vector), n_procs)

    t0 = MPI.Wtime()
    local_pairs = [
        (idx, (encrypted_vector[idx] * weights[idx]).serialize())
        for idx in owned
    ]
    all_pairs = sub_comm.gather(local_pairs, root=0)

    # NOTE(review): the clock stops here, so the root-side deserialization,
    # summation and decryption below are not part of the reported time.
    elapsed = MPI.Wtime() - t0

    if my_rank != 0:
        return elapsed, None

    ordered = sorted(
        (pair for chunk in all_pairs for pair in chunk),
        key=lambda pair: pair[0],
    )
    weighted_cts = [ts.ckks_vector_from(ctx, blob) for _, blob in ordered]
    final_ciphertext = sequential_sum(weighted_cts)
    # CKKS decryption yields approximate floats; only slot 0 carries the result.
    return elapsed, final_ciphertext.decrypt()[0]

def run_experiment(exp_cores, input_vector, weights, ctx, world_comm):
    """
    Run both benchmark phases on the first ``exp_cores`` ranks of ``world_comm``.

    A sub-communicator containing world ranks ``0 .. exp_cores - 1`` is
    created (the world communicator itself is reused when the sizes match).
    Ranks outside the group return immediately. Inside the group the input
    encryption and model inference experiments are executed; the group's
    rank 0 prints the timings and result and appends them to the CSV.
    """
    world_rank = world_comm.Get_rank()

    if world_comm.Get_size() != exp_cores:
        # Every world rank takes part in the Split call; ranks beyond the
        # requested size pass MPI.UNDEFINED and get MPI.COMM_NULL back.
        membership = MPI.UNDEFINED if world_rank >= exp_cores else 0
        sub_comm = world_comm.Split(membership, world_rank)
    else:
        sub_comm = world_comm

    if sub_comm == MPI.COMM_NULL:
        return

    enc_time, encrypted_vector = input_encryption_experiment(sub_comm, input_vector, ctx)
    inf_time, inference_result = model_inference_experiment(sub_comm, encrypted_vector, weights, ctx)

    if sub_comm.Get_rank() == 0:
        report_lines = (
            f"Experiment with {exp_cores} cores:",
            f"  Input Encryption Time: {enc_time:.6f} sec",
            f"  Model Inference Time:  {inf_time:.6f} sec",
            f"  Inference Result (Dot Product): {inference_result}",
        )
        for line in report_lines:
            print(line, flush=True)
        # Persist the same numbers to the results CSV.
        write_csv_row(CSV_PATH, VERSION, exp_cores, enc_time, inf_time, inference_result)

    # Only release communicators this function created itself.
    if sub_comm != world_comm:
        sub_comm.Free()

def main():
    """
    Entry point of the uncached CKKS benchmark.

    Rank 0 resets the results CSV, downloads the Pima Indians Diabetes
    dataset and builds the CKKS context; the first sample's eight features
    and the serialized context (including the secret key) are broadcast to
    all ranks. The experiment is then run for 4, 8, 16 and 32 cores,
    skipping sizes larger than the world communicator.
    """
    world_comm = MPI.COMM_WORLD
    rank = world_comm.Get_rank()
    world_size = world_comm.Get_size()
    is_root = rank == 0

    input_vector = None
    if is_root:
        write_csv_header(CSV_PATH)
        print(f"== {VERSION} ==", flush=True)

        # Only the root touches the network; the sample is broadcast below.
        url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
        df = pd.read_csv(url, header=None)
        # First sample, feature columns 0..7.
        input_vector = df.iloc[0, :8].tolist()

    input_vector = world_comm.bcast(input_vector, root=0)

    # Fixed plaintext model weights, one per feature.
    weights = [2, 1, 3, 1, 1, 2, 1, 1]

    serialized_ctx = None
    if is_root:
        root_ctx = ts.context(
            ts.SCHEME_TYPE.CKKS,
            poly_modulus_degree=8192,
            coeff_mod_bit_sizes=[60, 40, 40, 60]
        )
        root_ctx.global_scale = 2**40
        # The secret key is included so that decryption works after
        # the context is rebuilt from its serialized form.
        serialized_ctx = root_ctx.serialize(save_secret_key=True)

    # Every rank (root included) rebuilds the context from the broadcast bytes.
    serialized_ctx = world_comm.bcast(serialized_ctx, root=0)
    ctx = ts.context_from(serialized_ctx)

    for exp_cores in (4, 8, 16, 32):
        if world_size >= exp_cores:
            run_experiment(exp_cores, input_vector, weights, ctx, world_comm)
        elif is_root:
            print(f"Skipping experiment with {exp_cores} cores (world size {world_size} < {exp_cores}).", flush=True)

if __name__ == '__main__':
    main()

Overwriting benchmark2_diabetes_ckks_uncached.py


# CKKS Cached

In [None]:
%%writefile benchmark2_diabetes_ckks_cached.py
from mpi4py import MPI
import pandas as pd
import numpy as np
import time
import tenseal as ts
import math
import os
import csv

# Label for this benchmark variant; printed as a banner in main() and written
# to the 'Version' column of every result row.
VERSION = 'CKKS TenSEAL Cached'
# Destination of the results CSV (initialised in main(), updated in
# run_experiment()).
CSV_PATH = '/content/drive/MyDrive/praxisfiles/Benchmark/benchmark2_diabetes_ckks_cached.csv'
# Base used for the digit decomposition in radix_encrypt(), for the cached
# powers built in main(), and as the group size passed to radix_sum().
RADIX = 2  # binary decomposition

def init_csv(path):
    """Start a fresh results CSV at ``path`` containing only the header row.

    The file is opened in write mode, so existing content is discarded.
    """
    header = (
        'Version',
        'Cores',
        'Input Encryption Time (sec)',
        'Model Inference Time (sec)',
        'Inference Result (Dot Product)',
    )
    with open(path, 'w', newline='') as out:
        csv.writer(out).writerow(header)

def write_to_csv(path, version, cores, enc_time, inf_time, result):
    """Insert or replace the result row for ``(version, cores)`` in the CSV at ``path``.

    The existing data rows are read, the row whose 'Version' equals
    ``version`` and whose 'Cores' parses to ``cores`` is replaced (or a new
    row is appended when there is none), and the whole file is rewritten
    with the standard header.

    A missing file is treated as an empty table. Blank lines are dropped and
    rows that are too short or have a non-integer 'Cores' cell are kept
    untouched but never matched, instead of aborting the write.

    Args:
        path: CSV file to update (created if it does not exist).
        version: Label stored in the 'Version' column.
        cores: Number of cores; compared against ``int()`` of the stored cell.
        enc_time: Input encryption time in seconds; stored with 6 decimals.
        inf_time: Model inference time in seconds; stored with 6 decimals.
        result: Inference result, written as-is.
    """
    header = [
        'Version',
        'Cores',
        'Input Encryption Time (sec)',
        'Model Inference Time (sec)',
        'Inference Result (Dot Product)'
    ]
    new_row = [
        version,
        cores,
        f"{enc_time:.6f}",
        f"{inf_time:.6f}",
        result
    ]

    def is_same_experiment(row):
        # Malformed rows must not crash the update; they simply never match.
        if len(row) < 2 or row[0] != version:
            return False
        try:
            return int(row[1]) == cores
        except ValueError:
            return False

    try:
        with open(path, 'r', newline='') as f:
            reader = csv.reader(f)
            next(reader, None)  # skip the header line
            # csv.reader yields [] for blank lines; they carry no data.
            rows = [row for row in reader if row]
    except FileNotFoundError:
        rows = []

    for i, row in enumerate(rows):
        if is_same_experiment(row):
            rows[i] = new_row
            break
    else:
        rows.append(new_row)

    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(rows)

def radix_encrypt(plaintext, cache):
    """
    Build a ciphertext of ``plaintext`` from cached encryptions of RADIX**k.

    The magnitude of the value is truncated to an integer, written in base
    RADIX, and for every non-zero digit ``cache[k] * digit`` is added to the
    running ciphertext. Negative inputs are handled by multiplying the
    result by -1.

    Note that only the integer part is represented: the fractional part of
    ``plaintext`` is discarded, and any value whose magnitude is below one
    (including zero) yields a fresh encryption of 0.0 created in the context
    of ``cache[0]``.
    """
    number = float(plaintext)
    negative = number < 0
    remaining = int(abs(number))

    # Nothing to decompose: return a direct encryption of zero.
    if remaining == 0:
        return ts.ckks_vector(cache[0].context(), [0.0])

    acc = None
    power = 0
    while remaining > 0:
        remaining, digit = divmod(remaining, RADIX)
        if digit:
            scaled = cache[power] * digit
            acc = scaled if acc is None else acc + scaled
        power += 1

    # The most significant digit of a positive integer is non-zero, so the
    # loop above has always produced a ciphertext at this point.
    if negative:
        acc = acc * (-1)
    return acc

def radix_sum(ciphertexts, radix=2):
    """
    Sum a list of CKKSVector objects with a tree-based reduction.

    The list is cut into consecutive groups of ``radix`` elements, each
    group is added up left to right, and the procedure repeats on the group
    sums until a single value remains.

    Args:
        ciphertexts: Values supporting ``+``; the input list is not modified.
        radix: Group size per reduction level; must be at least 2, because
            groups of one element would never shrink the list.

    Returns:
        The total, or ``None`` when ``ciphertexts`` is empty.

    Raises:
        ValueError: If ``radix`` is smaller than 2.
    """
    if radix < 2:
        raise ValueError(f"radix must be at least 2, got {radix}")
    if not ciphertexts:
        return None

    level = list(ciphertexts)
    while len(level) > 1:
        next_level = []
        for start in range(0, len(level), radix):
            group = level[start:start + radix]
            partial = group[0]
            for ct in group[1:]:
                partial = partial + ct
            next_level.append(partial)
        level = next_level
    return level[0]

def input_encryption_experiment(sub_comm, input_vector, ctx, cache):
    """
    Encrypt ``input_vector`` across the ranks of ``sub_comm`` using the cache.

    Each rank encrypts the features it owns with ``radix_encrypt`` and the
    shared ``cache``, the serialized ciphertexts are gathered on rank 0,
    sorted by feature index and broadcast, so every rank returns the full
    encrypted vector.

    Returns:
        (elapsed, encrypted_vector): ``elapsed`` is the local time this rank
        spent encrypting and serializing its features (communication is not
        timed); ``encrypted_vector`` holds the deserialized CKKS vectors in
        feature order.
    """
    my_rank = sub_comm.Get_rank()
    n_procs = sub_comm.Get_size()

    # Feature i belongs to the rank with i % n_procs == my_rank.
    owned = range(my_rank, len(input_vector), n_procs)

    t0 = MPI.Wtime()
    local_pairs = []
    for idx in owned:
        ciphertext = radix_encrypt(input_vector[idx], cache)
        # Serialized form is what travels through the MPI collectives.
        local_pairs.append((idx, ciphertext.serialize()))
    elapsed = MPI.Wtime() - t0

    all_pairs = sub_comm.gather(local_pairs, root=0)

    serialized_vector = None
    if my_rank == 0:
        merged = [pair for chunk in all_pairs for pair in chunk]
        merged.sort(key=lambda pair: pair[0])
        serialized_vector = [blob for _, blob in merged]

    # Hand the ordered serialized ciphertexts to every rank and rebuild them.
    serialized_vector = sub_comm.bcast(serialized_vector, root=0)
    encrypted_vector = [ts.ckks_vector_from(ctx, blob) for blob in serialized_vector]
    return elapsed, encrypted_vector

def model_inference_experiment(sub_comm, encrypted_vector, weights, ctx):
    """
    Compute the weighted dot product of the encrypted features homomorphically.

    Every rank scales the ciphertexts it owns by the matching plaintext
    weight; rank 0 gathers the products, reduces them with ``radix_sum``
    (groups of RADIX) and decrypts the total.

    Returns:
        (elapsed, inference_result): ``elapsed`` covers the local
        multiply/serialize work plus the gather. ``inference_result`` is the
        first slot of the decrypted sum on rank 0 and ``None`` on all other
        ranks.
    """
    my_rank = sub_comm.Get_rank()
    n_procs = sub_comm.Get_size()

    # Feature i belongs to the rank with i % n_procs == my_rank.
    owned = range(my_rank, len(encrypted_vector), n_procs)

    t0 = MPI.Wtime()
    local_pairs = []
    for idx in owned:
        product = encrypted_vector[idx] * weights[idx]
        local_pairs.append((idx, product.serialize()))
    all_pairs = sub_comm.gather(local_pairs, root=0)

    # NOTE(review): timing ends right after the gather; the tree reduction
    # and decryption performed on the root below are not included.
    elapsed = MPI.Wtime() - t0

    inference_result = None
    if my_rank == 0:
        merged = [pair for chunk in all_pairs for pair in chunk]
        merged.sort(key=lambda pair: pair[0])
        weighted_cts = [ts.ckks_vector_from(ctx, blob) for _, blob in merged]
        final_ciphertext = radix_sum(weighted_cts, radix=RADIX)
        # CKKS decryption returns approximate floats; slot 0 is the result.
        inference_result = final_ciphertext.decrypt()[0]
    return elapsed, inference_result

def run_experiment(exp_cores, input_vector, weights, ctx, cache, world_comm):
    """
    Run both benchmark phases on the first ``exp_cores`` ranks of ``world_comm``.

    A sub-communicator made of world ranks ``0 .. exp_cores - 1`` is formed
    (or the world communicator is used directly when it already has that
    size). Ranks outside the group return at once. Inside the group, the
    cache-based input encryption and the radix-sum model inference are
    executed; the group's rank 0 prints the timings and result and records
    them in the CSV.
    """
    world_rank = world_comm.Get_rank()

    if world_comm.Get_size() != exp_cores:
        # All world ranks call Split; those beyond the requested size opt
        # out with MPI.UNDEFINED and receive MPI.COMM_NULL.
        membership = MPI.UNDEFINED if world_rank >= exp_cores else 0
        sub_comm = world_comm.Split(membership, world_rank)
    else:
        sub_comm = world_comm

    if sub_comm == MPI.COMM_NULL:
        return

    enc_time, encrypted_vector = input_encryption_experiment(sub_comm, input_vector, ctx, cache)
    inf_time, inference_result = model_inference_experiment(sub_comm, encrypted_vector, weights, ctx)

    if sub_comm.Get_rank() == 0:
        report_lines = (
            f"Experiment with {exp_cores} cores:",
            f"  Input Encryption Time: {enc_time:.6f} sec",
            f"  Model Inference Time:  {inf_time:.6f} sec",
            f"  Inference Result (Dot Product): {inference_result}",
        )
        for line in report_lines:
            print(line, flush=True)
        # Store (or refresh) the row for this core count in the results CSV.
        write_to_csv(CSV_PATH, VERSION, exp_cores, enc_time, inf_time, inference_result)

    # The world communicator must not be freed; only the split one is ours.
    if sub_comm != world_comm:
        sub_comm.Free()

def main():
    """
    Entry point of the cached CKKS benchmark.

    Rank 0 resets the results CSV, downloads the Pima Indians Diabetes
    dataset, builds the CKKS context and encrypts the cache of RADIX powers.
    The first sample's eight features, the cache size, the serialized
    context (including the secret key) and the serialized cache are
    broadcast to all ranks. The experiment is then run for 4, 8, 16 and 32
    cores, skipping sizes larger than the world communicator.
    """
    world_comm = MPI.COMM_WORLD
    rank = world_comm.Get_rank()
    world_size = world_comm.Get_size()

    if rank == 0:
        init_csv(CSV_PATH)
        print(f"== {VERSION} ==", flush=True)

    # Load the Pima Indian Diabetes dataset on the root using pandas.
    if rank == 0:
        url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
        df = pd.read_csv(url, header=None)
        # Extract the first sample's features (columns 0 to 7).
        input_vector = df.iloc[0, :8].tolist()

        # radix_encrypt() decomposes the magnitude of each feature, so the
        # cache must cover the largest absolute value, not the largest
        # signed value (which would under-size the cache, or make the
        # logarithm fail, as soon as a negative feature dominates).
        max_val = max(abs(float(x)) for x in input_vector)
        bits = math.ceil(math.log(max_val + 1, RADIX))
    else:
        input_vector = None
        bits = None

    # Broadcast the input vector and bits to all processes.
    input_vector = world_comm.bcast(input_vector, root=0)
    bits = world_comm.bcast(bits, root=0)

    # Define predetermined model weights (8 weights).
    weights = [2, 1, 3, 1, 1, 2, 1, 1]

    # On the root, create a TenSEAL CKKS context.
    if rank == 0:
        ctx = ts.context(
            ts.SCHEME_TYPE.CKKS,
            poly_modulus_degree=8192,
            coeff_mod_bit_sizes=[60, 40, 40, 60]
        )
        ctx.global_scale = 2**40

        # Cache of encrypted powers RADIX**0 .. RADIX**bits.
        cache = [ts.ckks_vector(ctx, [float(RADIX**k)]) for k in range(bits + 1)]

        # Serialize the context with the secret key so that decryption works.
        serialized_ctx = ctx.serialize(save_secret_key=True)

        # Serialize cache for broadcast
        serialized_cache = [c.serialize() for c in cache]
    else:
        serialized_ctx = None
        serialized_cache = None

    # Broadcast the serialized context and reconstruct it on all processes.
    serialized_ctx = world_comm.bcast(serialized_ctx, root=0)
    ctx = ts.context_from(serialized_ctx)

    # Broadcast the serialized cache and reconstruct it on all processes
    serialized_cache = world_comm.bcast(serialized_cache, root=0)
    cache = [ts.ckks_vector_from(ctx, s) for s in serialized_cache]

    # Run experiments for subcommunicator sizes: 4, 8, 16, and 32 cores.
    experiment_core_counts = [4, 8, 16, 32]
    for exp_cores in experiment_core_counts:
        if world_size >= exp_cores:
            run_experiment(exp_cores, input_vector, weights, ctx, cache, world_comm)
        elif rank == 0:
            print(f"Skipping experiment with {exp_cores} cores (world size {world_size} < {exp_cores}).", flush=True)

if __name__ == '__main__':
    main()

Overwriting benchmark2_diabetes_ckks_cached.py


# Run Experiment

In [None]:
# Launch every benchmark script written by the cells above under MPI with
# 32 processes (-np 32). --allow-run-as-root lets mpirun start as the root
# user; `stdbuf -oL` runs python with line-buffered stdout.
# Paillier variants
!mpirun --allow-run-as-root -np 32 stdbuf -oL python benchmark2_diabetes_paillier_uncached.py;
!mpirun --allow-run-as-root -np 32 stdbuf -oL python benchmark2_diabetes_paillier_cached.py;

# BFV variants
!mpirun --allow-run-as-root -np 32 stdbuf -oL python benchmark2_diabetes_bfv_uncached.py;
!mpirun --allow-run-as-root -np 32 stdbuf -oL python benchmark2_diabetes_bfv_cached.py;

# CKKS variants
!mpirun --allow-run-as-root -np 32 stdbuf -oL python benchmark2_diabetes_ckks_uncached.py
!mpirun --allow-run-as-root -np 32 stdbuf -oL python benchmark2_diabetes_ckks_cached.py

== Paillier PHE Uncached ==
Experiment with 4 cores:
  Input Encryption Time: 0.035861 sec
  Model Inference Time:  0.000144 sec
  Inference Result (Dot Product): 527
Experiment with 8 cores:
  Input Encryption Time: 0.018187 sec
  Model Inference Time:  0.000195 sec
  Inference Result (Dot Product): 527
Experiment with 16 cores:
  Input Encryption Time: 0.016047 sec
  Model Inference Time:  0.000182 sec
  Inference Result (Dot Product): 527
Experiment with 32 cores:
  Input Encryption Time: 0.016354 sec
  Model Inference Time:  0.000211 sec
  Inference Result (Dot Product): 527
== Paillier PHE Cached ==
Experiment with 4 cores:
  Input Encryption Time: 0.035480 sec
  Model Inference Time:  0.000168 sec
  Inference Result (Dot Product): 527
Experiment with 8 cores:
  Input Encryption Time: 0.017704 sec
  Model Inference Time:  0.000184 sec
  Inference Result (Dot Product): 527
Experiment with 16 cores:
  Input Encryption Time: 0.017884 sec
  Model Inference Time:  0.000164 sec
  Infere

In [None]:
# Directory prefix (with trailing slash) under which the benchmark CSV files are stored.
path_root = '/content/drive/MyDrive/praxisfiles/Benchmark/'

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Directory prefix (with trailing slash) holding the benchmark result CSVs
path_root = '/content/drive/MyDrive/praxisfiles/Benchmark/'

# Load the cached / uncached result table of each scheme
bfv_cached = pd.read_csv(path_root + 'benchmark2_diabetes_bfv_cached.csv')
bfv_uncached = pd.read_csv(path_root + 'benchmark2_diabetes_bfv_uncached.csv')
ckks_cached = pd.read_csv(path_root + 'benchmark2_diabetes_ckks_cached.csv')
ckks_uncached = pd.read_csv(path_root + 'benchmark2_diabetes_ckks_uncached.csv')
paillier_cached = pd.read_csv(path_root + 'benchmark2_diabetes_paillier_cached.csv')
paillier_uncached = pd.read_csv(path_root + 'benchmark2_diabetes_paillier_uncached.csv')

# One fixed bar colour per scheme, shared by all charts
colors = dict(BFV='#1f77b4', CKKS='#ff7f0e', Paillier='#2ca02c')

# Core counts used as x-axis ticks (taken from the BFV cached table)
cores = bfv_cached['Cores'].tolist()

# Tables grouped by caching mode, and the two timing columns being plotted
_cached_tables = (bfv_cached, ckks_cached, paillier_cached)
_uncached_tables = (bfv_uncached, ckks_uncached, paillier_uncached)
_enc_col = 'Input Encryption Time (sec)'
_inf_col = 'Model Inference Time (sec)'

# --- y-axis limits for the encryption-time charts ---
all_encryption_times_cached = pd.concat([table[_enc_col] for table in _cached_tables])
all_encryption_times_uncached = pd.concat([table[_enc_col] for table in _uncached_tables])

min_encryption_time = 0  # linear scale starting at zero
# 10% headroom above the tallest bar
max_encryption_time_cached = all_encryption_times_cached.max() * 1.1
max_encryption_time_uncached = all_encryption_times_uncached.max() * 1.1

# Cached and uncached charts share one upper limit so they are comparable
max_encryption_time = max(max_encryption_time_cached, max_encryption_time_uncached)

# --- y-axis limits for the inference-time charts ---
all_inference_times_cached = pd.concat([table[_inf_col] for table in _cached_tables])
all_inference_times_uncached = pd.concat([table[_inf_col] for table in _uncached_tables])

min_inference_time = 0  # linear scale starting at zero
# 10% headroom above the tallest bar
max_inference_time_cached = all_inference_times_cached.max() * 1.1
max_inference_time_uncached = all_inference_times_uncached.max() * 1.1

# Cached and uncached charts share one upper limit so they are comparable
max_inference_time = max(max_inference_time_cached, max_inference_time_uncached)

# Function to create a figure with non-overlapping bars
def create_figure(title, datasets, y_column, y_range):
    """Build a grouped bar chart with one bar series per entry of ``datasets``.

    Args:
        title: Chart title.
        datasets: Iterable of ``(DataFrame, legend label, colour)`` triples;
            each frame supplies its 'Cores' column as x values.
        y_column: Name of the column plotted on the y axis.
        y_range: ``[low, high]`` limits for the y axis.

    Returns:
        The configured plotly figure. The x axis is categorical, with ticks
        taken from the module-level ``cores`` list.
    """
    fig = go.Figure()

    bars = [
        go.Bar(
            x=frame['Cores'],
            y=frame[y_column],
            name=label,
            marker_color=shade
        )
        for frame, label, shade in datasets
    ]
    for bar in bars:
        fig.add_trace(bar)

    # Horizontal legend placed just above the plotting area, right-aligned
    legend_box = dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    )
    fig.update_layout(
        title_text=title,
        xaxis_title="Number of Cores",
        yaxis_title="Time (seconds)",
        yaxis=dict(range=y_range),
        barmode='group',  # side-by-side bars rather than stacked
        height=600,
        width=800,
        legend=legend_box
    )

    # Categorical x axis so the core counts are spaced evenly
    tick_labels = [str(core) for core in cores]
    fig.update_xaxes(
        tickmode='array',
        tickvals=cores,
        ticktext=tick_labels,
        type='category'
    )

    return fig

# Bar series drawn in every chart, as (DataFrame, legend label, colour) triples
cached_series = [
    (bfv_cached, 'BFV', colors['BFV']),
    (ckks_cached, 'CKKS', colors['CKKS']),
    (paillier_cached, 'Paillier', colors['Paillier'])
]
uncached_series = [
    (bfv_uncached, 'BFV', colors['BFV']),
    (ckks_uncached, 'CKKS', colors['CKKS']),
    (paillier_uncached, 'Paillier', colors['Paillier'])
]

# Stand-alone encryption-time charts (cached, then uncached)
fig_encryption_cached = create_figure(
    "Input Encryption Time - Cached",
    cached_series,
    "Input Encryption Time (sec)",
    [min_encryption_time, max_encryption_time]
)
fig_encryption_uncached = create_figure(
    "Input Encryption Time - Uncached",
    uncached_series,
    "Input Encryption Time (sec)",
    [min_encryption_time, max_encryption_time]
)

# Stand-alone inference-time charts (cached, then uncached)
fig_inference_cached = create_figure(
    "Model Inference Time - Cached",
    cached_series,
    "Model Inference Time (sec)",
    [min_inference_time, max_inference_time]
)
fig_inference_uncached = create_figure(
    "Model Inference Time - Uncached",
    uncached_series,
    "Model Inference Time (sec)",
    [min_inference_time, max_inference_time]
)

# 2x2 overview: uncached in the left column, cached in the right column;
# encryption time in the top row, inference time in the bottom row
fig_combined = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        "Input Encryption Time - Uncached",
        "Input Encryption Time - Cached",
        "Model Inference Time - Uncached",
        "Model Inference Time - Cached"
    ),
    vertical_spacing=0.15,
    horizontal_spacing=0.1
)

# (series, y column, grid row, grid column, show in legend), listed in the
# order the traces are added. Only the first group feeds the legend so each
# scheme appears there once.
panels = [
    (cached_series, "Input Encryption Time (sec)", 1, 2, True),
    (uncached_series, "Input Encryption Time (sec)", 1, 1, False),
    (cached_series, "Model Inference Time (sec)", 2, 2, False),
    (uncached_series, "Model Inference Time (sec)", 2, 1, False),
]
for series, y_column, grid_row, grid_col, in_legend in panels:
    for df, name, color in series:
        fig_combined.add_trace(
            go.Bar(
                x=df['Cores'],
                y=df[y_column],
                name=name,
                marker_color=color,
                showlegend=in_legend
            ),
            row=grid_row, col=grid_col
        )

# Overall layout of the combined figure
fig_combined.update_layout(
    title_text="Homomorphic Encryption Performance Comparison",
    barmode='group',
    height=1000,
    width=1200,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=0.9
    )
)

# Fixed y ranges: both panels of a row share the same limits
y_limits = [
    (1, 1, min_encryption_time, max_encryption_time),
    (1, 2, min_encryption_time, max_encryption_time),
    (2, 1, min_inference_time, max_inference_time),
    (2, 2, min_inference_time, max_inference_time),
]
for grid_row, grid_col, y_low, y_high in y_limits:
    fig_combined.update_yaxes(
        title_text="Time (seconds)",
        range=[y_low, y_high],
        row=grid_row, col=grid_col
    )

# Categorical, evenly spaced core counts on every x axis
core_labels = [str(core) for core in cores]
for row in [1, 2]:
    for col in [1, 2]:
        fig_combined.update_xaxes(
            title_text="Number of Cores",
            tickmode='array',
            tickvals=cores,
            ticktext=core_labels,
            type='category',
            row=row, col=col
        )

# Display the figures
fig_encryption_uncached.show()
fig_encryption_cached.show()

fig_inference_uncached.show()
fig_inference_cached.show()

fig_combined.show()

# Optional: Save the figures
# fig_encryption_cached.write_html(f"{path_root}encryption_cached.html")
# fig_encryption_uncached.write_html(f"{path_root}encryption_uncached.html")
# fig_inference_cached.write_html(f"{path_root}inference_cached.html")
# fig_inference_uncached.write_html(f"{path_root}inference_uncached.html")
# fig_combined.write_html(f"{path_root}combined_comparison.html")

In [None]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

# Directory that holds the benchmark result CSVs
path_root = '/content/drive/MyDrive/praxisfiles/Benchmark/'

# Load the cached/uncached result pair for each encryption scheme
bfv_cached, bfv_uncached = (
    pd.read_csv(f'{path_root}benchmark2_diabetes_bfv_cached.csv'),
    pd.read_csv(f'{path_root}benchmark2_diabetes_bfv_uncached.csv'),
)
ckks_cached, ckks_uncached = (
    pd.read_csv(f'{path_root}benchmark2_diabetes_ckks_cached.csv'),
    pd.read_csv(f'{path_root}benchmark2_diabetes_ckks_uncached.csv'),
)
paillier_cached, paillier_uncached = (
    pd.read_csv(f'{path_root}benchmark2_diabetes_paillier_cached.csv'),
    pd.read_csv(f'{path_root}benchmark2_diabetes_paillier_uncached.csv'),
)

# One bar colour per algorithm, reused by every chart below
colors = dict(BFV='#1f77b4', CKKS='#ff7f0e', Paillier='#2ca02c')

# Core counts are taken from the BFV cached results and then used to look up
# the matching rows in the other result sets
cores = bfv_cached['Cores'].tolist()

# Timing columns compared between the cached and uncached result sets
_ENCRYPTION_TIME_COLUMN = 'Input Encryption Time (sec)'
_INFERENCE_TIME_COLUMN = 'Model Inference Time (sec)'


def _speedup_per_core(cached_df, uncached_df, column, core_counts):
    """Return the uncached/cached ratio of ``column`` for each core count.

    Args:
        cached_df: Results of the cached variant; must contain a ``Cores``
            column and ``column``.
        uncached_df: Results of the uncached variant, with the same columns.
        column: Name of the timing column to compare.
        core_counts: Core counts to evaluate; the result follows this order.

    Returns:
        A list with one speedup factor (uncached time / cached time) per
        entry of ``core_counts``. A value above 1 means the cached variant
        took less time.

    Raises:
        ValueError: If a requested core count has no row in either frame.
        KeyError: If ``Cores`` or ``column`` is missing from a frame.
    """
    # Keep only the first row per core count (the row the previous
    # ``.values[0]`` lookup would have picked) and index by core count so
    # each lookup is direct instead of a boolean-mask scan of the frame.
    cached_times = cached_df.drop_duplicates('Cores').set_index('Cores')[column]
    uncached_times = uncached_df.drop_duplicates('Cores').set_index('Cores')[column]

    # Fail with a message naming the missing core counts rather than an
    # IndexError from an empty selection.
    missing = [
        count for count in core_counts
        if count not in cached_times.index or count not in uncached_times.index
    ]
    if missing:
        raise ValueError(
            f"No benchmark row for core count(s) {missing} when comparing {column!r}"
        )

    return [uncached_times.loc[count] / cached_times.loc[count] for count in core_counts]


# Speedup factors for encryption time, one entry per core count
bfv_encryption_speedup = _speedup_per_core(
    bfv_cached, bfv_uncached, _ENCRYPTION_TIME_COLUMN, cores)
ckks_encryption_speedup = _speedup_per_core(
    ckks_cached, ckks_uncached, _ENCRYPTION_TIME_COLUMN, cores)
paillier_encryption_speedup = _speedup_per_core(
    paillier_cached, paillier_uncached, _ENCRYPTION_TIME_COLUMN, cores)

# Speedup factors for inference time, one entry per core count
bfv_inference_speedup = _speedup_per_core(
    bfv_cached, bfv_uncached, _INFERENCE_TIME_COLUMN, cores)
ckks_inference_speedup = _speedup_per_core(
    ckks_cached, ckks_uncached, _INFERENCE_TIME_COLUMN, cores)
paillier_inference_speedup = _speedup_per_core(
    paillier_cached, paillier_uncached, _INFERENCE_TIME_COLUMN, cores)

# Shared y-axis bounds used by both speedup charts
min_speedup = 0  # the speedup axis starts at zero

# Largest speedup across every algorithm and both metrics
largest_speedup = max(
    max(series)
    for series in (
        bfv_encryption_speedup, ckks_encryption_speedup, paillier_encryption_speedup,
        bfv_inference_speedup, ckks_inference_speedup, paillier_inference_speedup,
    )
)

# Leave 10% headroom above the tallest bar
max_speedup = largest_speedup * 1.1

def _build_speedup_figure(title, speedup_by_algorithm, core_counts, palette, y_range):
    """Build a grouped bar chart of speedup factors per core count.

    Args:
        title: Chart title.
        speedup_by_algorithm: Mapping of algorithm name to its list of
            speedup factors (one per entry of ``core_counts``). Traces are
            added in the mapping's iteration order.
        core_counts: Core counts shown on the x-axis.
        palette: Mapping of algorithm name to bar colour.
        y_range: ``(low, high)`` bounds for the y-axis.

    Returns:
        The configured ``go.Figure``: one bar trace per algorithm plus a
        dashed gray reference line at a speedup of 1.
    """
    figure = go.Figure()

    # One bar series per algorithm
    for algorithm, speedups in speedup_by_algorithm.items():
        figure.add_trace(
            go.Bar(
                x=core_counts,
                y=speedups,
                name=algorithm,
                marker_color=palette[algorithm]
            )
        )

    # The category x-axis is given the range -0.5 .. len(core_counts) - 0.5;
    # the reference line below reuses the same bounds so it spans the plot.
    left_edge = -0.5
    right_edge = len(core_counts) - 0.5

    figure.update_layout(
        title_text=title,
        xaxis_title="Number of Cores",
        yaxis_title="Speedup Factor",
        yaxis=dict(range=list(y_range)),
        barmode='group',
        height=600,
        width=800,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        # Core counts are categories (not a numeric scale) so the bar groups
        # are evenly spaced; each tick is labelled with its core count.
        xaxis=dict(
            type='category',
            tickmode='array',
            tickvals=core_counts,
            ticktext=[str(count) for count in core_counts],
            rangeslider=dict(visible=False),
            range=[left_edge, right_edge]
        ),
        margin=dict(l=80, r=80, t=100, b=80)
    )

    # Horizontal line at speedup = 1 (cached and uncached take the same time)
    figure.add_shape(
        type="line",
        x0=left_edge,
        y0=1,
        x1=right_edge,
        y1=1,
        line=dict(
            color="gray",
            width=1,
            dash="dash",
        )
    )
    return figure


# Encryption speedup chart
fig_encryption_speedup = _build_speedup_figure(
    "Input Encryption Time Speedup Factor (Uncached/Cached)",
    {
        'BFV': bfv_encryption_speedup,
        'CKKS': ckks_encryption_speedup,
        'Paillier': paillier_encryption_speedup,
    },
    cores,
    colors,
    (min_speedup, max_speedup),
)

# Inference speedup chart (same y-range, so the two charts are comparable)
fig_inference_speedup = _build_speedup_figure(
    "Model Inference Time Speedup Factor (Uncached/Cached)",
    {
        'BFV': bfv_inference_speedup,
        'CKKS': ckks_inference_speedup,
        'Paillier': paillier_inference_speedup,
    },
    cores,
    colors,
    (min_speedup, max_speedup),
)

# Display speedup figures
fig_encryption_speedup.show()
fig_inference_speedup.show()


def _as_rounded_floats(values, ndigits=2):
    """Return ``values`` as built-in floats rounded to ``ndigits`` places.

    Converting to ``float`` before rounding keeps NumPy scalar wrappers
    (e.g. ``np.float64(1.26)``) out of the printed lists.
    """
    return [round(float(value), ndigits) for value in values]


# Print the speedup values for reference
print("Input Encryption Time Speedup Factor (Uncached/Cached):")
print("Number of Cores:", cores)
print("BFV:", _as_rounded_floats(bfv_encryption_speedup))
print("CKKS:", _as_rounded_floats(ckks_encryption_speedup))
print("Paillier:", _as_rounded_floats(paillier_encryption_speedup))
print("\nModel Inference Time Speedup Factor (Uncached/Cached):")
print("Number of Cores:", cores)
print("BFV:", _as_rounded_floats(bfv_inference_speedup))
print("CKKS:", _as_rounded_floats(ckks_inference_speedup))
print("Paillier:", _as_rounded_floats(paillier_inference_speedup))

Input Encryption Time Speedup Factor (Uncached/Cached):
Number of Cores: [4, 8, 16, 32]
BFV: [np.float64(1.26), np.float64(2.44), np.float64(2.37), np.float64(2.38)]
CKKS: [np.float64(1.36), np.float64(2.44), np.float64(2.5), np.float64(2.65)]
Paillier: [np.float64(1.01), np.float64(1.03), np.float64(0.9), np.float64(0.93)]

Model Inference Time Speedup Factor (Uncached/Cached):
Number of Cores: [4, 8, 16, 32]
BFV: [np.float64(1.35), np.float64(1.2), np.float64(1.46), np.float64(1.18)]
CKKS: [np.float64(1.64), np.float64(1.21), np.float64(1.14), np.float64(1.6)]
Paillier: [np.float64(0.86), np.float64(1.06), np.float64(1.11), np.float64(1.02)]
