In [1]:
import sys
sys.path.append('../../..')

import numpy as np
import random
import pandas as pd
import time
from matplotlib import pyplot as plt

from core.points import *
from core.ranges import *
from algorithms.fairness.fair_epsnet import *
from core.ranges import get_range_space
from core.verification import is_fair_epsnet

Sampling method:

In [2]:
def report_fair_epsnet_sample(n, m, eps, ratios, c1, dim):
    success_prob = 0.9
    
    
    color_counts = []
    for ratio in ratios:
        color_counts.append(int(n * ratio))
    
    points = []
    for i, count in enumerate(color_counts):
        points += [Point(point=[random.uniform(0, 1), random.uniform(0, 1)], color=i) for _ in range(count)]
    
    ranges = []
    for _ in range(m):
        # Generate a random normal vector in R^d
        normal = [random.uniform(-1, 1) for _ in range(dim)]
        
        # Calculate the range of possible dot products with the [0, 1]^d hypercube
        min_dot = sum(min(0, n) for n in normal)  # Minimum dot product with [0, 1]^d
        max_dot = sum(max(0, n) for n in normal)  # Maximum dot product with [0, 1]^d
        
        # Choose an offset within the range [min_dot, max_dot] to ensure intersection
        offset = random.uniform(min_dot, max_dot)
        
        # Create a HalfspaceRange object
        halfspace = HalfspaceRange(normal=normal, offset=offset)
        ranges.append(halfspace)
        
    rangespace = get_range_space(points, ranges)
    vc = dim + 1

    heavy_ranges = []
    for r in rangespace:
        if len(r) >= n * eps:
            heavy_ranges.append(r)
            
    print(f"n: {n}, m: {m}, vc: {vc}, epsilon: {eps}, number of heavy ranges: {len(heavy_ranges)}")

    # for _ in range(10):
    start = time.time()
    epsnet = build_fair_epsnet_sample(
        points=points,
        rangespace=rangespace,
        vc=vc,
        epsilon=eps,
        fairconfig=FairConfig(k=len(ratios), fairness=FairnessMeasure.DP),
        success_prob=success_prob,
        weights=None,
        c1=c1
    )
    end = time.time()

    print(f"Number of points in epsnet: {len(epsnet)}, time taken: {end - start:.6f} seconds")

    success = is_fair_epsnet(epsnet=epsnet, rangespace=rangespace, epsilon=eps, points=points)
    print(f"Success: {success}")  # Verify the eps-net
    
    return (n, m, 
            success_prob, 
            eps, 
            end - start, 
            success, 
            len(heavy_ranges), 
            len([p for p in epsnet if p.color == 0]), len([p for p in epsnet if p.color == 1]),
            len(ratios), # k
            ratios,
            [len([p for p in epsnet if p.color == i]) for i in range(len(ratios))], # color counts
            )

In [None]:
report_fair_epsnet_sample(2**10, 2**10, 0.6, [0.5, 0.5], 1/2, 32)  # Example usage

In [18]:
n_values = [2**9, 2**10, 2**11, 2**12]
m_values = [2**9, 2**10, 2**11, 2**12]
eps_values = [0.6]
dims = [4, 8, 16, 32]
rates = [
    [0.5, 0.5]
]

aggregated_results = {
    "n": [],
    "m": [],
    "prob": [],
    "eps": [],
    "time": [],
    "success": [],
    "heavy_ranges": [],
    "tries": [],
    "red_points": [],
    "blue_points": [],
    "k": [],
    "ratios": [],
    "counts": [],
    "dim": []
}

for n in n_values:
    for m in m_values:
        for eps in eps_values:
            for dim in dims:
                ratio = rates[0]
                tries = 20
                print(f"Running for n={n}, m={m}, eps={eps}, dim={dim}")
                result = report_fair_epsnet_sample(n, m, eps, ratio, c1=1/2, dim=dim)
                while not result[5] and tries > 0:
                    print("Retrying...")
                    result = report_fair_epsnet_sample(n, m, eps, ratio, c1=1/2, dim=dim)
                    tries -= 1
                
                aggregated_results["n"].append(result[0])
                aggregated_results["m"].append(result[1])
                aggregated_results["prob"].append(result[2])
                aggregated_results["eps"].append(result[3])
                aggregated_results["time"].append(result[4])
                aggregated_results["success"].append(result[5])
                aggregated_results["heavy_ranges"].append(result[6])
                aggregated_results["tries"].append(10 - tries)
                aggregated_results["red_points"].append(result[7])
                aggregated_results["blue_points"].append(result[8])
                aggregated_results["k"].append(result[9])
                aggregated_results["ratios"].append(result[10])
                aggregated_results["counts"].append(result[11])
                aggregated_results["dim"].append(dim)

result = pd.DataFrame(aggregated_results)

Running for n=512, m=512, eps=0.6, dim=4
n: 512, m: 512, vc: 5, epsilon: 0.6, number of heavy ranges: 240
[build_fair_epsnet_sample] epsnet size m: 316, v: 1.5
[_augment_epsnet] epsnet colors count:
	[_augment_epsnet] Color 0: 165
	[_augment_epsnet] Color 1: 151
[_augment_epsnet] Color 0 to add: 72
[_augment_epsnet] Color 1 to add: 86
Number of points in epsnet: 474, time taken: 0.001031 seconds
Success: True
Running for n=512, m=512, eps=0.6, dim=8
n: 512, m: 512, vc: 9, epsilon: 0.6, number of heavy ranges: 237
[build_fair_epsnet_sample] epsnet size m: 512, v: 1.5
[_augment_epsnet] epsnet colors count:
	[_augment_epsnet] Color 0: 253
	[_augment_epsnet] Color 1: 259
[_augment_epsnet] Color 0 to add: 131
[_augment_epsnet] Color 1 to add: 125
Number of points in epsnet: 696, time taken: 0.001402 seconds
Success: True
Running for n=512, m=512, eps=0.6, dim=16
n: 512, m: 512, vc: 17, epsilon: 0.6, number of heavy ranges: 254
[build_fair_epsnet_sample] epsnet size m: 512, v: 1.5
[_augment_

In [None]:
# result.to_csv("dp_fair_epsnet_halfspace_sample.csv", index=False)

Sketch and merge:

In [3]:
def report_fair_epsnet_sketch_merge(n, m, eps, ratios, c1, dim):
    success_prob = 0.9
    
    
    color_counts = []
    for ratio in ratios:
        color_counts.append(int(n * ratio))
    
    points = []
    for i, count in enumerate(color_counts):
        points += [Point(point=[random.uniform(0, 1), random.uniform(0, 1)], color=i) for _ in range(count)]
    
    ranges = []
    for _ in range(m):
        # Generate a random normal vector in R^d
        normal = [random.uniform(-1, 1) for _ in range(dim)]
        
        # Calculate the range of possible dot products with the [0, 1]^d hypercube
        min_dot = sum(min(0, n) for n in normal)  # Minimum dot product with [0, 1]^d
        max_dot = sum(max(0, n) for n in normal)  # Maximum dot product with [0, 1]^d
        
        # Choose an offset within the range [min_dot, max_dot] to ensure intersection
        offset = random.uniform(min_dot, max_dot)
        
        # Create a HalfspaceRange object
        halfspace = HalfspaceRange(normal=normal, offset=offset)
        ranges.append(halfspace)
        
    rangespace = get_range_space(points, ranges)
    vc = dim + 1

    heavy_ranges = []
    for r in rangespace:
        if len(r) >= n * eps:
            heavy_ranges.append(r)
            
    print(f"n: {n}, m: {m}, vc: {vc}, epsilon: {eps}, number of heavy ranges: {len(heavy_ranges)}")

    # for _ in range(10):
    start = time.time()
    epsnet = build_fair_epsnet_sketch_merge(
        points=points,
        rangespace=rangespace,
        vc=vc,
        epsilon=eps,
        fairconfig=FairConfig(k=len(ratios), fairness=FairnessMeasure.DP),
        c1=0,
        c2=c1
    )
    end = time.time()

    print(f"Number of points in epsnet: {len(epsnet)}, time taken: {end - start:.6f} seconds")

    success = is_fair_epsnet(epsnet=epsnet, rangespace=rangespace, epsilon=eps, points=points)
    print(f"Success: {success}")  # Verify the eps-net
    
    return (n, m, 
            success_prob, 
            eps, 
            end - start, 
            success, 
            len(heavy_ranges), 
            len([p for p in epsnet if p.color == 0]), len([p for p in epsnet if p.color == 1]),
            len(ratios), # k
            ratios,
            [len([p for p in epsnet if p.color == i]) for i in range(len(ratios))], # color counts
            )

In [4]:
# n_values = [2**10]
# m_values = [2**9, 2**10, 2**11, 2**12]
# eps_values = [0.6]
# dims = [4, 8, 16, 32]
rates = [
    [0.5, 0.5]
]

n_values = [2**10]#, 2**12]
m_values = [2**9, 2**10, 2**11, 2**12]
dims = [4, 8, 16, 32]
eps_values = [0.9]

aggregated_results = {
    "n": [],
    "m": [],
    "prob": [],
    "eps": [],
    "time": [],
    "success": [],
    "heavy_ranges": [],
    "tries": [],
    "red_points": [],
    "blue_points": [],
    "k": [],
    "ratios": [],
    "counts": [],
    "dim": []
}

for n in n_values:
    for m in m_values:
        for eps in eps_values:
            for dim in dims:
                ratio = rates[0]
                tries = 20
                print(f"Running for n={n}, m={m}, eps={eps}, dim={dim}")
                result = report_fair_epsnet_sketch_merge(n, m, eps, ratio, c1=1, dim=dim)
                while not result[5] and tries > 0:
                    print("Retrying...")
                    result = report_fair_epsnet_sketch_merge(n, m, eps, ratio, c1=1, dim=dim)
                    tries -= 1
                
                aggregated_results["n"].append(result[0])
                aggregated_results["m"].append(result[1])
                aggregated_results["prob"].append(result[2])
                aggregated_results["eps"].append(result[3])
                aggregated_results["time"].append(result[4])
                aggregated_results["success"].append(result[5])
                aggregated_results["heavy_ranges"].append(result[6])
                aggregated_results["tries"].append(10 - tries)
                aggregated_results["red_points"].append(result[7])
                aggregated_results["blue_points"].append(result[8])
                aggregated_results["k"].append(result[9])
                aggregated_results["ratios"].append(result[10])
                aggregated_results["counts"].append(result[11])
                aggregated_results["dim"].append(dim)

result = pd.DataFrame(aggregated_results)

Running for n=1024, m=512, eps=0.9, dim=4
n: 1024, m: 512, vc: 5, epsilon: 0.9, number of heavy ranges: 189
[build_fair_epsnet_sketch_merge] epsnet size m: 185
[build_fair_epsnet_sketch_merge] partition size p: 256
[build_fair_epsnet_sketch_merge] Starting sketch-and-merge...
[_sketch_merge] pair: 1 / 2 of total nodes: 4
[_greedy_discrepancy_halving] counter: 256 / 256
[_sketch_merge] pair: 2 / 2 of total nodes: 4
[_greedy_discrepancy_halving] counter: 256 / 256
[_sketch_merge] pair: 1 / 1 of total nodes: 2
[_greedy_discrepancy_halving] counter: 256 / 256

Number of points in epsnet: 256, time taken: 30.329808 seconds
Success: True
Running for n=1024, m=512, eps=0.9, dim=8
n: 1024, m: 512, vc: 9, epsilon: 0.9, number of heavy ranges: 213
[build_fair_epsnet_sketch_merge] epsnet size m: 333
[build_fair_epsnet_sketch_merge] partition size p: 512
[build_fair_epsnet_sketch_merge] Starting sketch-and-merge...
[_sketch_merge] pair: 1 / 1 of total nodes: 2
[_greedy_discrepancy_halving] counter

In [None]:
# result.to_csv("dp_fair_epsnet_halfspace_sketch_merge.csv", index=False)