In [42]:
import client
import server
import utils

In [48]:
import importlib
importlib.reload(client)
importlib.reload(server)
importlib.reload(utils)

<module 'utils' from '/Users/gael/Desktop/het-opl/src/utils.py'>

In [26]:
import numpy as np

from vowpalwabbit.sklearn import VW
from sklearn.model_selection import train_test_split

In [27]:
np.random.seed(42)

In [30]:
def givens_rotation(vec, i, j, theta):
    """
    Apply a Givens rotation to a 1D NumPy array in the plane spanned by two given axes.

    Args:
    vec (np.array): The input 1D NumPy array.
    i (int): The first axis.
    j (int): The second axis.
    theta (float): The rotation angle in radians.

    Returns:
    np.array: The resulting 1D NumPy array after applying the Givens rotation.
    """
    if i >= len(vec) or j >= len(vec):
        raise ValueError("Axes indices must be within the range of the input array.")

    if i == j:
        raise ValueError("Axes indices must be different.")

    rotated_vec = vec.copy()
    cos_theta = np.cos(theta)
    sin_theta = np.sin(theta)

    rotated_vec[i] = cos_theta * vec[i] - sin_theta * vec[j]
    rotated_vec[j] = sin_theta * vec[i] + cos_theta * vec[j]

    return rotated_vec

def generate_separated_vectors_grid(n, s, k, num_vectors=4):
    # Calculate the number of cells along each axis
    num_cells = int(np.ceil(2 * s / k))

    # Create a list of all cell indices
    cell_indices = np.arange(num_cells ** n)
    np.random.shuffle(cell_indices)

    # Randomly select distinct cells for each of the num_vectors points
    selected_cells = cell_indices[:num_vectors]

    # Convert cell indices to n-dimensional grid indices
    grid_indices = np.array(np.unravel_index(selected_cells, [num_cells] * n)).T

    # Calculate the lower bound of each cell
    lower_bounds = -s + grid_indices * k

    # Sample a point within each of the chosen cells
    vectors = lower_bounds + np.random.rand(num_vectors, n) * k

    return vectors

def generate_observational_data(num_clients, num_actions, num_features, train_sizes, test_sizes=None):
    if test_sizes is None:
        test_sizes = [10_000] * num_clients

#     action_vec = np.random.randn(num_features)
#     action_vec /= np.linalg.norm(action_vec)
#     # action_vec *= 0.5
    
#     thetas = [k*np.pi/num_actions for k in range(num_actions)]
#     action_params = np.array([givens_rotation(action_vec, 0, 1, theta) for theta in thetas])
#     assert action_params.shape == (num_actions, num_features)

#     # action_params = np.random.randn(num_features, num_actions)
    # action_params /= np.linalg.norm(action_params, axis=0)
    # action_params = 0.5 * action_params

    action_params = generate_separated_vectors_grid(num_features, 1, np.sqrt(num_features), 4)

    weights = []
    for i in range(num_actions):
        other_vectors = np.delete(action_params, [i], axis=0)
        weights.append(sum([1/np.linalg.norm(action_params[i] - v) for v in other_vectors]))
        if not all(np.linalg.norm(action_params[i] - v) > np.sqrt(num_features) for v in other_vectors):
            raise ValueError("Vectors not sufficiently separated. Resample.")
    weights = [w / sum(weights) for w in weights]

    data = {}
    aux = {}
    for client_id in range(num_clients):
        # Generate data
        num_samples = train_sizes[client_id] + test_sizes[client_id]
        # contexts = np.random.uniform(low=-1, high=1, size=(num_samples, num_features))
        # contexts = np.random.normal(loc=0, scale=1, size=(num_samples, num_features))
        contexts = np.random.multivariate_normal(mean=action_params[client_id], cov=np.eye(num_features), size=num_samples)
        rewards_vectors = np.zeros((num_samples, num_actions))
        for i in range(num_samples):
            for a in range(num_actions):
                rewards_vectors[i, a] += np.dot(contexts[i], action_params[a])
                if contexts[i][0] > 0.25:
                    rewards_vectors[i, a] += -np.max([np.dot(contexts[i], action_params[aprime]) for aprime in range(num_actions)]) 
                # rewards_vectors[i, a] = np.clip(1 / np.linalg.norm(contexts[i] - action_params[a]), a_min=0, a_max=10)
                # rewards_vectors[i, a] = np.exp(1-1/np.linalg.norm(contexts[i]-action_params[a]))
                # rewards_vectors[i, a] = 1 - np.exp(-1/np.linalg.norm(contexts[i]-action_params[a]))
        # actions = np.random.choice(num_actions, p=[0.7, 0.1, 0.1, 0.1], size=num_samples)
        actions = np.random.choice(num_actions, size=num_samples)
        epsilons = np.random.normal(loc=0, scale=1, size=(num_samples, num_actions))
        noisy_rewards_vectors = rewards_vectors + epsilons
        noisy_rewards = noisy_rewards_vectors[np.arange(num_samples), actions]
        
        # Get train-test split
        (X_train, X_test,
         A_train, A_test,
         Y_train, Y_test,
         true_costs_train, true_costs_test) = train_test_split(contexts, actions, noisy_rewards, -rewards_vectors,
                                                               train_size=train_sizes[client_id],
                                                               shuffle=False)

        # Compute AIPW scores
        crossfit_map, mu, e = utils.cross_fit_nuisance_params(X_train, A_train, Y_train, num_actions)
        AIPW_vectors = utils.compute_AIPW_scores(X_train, A_train, Y_train, num_actions, crossfit_map, mu, e)

        # Convert data to VW format
        vw_data = utils.to_vw_format(X_train, A_train, -AIPW_vectors)

        data[client_id] = vw_data
        aux[client_id] = {"X_train": X_train, "X_test": X_test,
                          "A_train": A_train, "A_test": A_test,
                          "Y_train": Y_train, "Y_test": Y_test,
                          "true_costs_train": true_costs_train, "true_costs_test": true_costs_test,
                          "weight": weights[client_id]}
    
    return data, aux

In [31]:
# Federated config
NUM_ROUNDS = 3
NUM_CLIENTS = 4
NUM_ACTIONS = 4
NUM_FEATURES = 10
TRAIN_SIZES = np.array([1, 1, 1, 1]) * 100_000
CLIENT_WEIGHTS = TRAIN_SIZES / np.sum(TRAIN_SIZES)

# Generate data
data, aux = generate_observational_data(num_clients=NUM_CLIENTS,
                                        num_actions=NUM_ACTIONS,
                                        num_features=NUM_FEATURES,
                                        train_sizes=TRAIN_SIZES)

### Train optimal local model

In [19]:
# Set sample size
sample_size = 100_000

# Train on local data
client_id = 0
opt_model = VW(csoaa=NUM_ACTIONS,
               convert_to_vw=False,
               convert_labels=False,
               passes=10)
opt_model.fit(data[client_id][:sample_size])

# Evaluate on test data
X_test_vw = utils.to_vw_format(aux[client_id]["X_test"])
regret, opt_reward, reward = utils.compute_regret(X_test_vw, opt_model, aux[client_id]["true_costs_test"])
print(f"Local Client {client_id}: opt_reward={opt_reward}, reward={reward}, regret={regret}")

Local Client 0: opt_reward=7.894604964354001, reward=7.894345217957393, regret=0.00025974639661475367


### Train local model

In [20]:
# Set sample size
sample_size = 1000

# Train on local data
client_id = 0
model = VW(csoaa=NUM_ACTIONS,
           convert_to_vw=False,
           convert_labels=False,
           passes=10)
model.fit(data[client_id][:sample_size])

# Evaluate on test data
X_test_vw = utils.to_vw_format(aux[client_id]["X_test"])
regret, opt_reward, reward = utils.compute_regret(X_test_vw, model, aux[client_id]["true_costs_test"], opt_model)
print(f"Local Client {client_id}: opt_reward={opt_reward}, reward={reward}, regret={regret}")

class Model():
    def predict(self, X):
        return np.random.choice(NUM_ACTIONS, size=len(X_test_vw))
regret, opt_reward, reward = utils.compute_regret(X_test_vw, Model(), aux[client_id]["true_costs_test"], opt_model)
print(f"Random Local Client {client_id}: opt_reward={opt_reward}, reward={reward}, regret={regret}")

Local Client 0: opt_reward=7.894345217957393, reward=7.864565790277059, regret=0.029779427680330383
Random Local Client 0: opt_reward=7.894345217957393, reward=0.757282999227291, regret=7.1370622187300965


### Train aggregate model

In [22]:
# Set sample size
sample_sizes = [1000] * NUM_CLIENTS

# Aggregate all data
X_train = []
A_train = []
Y_train = []
true_costs_train = []
true_costs_test = []
X_test = []
for client_id in range(NUM_CLIENTS):
    sample_size = sample_sizes[client_id]
    X_train.extend(aux[client_id]["X_train"][:sample_size])
    A_train.extend(aux[client_id]["A_train"][:sample_size])
    Y_train.extend(aux[client_id]["Y_train"][:sample_size])
    true_costs_train.extend(aux[client_id]["true_costs_train"][:sample_size])
    true_costs_test.extend(aux[client_id]["true_costs_test"])
    X_test.extend(aux[client_id]["X_test"])
X_train = np.array(X_train)
A_train = np.array(A_train)
Y_train = np.array(Y_train)
true_costs_train = np.array(true_costs_train)
true_costs_test = np.array(true_costs_test)
X_test = np.array(X_test)

# Compute AIPW scores
crossfit_map, mu, e = utils.cross_fit_nuisance_params(X_train, A_train, Y_train, NUM_ACTIONS)
noisy_costs_train = -utils.compute_AIPW_scores(X_train, A_train, Y_train, NUM_ACTIONS, crossfit_map, mu, e)
data_train = utils.to_vw_format(X_train, A_train, noisy_costs_train)

# Train model
model = VW(csoaa=NUM_ACTIONS,
           convert_to_vw=False,
           convert_labels=False,
           passes=10)
model.fit(data_train)

# Evaluate on test data
X_test_vw = utils.to_vw_format(X_test)
regret, opt_reward, reward = utils.compute_regret(X_test_vw, model, true_costs_test, opt_model)
# y_pred = model.predict(X_test_vw)
# print(y_pred[-100:])
# print(model.get_coefs())
print(f"Aggregate: opt_reward={opt_reward}, reward={reward}, regret={regret}")

Aggregate: opt_reward=6.393984563552918, reward=6.385915978240404, regret=0.008068585312514958


### Train optimal global model (pooled & weighted)

In [39]:
# Aggregate all data
X_train = []
A_train = []
Y_train = []
true_costs_train = []
true_costs_test = []
X_test = []

count = 0
idx_to_weight_mapping = {}
for client_id in range(NUM_CLIENTS):
    lo = len(X_train)
    X_train.extend(aux[client_id]["X_train"])
    A_train.extend(aux[client_id]["A_train"])
    Y_train.extend(aux[client_id]["Y_train"])
    true_costs_train.extend(aux[client_id]["true_costs_train"])
    true_costs_test.extend(aux[client_id]["true_costs_test"])
    X_test.extend(aux[client_id]["X_test"])
    hi = len(X_train)
    idx_to_weight_mapping.update({i:aux[client_id]["weight"]*sum(TRAIN_SIZES)/TRAIN_SIZES[client_id] for i in range(lo, len(X_train))})
    
X_train = np.array(X_train)
A_train = np.array(A_train)
Y_train = np.array(Y_train)
true_costs_train = np.array(true_costs_train)
true_costs_test = np.array(true_costs_test)
X_test = np.array(X_test)

# Compute AIPW scores
crossfit_map, mu, e = utils.cross_fit_nuisance_params(X_train, A_train, Y_train, NUM_ACTIONS)
noisy_costs_train = -utils.compute_AIPW_scores(X_train, A_train, Y_train, NUM_ACTIONS, crossfit_map, mu, e)
data_train = utils.to_vw_format(X_train, A_train, noisy_costs_train, idx_to_weight_mapping)

# Train model
opt_model = VW(csoaa=NUM_ACTIONS,
               convert_to_vw=False,
               convert_labels=False,
               passes=10)
opt_model.fit(data_train)

# Evaluate on test data
X_test_vw = utils.to_vw_format(X_test)
regret, opt_reward, reward = utils.compute_regret(X_test_vw, opt_model, true_costs_test, opt_model=None, idx_to_weight_mapping=idx_to_weight_mapping)
# y_pred = model.predict(X_test_vw)
# print(y_pred[-100:])
# print(model.get_coefs())
print(f"Aggregate: opt_reward={opt_reward}, reward={reward}, regret={regret}")

Aggregate: opt_reward=3.6447416928682155, reward=3.6446037649485157, regret=0.00013792791969857574


### Train federated model (weighted)

In [49]:
# Set sample size
sample_sizes = [1000] * NUM_CLIENTS
sample_sizes[0] = 100

# Subsample data
data_mod = {}
aux_mod = {}
client_weights = []
for client_id in range(NUM_CLIENTS):
    sample_size = sample_sizes[client_id]
    data_mod[client_id] = data[client_id][:sample_size]
    aux_mod[client_id] = {"X_test": aux[client_id]["X_test"][:sample_size],
                          "true_costs_test": aux[client_id]["true_costs_test"][:sample_size]}
    client_weights.append(aux[client_id]["weight"])

# Run federated learning
global_vw = server.run_federated_learning(data_mod, aux_mod, opt_model,
                                          num_features=NUM_FEATURES,
                                          num_classes=NUM_ACTIONS,
                                          num_rounds=NUM_ROUNDS,
                                          num_clients=NUM_CLIENTS,
                                          client_weights=client_weights)

INFO flwr 2023-04-21 04:26:18,940 | app.py:145 | Starting Flower simulation, config: ServerConfig(num_rounds=3, round_timeout=None)
2023-04-21 04:26:26,832	INFO worker.py:1529 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8277 [39m[22m
INFO flwr 2023-04-21 04:26:28,357 | app.py:179 | Flower VCE: Ray initialized with resources: {'memory': 5147116749.0, 'object_store_memory': 2147483648.0, 'CPU': 8.0, 'node:127.0.0.1': 1.0}
INFO flwr 2023-04-21 04:26:28,359 | server.py:86 | Initializing global parameters
INFO flwr 2023-04-21 04:26:28,360 | server.py:270 | Requesting initial parameters from one random client
INFO flwr 2023-04-21 04:26:29,417 | server.py:274 | Received initial parameters from one random client
INFO flwr 2023-04-21 04:26:29,418 | server.py:88 | Evaluating initial parameters
INFO flwr 2023-04-21 04:26:29,419 | server.py:101 | FL starting
DEBUG flwr 2023-04-21 04:26:29,419 | server.py:215 | fit_round 1: strategy sampled 4 clients (out of 4)


[2m[36m(launch_and_get_parameters pid=39499)[0m [Client 1] get_parameters
[2m[36m(launch_and_fit pid=39499)[0m Client 0, training finished for round 1
[2m[36m(launch_and_fit pid=39499)[0m [Client 0] get_parameters


DEBUG flwr 2023-04-21 04:26:30,593 | server.py:229 | fit_round 1 received 4 results and 0 failures
DEBUG flwr 2023-04-21 04:26:30,618 | server.py:165 | evaluate_round 1: strategy sampled 4 clients (out of 4)


[2m[36m(launch_and_fit pid=39497)[0m Client 1, training finished for round 1
[2m[36m(launch_and_fit pid=39497)[0m [Client 1] get_parameters
[2m[36m(launch_and_fit pid=39502)[0m Client 3, training finished for round 1
[2m[36m(launch_and_fit pid=39502)[0m [Client 3] get_parameters
[2m[36m(launch_and_fit pid=39498)[0m Client 2, training finished for round 1
[2m[36m(launch_and_fit pid=39498)[0m [Client 2] get_parameters


DEBUG flwr 2023-04-21 04:26:30,801 | server.py:179 | evaluate_round 1 received 4 results and 0 failures
DEBUG flwr 2023-04-21 04:26:30,802 | server.py:215 | fit_round 2: strategy sampled 4 clients (out of 4)


[2m[36m(launch_and_evaluate pid=39499)[0m [Client 2] evaluate, config: {}
[2m[36m(launch_and_evaluate pid=39497)[0m [Client 1] evaluate, config: {}
[2m[36m(launch_and_evaluate pid=39502)[0m [Client 0] evaluate, config: {}
[2m[36m(launch_and_evaluate pid=39498)[0m [Client 3] evaluate, config: {}


DEBUG flwr 2023-04-21 04:26:31,067 | server.py:229 | fit_round 2 received 4 results and 0 failures
DEBUG flwr 2023-04-21 04:26:31,097 | server.py:165 | evaluate_round 2: strategy sampled 4 clients (out of 4)


[2m[36m(launch_and_fit pid=39499)[0m Client 2, training finished for round 2
[2m[36m(launch_and_fit pid=39499)[0m [Client 2] get_parameters
[2m[36m(launch_and_fit pid=39497)[0m Client 0, training finished for round 2
[2m[36m(launch_and_fit pid=39497)[0m [Client 0] get_parameters
[2m[36m(launch_and_fit pid=39502)[0m Client 1, training finished for round 2
[2m[36m(launch_and_fit pid=39502)[0m [Client 1] get_parameters
[2m[36m(launch_and_fit pid=39498)[0m Client 3, training finished for round 2
[2m[36m(launch_and_fit pid=39498)[0m [Client 3] get_parameters


DEBUG flwr 2023-04-21 04:26:31,260 | server.py:179 | evaluate_round 2 received 4 results and 0 failures
DEBUG flwr 2023-04-21 04:26:31,262 | server.py:215 | fit_round 3: strategy sampled 4 clients (out of 4)


[2m[36m(launch_and_evaluate pid=39499)[0m [Client 2] evaluate, config: {}
[2m[36m(launch_and_evaluate pid=39497)[0m [Client 0] evaluate, config: {}
[2m[36m(launch_and_evaluate pid=39502)[0m [Client 1] evaluate, config: {}
[2m[36m(launch_and_evaluate pid=39498)[0m [Client 3] evaluate, config: {}
[2m[36m(launch_and_fit pid=39499)[0m Client 0, training finished for round 3
[2m[36m(launch_and_fit pid=39499)[0m [Client 0] get_parameters


DEBUG flwr 2023-04-21 04:26:31,535 | server.py:229 | fit_round 3 received 4 results and 0 failures
DEBUG flwr 2023-04-21 04:26:31,561 | server.py:165 | evaluate_round 3: strategy sampled 4 clients (out of 4)


[2m[36m(launch_and_fit pid=39497)[0m Client 1, training finished for round 3
[2m[36m(launch_and_fit pid=39497)[0m [Client 1] get_parameters
[2m[36m(launch_and_fit pid=39502)[0m Client 3, training finished for round 3
[2m[36m(launch_and_fit pid=39502)[0m [Client 3] get_parameters
[2m[36m(launch_and_fit pid=39498)[0m Client 2, training finished for round 3
[2m[36m(launch_and_fit pid=39498)[0m [Client 2] get_parameters


DEBUG flwr 2023-04-21 04:26:31,727 | server.py:179 | evaluate_round 3 received 4 results and 0 failures
INFO flwr 2023-04-21 04:26:31,728 | server.py:144 | FL finished in 2.3085987319991546
INFO flwr 2023-04-21 04:26:31,729 | app.py:202 | app_fit: losses_distributed [(1, 0.7726766192184403), (2, 0.7726766192184403), (3, 0.7726766192184403)]
INFO flwr 2023-04-21 04:26:31,730 | app.py:203 | app_fit: metrics_distributed {'opt_reward': [(1, 3.2902429044683608), (2, 3.4989963815954925), (3, 3.551511070610543)], 'reward': [(1, 1.82909366445219), (2, 1.8931299359173421), (3, 1.985364991479188)], 'regret': [(1, 1.4611492400161732), (2, 1.605866445678153), (3, 1.5661460791313573)]}
INFO flwr 2023-04-21 04:26:31,731 | app.py:204 | app_fit: losses_centralized []
INFO flwr 2023-04-21 04:26:31,732 | app.py:205 | app_fit: metrics_centralized {}


[2m[36m(launch_and_evaluate pid=39499)[0m [Client 1] evaluate, config: {}
[2m[36m(launch_and_evaluate pid=39497)[0m [Client 0] evaluate, config: {}
[2m[36m(launch_and_evaluate pid=39502)[0m [Client 2] evaluate, config: {}
[2m[36m(launch_and_evaluate pid=39498)[0m [Client 3] evaluate, config: {}


In [50]:
global_vw

History (loss, distributed):
	round 1: 0.7726766192184403
	round 2: 0.7726766192184403
	round 3: 0.7726766192184403
History (metrics, distributed):
{'opt_reward': [(1, 3.2902429044683608), (2, 3.4989963815954925), (3, 3.551511070610543)], 'reward': [(1, 1.82909366445219), (2, 1.8931299359173421), (3, 1.985364991479188)], 'regret': [(1, 1.4611492400161732), (2, 1.605866445678153), (3, 1.5661460791313573)]}