In [None]:
import graph_generator as gg
import obm_dp as dp
import numpy as np
import matplotlib.pyplot as plt
from torch_geometric.loader import DataLoader
import torch
from util import diff
from gnn_library.util import train, objectview

%load_ext autoreload
%autoreload 2

In [None]:
# The evaluation cells in this notebook convert model outputs with
# `.detach().numpy()`, which only works on CPU tensors, so the device is pinned
# to the CPU. CUDA auto-detection is intentionally not used here.
device = torch.device('cpu')
print("PyTorch has version {}".format(torch.__version__))
print('Using device:', device)

In [None]:
# Hyper-parameters handed to train(); objectview exposes them as attributes
# (e.g. args.head below).
args = objectview(dict(
    processor='GENConv',
    head='regression',
    num_layers=2,
    num_mlp_layers=2,
    aggr='max',
    batch_size=25,
    node_feature_dim=4,
    edge_feature_dim=1,
    graph_feature_dim=2,
    hidden_dim=64,
    dropout=0.5,
    epochs=50,
    opt='adam',
    opt_scheduler='none',
    opt_restart=0,
    weight_decay=5e-3,
    lr=0.0001,
    device=device,
))

# Instance size (m, n as passed to gg.generate_examples) and example counts.
m = 10
n = 6
train_num = 100
test_num = 30

# One keyword bundle per random-graph family.
er_config = dict(graph_type='ER', p=1, weighted=True)
ba_config = dict(graph_type='BA', ba_param=3, weighted=True)
geom_config = dict(graph_type='GEOM', threshold=0.2, scaling=1 / np.sqrt(2))

# NOTE(review): DataLoader is built without batch_size although args.batch_size
# is set — confirm whether train() re-batches or whether batch size 1 is intended.
# Examples are generated family by family (ER, BA, GEOM), GEOM at twice the count.
train_dataset = DataLoader([
    example
    for count, graph_config in (
        (train_num, er_config),
        (train_num, ba_config),
        (2 * train_num, geom_config),
    )
    for example in gg.generate_examples(count, m, n, [0.8] * m, args.head, **graph_config)
])
test_dataset = DataLoader([
    example
    for count, graph_config in (
        (test_num, er_config),
        (test_num, ba_config),
        (2 * test_num, geom_config),
    )
    for example in gg.generate_examples(count, m, n, [0.8] * m, args.head, **graph_config)
])



In [None]:
# train() returns a 4-tuple; only its third element is kept, as `trained_model`,
# which the evaluation cells below call directly on graph tensors.
_, _, trained_model, _ = train(train_dataset, test_dataset, args)

In [None]:
# Number of sampled instances per (node_config, graph_config) pair.
num_trials = 250

# (m, n) pairs: test_model loops over m arrival steps and n offline nodes.
# For a full sweep use instead: [(k, 16) for k in range(16, 52, 4)]
node_configs = [(48, 16), (32, 32), (16, 48)]

# Keyword bundles forwarded to gg.sample_bipartite_graph, one per graph family.
graph_configs = [
    {
        'graph_type': 'GM'
    },
    {
        'graph_type': 'ER',
        'p': 0.75,
        'weighted': True
    },
    {
        'graph_type': 'BA',
        'ba_param': 4,
        'weighted': True
    },
    {
        'graph_type': 'GEOM',
        'threshold': 0.2,
        'scaling': 1 / np.sqrt(2),
    }
]

def test_model(trained_model, num_trials, node_configs, graph_configs):
    """Evaluate a model-gated greedy policy against plain greedy and print the ratios.

    For each ``(m, n)`` in ``node_configs`` and each config in ``graph_configs``,
    ``num_trials`` graphs are sampled. Every arrival step ``t`` occurs with
    probability 0.8. On an arrival the model is queried; if its output is
    ``<= 0.5`` and at least one neighbor is flagged in the first ``A.shape[1]``
    entries of ``neighbors``, the heaviest such neighbor is matched and removed
    from the available offline set. Otherwise the arrival is skipped and the
    offline set is left untouched.

    Values are normalised by ``dp.offline_opt``; trials whose offline optimum is
    not positive are excluded, and the reported ``±`` term uses the number of
    trials actually kept.

    Args:
        trained_model: callable taking ``(x, edge_index, edge_attr, batch, graph_features)``.
        num_trials: number of sampled instances per configuration.
        node_configs: iterable of ``(m, n)`` pairs.
        graph_configs: iterable of keyword dicts for ``gg.sample_bipartite_graph``.

    Returns:
        None. Results are printed.
    """
    # NOTE(review): the model is used as returned by train(); confirm it is in eval mode.
    for m, n in node_configs:
        for config in graph_configs:
            print(m, n, config)
            greedy_vals = []
            learned_vals = []
            for _ in range(num_trials):
                A = gg.sample_bipartite_graph(m, n, **config)
                p = [0.8 for _ in range(m)]
                coin_flips = [np.random.binomial(1, _p) for _p in p]
                all_nodes = np.arange(n + m + 1)
                offline_nodes = frozenset(np.arange(n))
                value = 0
                for t in range(m):
                    if not coin_flips[t]:
                        continue
                    data = gg._to_pyg_test(A, p, offline_nodes, t)
                    # Inference only: no autograd graph is needed.
                    with torch.no_grad():
                        pred = trained_model(data.x, data.edge_index, data.edge_attr, data.batch, data.graph_features)
                    if pred.detach().numpy() <= 0.5:
                        mask = data.neighbors.detach().numpy()[:A.shape[1]]
                        if np.any(mask):
                            chosen_index = np.argmax(A[t, :][mask])
                            choice = all_nodes[data.neighbors][chosen_index]
                            value += A[t, choice]
                            # Only a real match consumes an offline node. Doing this
                            # outside the branch read `choice` before assignment
                            # (or a stale value) whenever the arrival was skipped.
                            offline_nodes = diff(offline_nodes, choice)

                _, offline_opt = dp.offline_opt(A, coin_flips)
                _, greed_value = dp.greedy(A, coin_flips, 0.0)
                if offline_opt > 0:
                    learned_vals.append(value / offline_opt)
                    greedy_vals.append(greed_value / offline_opt)

            num_valid = len(learned_vals)
            if num_valid == 0:
                print("No trials with a positive offline optimum; nothing to report.")
                print()
                continue

            learned_mean = np.mean(learned_vals); greedy_mean = np.mean(greedy_vals)
            learned_std = np.std(learned_vals, ddof=1); greedy_std = np.std(greedy_vals, ddof=1)
            # Standard error uses the number of trials kept, not num_trials.
            print(f"Learned competitive ratio: {learned_mean} ± {2 * learned_std / np.sqrt(num_valid)}")
            print(f"Greedy competitive ratio: {greedy_mean} ± {2 * greedy_std / np.sqrt(num_valid)}")
            print()

# Run the evaluation defined in this cell with the settings declared at its top.
test_model(trained_model, num_trials, node_configs, graph_configs)

In [None]:
# Number of sampled instances per (node_config, graph_config) pair.
num_trials = 250

# (m, n) pairs: test_model loops over m arrival steps and n offline nodes.
# For a full sweep use instead: [(k, 16) for k in range(16, 52, 4)]
node_configs = [(48, 12), (32, 32), (12, 48)]

# Keyword bundles forwarded to gg.sample_bipartite_graph, one per graph family.
graph_configs = [
    {
        'graph_type': 'GM'
    },
    {
        'graph_type': 'ER',
        'p': 0.75,
        'weighted': True
    },
    {
        'graph_type': 'BA',
        'ba_param': 3,
        'weighted': True
    },
    {
        'graph_type': 'GEOM',
        'threshold': 0.2,
        'scaling': 1 / np.sqrt(2),
    }
]

def test_model(trained_model, num_trials, node_configs, graph_configs):
    """Evaluate the model's argmax policy against plain greedy and print the ratios.

    This definition replaces any ``test_model`` defined earlier in the kernel.

    For each ``(m, n)`` in ``node_configs`` and each config in ``graph_configs``,
    ``num_trials`` graphs are sampled. Every arrival step ``t`` occurs with
    probability 0.8. On an arrival the model scores the entries flagged by
    ``neighbors`` and the highest-scoring one is chosen. A choice ``< n`` is
    counted as a match and its edge weight added; any other choice adds
    nothing. The chosen node is removed from the available set via ``diff``
    in both cases.

    Values are normalised by ``dp.offline_opt``; trials whose offline optimum is
    not positive are excluded, and the reported ``±`` term uses the number of
    trials actually kept.

    Args:
        trained_model: callable taking ``(x, edge_index, edge_attr, batch, graph_features)``.
        num_trials: number of sampled instances per configuration.
        node_configs: iterable of ``(m, n)`` pairs.
        graph_configs: iterable of keyword dicts for ``gg.sample_bipartite_graph``.

    Returns:
        None. Results are printed.
    """
    # NOTE(review): the model is used as returned by train(); confirm it is in eval mode.
    for m, n in node_configs:
        for config in graph_configs:
            print(m, n, config)
            greedy_vals = []
            learned_vals = []
            for _ in range(num_trials):
                A = gg.sample_bipartite_graph(m, n, **config)
                p = [0.8 for _ in range(m)]
                coin_flips = [np.random.binomial(1, _p) for _p in p]
                all_nodes = np.arange(n + m + 1)
                offline_nodes = frozenset(np.arange(n))
                value = 0
                for t in range(m):
                    if not coin_flips[t]:
                        continue
                    data = gg._to_pyg_test(A, p, offline_nodes, t)
                    # Inference only: no autograd graph is needed.
                    with torch.no_grad():
                        pred = trained_model(data.x, data.edge_index, data.edge_attr, data.batch, data.graph_features)
                    chosen_index = np.argmax(pred[data.neighbors].detach().numpy())
                    choice = all_nodes[data.neighbors][chosen_index]
                    if choice < n:
                        value += A[t, choice]

                    offline_nodes = diff(offline_nodes, choice)

                _, offline_opt = dp.offline_opt(A, coin_flips)
                _, greed_value = dp.greedy(A, coin_flips, 0.0)
                if offline_opt > 0:
                    learned_vals.append(value / offline_opt)
                    greedy_vals.append(greed_value / offline_opt)

            num_valid = len(learned_vals)
            if num_valid == 0:
                print("No trials with a positive offline optimum; nothing to report.")
                print()
                continue

            learned_mean = np.mean(learned_vals); greedy_mean = np.mean(greedy_vals)
            learned_std = np.std(learned_vals, ddof=1); greedy_std = np.std(greedy_vals, ddof=1)
            # Standard error uses the number of trials kept, not num_trials.
            print(f"Learned competitive ratio: {learned_mean} ± {2 * learned_std / np.sqrt(num_valid)}")
            print(f"Greedy competitive ratio: {greedy_mean} ± {2 * greedy_std / np.sqrt(num_valid)}")
            print()

# Run the evaluation defined in this cell with the settings declared at its top.
test_model(trained_model, num_trials, node_configs, graph_configs)

In [None]:


def edge_weight_dist(config, num_samples=100, m=8, n=8):
    """Sample graphs and collect the column sums of each binarised weight matrix.

    Each sample is drawn with ``gg.sample_bipartite_graph(m, n, **config)``.
    Entries greater than zero are replaced by 1 (other entries keep their
    value) and the result is summed along axis 0.

    Args:
        config: keyword dict forwarded to ``gg.sample_bipartite_graph``.
        num_samples: number of graphs to draw (default 100).
        m: first size argument passed to the sampler (default 8).
        n: second size argument passed to the sampler (default 8).

    Returns:
        ``np.ndarray`` with one row of column sums per sampled graph.
    """
    column_sums = []
    for _ in range(num_samples):
        weight = gg.sample_bipartite_graph(m, n, **config)
        # Binarise into a new array so the sampled matrix is not mutated.
        binarised = np.where(weight > 0, 1, weight)
        column_sums.append(binarised.sum(axis=0))
    return np.array(column_sums)

In [None]:
# GEOM family with scaling 1/sqrt(2): average of edge_weight_dist's rows over
# the sampled graphs (one value per column).
config = dict(graph_type='GEOM', threshold=0.2, scaling=1 / np.sqrt(2))
edge_weight_dist(config).mean(axis=0)

In [None]:
# Same GEOM family with scaling 0.5; shows the full per-sample array
# returned by edge_weight_dist rather than its mean.
config = dict(graph_type='GEOM', threshold=0.2, scaling=0.5)
edge_weight_dist(config)

In [None]:
import matplotlib.pyplot as plt

# Hard-coded competitive-ratio series, one triple per graph family:
# (GNN values, greedy values, plot title). Presumably copied from earlier
# sweep runs — the producing cell is not part of this notebook state.
outputs = [
    (
        [0.934, 0.919, 0.909, 0.910, 0.908, 0.905, 0.909, 0.909, 0.910],
        [0.898, 0.867, 0.845, 0.838, 0.829, 0.798, 0.782, 0.763, 0.761],
        'GMISSION',
    ),
    (
        [0.915, 0.911, 0.934, 0.950, 0.962, 0.965, 0.965, 0.968, 0.969],
        [0.956, 0.921, 0.888, 0.876, 0.864, 0.852, 0.847, 0.842, 0.846],
        'ER',
    ),
    (
        [0.909, 0.906, 0.918, 0.929, 0.942, 0.943, 0.952, 0.950, 0.955],
        [0.875, 0.828, 0.821, 0.806, 0.796, 0.795, 0.788, 0.774, 0.774],
        'BA',
    ),
    (
        [0.859, 0.861, 0.882, 0.903, 0.930, 0.936, 0.949, 0.949, 0.953],
        [0.961, 0.941, 0.905, 0.875, 0.855, 0.838, 0.825, 0.814, 0.804],
        'GEOM',
    ),
]

# x-axis: 16..48 in steps of 4, divided by 16 (nine points, one per series entry).
node_seq = [count / 16 for count in range(16, 52, 4)]

# One figure per graph family, GNN curve first, greedy curve second.
for output in outputs:
    gnn_ratios, greedy_ratios, family = output
    plt.plot(node_seq, gnn_ratios, label='GNN')
    plt.plot(node_seq, greedy_ratios, label='Greedy')
    plt.title(family)
    plt.xlabel('Offline/Online ratio')
    plt.ylabel('Competitive ratio')
    plt.legend()
    plt.show()

In [None]:
# Diagnostic run: m arrival steps, n offline nodes, a single sampled instance.
m = 10
n = 10
num_trials = 1

# Graph family under inspection. Alternatives to swap in:
#   {'graph_type': 'ER', 'p': 0.75, 'weighted': True}
#   {'graph_type': 'BA', 'ba_param': 3, 'weighted': True}
#   {'graph_type': 'GM'}
config = dict(graph_type='GEOM', threshold=0.2, scaling=1 / np.sqrt(2))
def diagnose(hints, preds):
    """Return the gap between the best hint and the hint at the argmax of ``preds``.

    The result is 0 when the highest prediction sits at a position holding the
    maximum hint, and positive otherwise.
    """
    picked = np.argmax(preds)
    best_hint = np.max(hints)
    return best_hint - hints[picked]
    
# Per-decision diagnostics accumulated over all trials and arrival steps.
lost_weights = []   # diagnose(hints, preds) divided by OPT
choices = []        # True when the model's argmax equals the argmax of the DP hints
no_skips = []       # True when the chosen index is the last one (len(hints) - 1);
                    # presumably that index is the skip action — TODO confirm

for i in range(num_trials):
    A = gg.sample_bipartite_graph(m, n, **config)
    p = [0.8 for _ in range(m)]
    cache = dp.cache_stochastic_opt(A, p)
    coin_flips = [np.random.binomial(1, _p) for _p in p]
    all_nodes = np.arange(n + m + 1)
    offline_nodes = frozenset(np.arange(n))
    # Value read from the DP cache for step 0 with every offline node available.
    # NOTE(review): OPT is used as a divisor below; a zero value is not guarded.
    OPT = cache[0][offline_nodes][0]
    matching = []
    value = 0
    for t in range(m):
        if coin_flips[t]:
            # Named pyg_input so the builtin input() is not shadowed kernel-wide.
            pyg_input = gg._to_pyg_test(A, p, offline_nodes, t)
            hints = dp.one_step_stochastic_opt(A, offline_nodes, t, cache)
            opt_index = np.argmax(hints)
            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                pred = trained_model(pyg_input.x, pyg_input.edge_index, pyg_input.edge_attr, pyg_input.batch, pyg_input.graph_features)
            preds = pred[pyg_input.neighbors].detach().numpy()
            chosen_index = np.argmax(preds)
            choice = all_nodes[pyg_input.neighbors][chosen_index]
            lost_weight = diagnose(hints, preds) / OPT

            choices.append(opt_index == chosen_index)
            print(opt_index, chosen_index, hints[opt_index], hints[chosen_index], len(hints) - 1)
            no_skips.append(chosen_index == len(hints) - 1)

            lost_weights.append(lost_weight)

            # Only choices below n add an edge weight to the running value.
            if choice < n:
                matching.append((t, choice))
                value += A[t, choice]

            offline_nodes = diff(offline_nodes, choice)
    # Greedy result on the same instance, kept for ad-hoc inspection.
    greedy_matching, greedy_val = dp.greedy(A, coin_flips, 0)

In [None]:
# Fraction of recorded decisions whose chosen index was the last one (len(hints) - 1).
np.mean(no_skips)

In [None]:
# Fraction of recorded decisions where the model's argmax matched the argmax of the DP hints.
np.mean(choices)

In [None]:
# Histogram of the per-decision lost weight, plus mean, std and deciles.
plt.hist(lost_weights, bins=25)
# NOTE(review): the third value divides by a hard-coded 250 — presumably a
# trial count from an earlier run; confirm against the current num_trials.
print(np.mean(lost_weights).round(3), np.std(lost_weights).round(3), len(lost_weights) / 250)
# Quantiles at 0.1, 0.2, ..., 0.9.
print(np.quantile(lost_weights, [0.1 * x for x in range(1,10)]))
plt.show()

In [None]:
# Same histogram as the previous cell; here the third printed value is the raw
# number of recorded decisions.
plt.hist(lost_weights, bins=25)
print(np.mean(lost_weights).round(3), np.std(lost_weights).round(3), len(lost_weights))
# Quantiles at 0.1, 0.2, ..., 0.9.
print(np.quantile(lost_weights, [0.1 * x for x in range(1,10)]))
plt.show()

In [None]:
#Need to predict skip more often?
for t in range(m):
  print(t, np.mean((record[0][t])).round(3), np.mean(record[1][t]).round(3), np.mean(record[2][t]).round(3))
  # plt.bar(labels, vals)
  # plt.xticks(rotation=45)
  # plt.show()

0 0.434 0.184 -0.064
1 0.46 0.177 -0.068
2 0.515 0.178 -0.059
3 0.545 0.173 -0.066
4 0.677 0.144 -0.071
5 0.99 0.094 -0.0

0 0.5 0.11 -0.049
1 0.527 0.113 -0.038
2 0.465 0.105 -0.046
3 0.547 0.091 -0.036
4 0.552 0.079 -0.04
5 0.99 0.075 -0.0

0 0.522 0.115 -0.038
1 0.438 0.106 -0.046
2 0.576 0.105 -0.058
3 0.505 0.093 -0.061
4 0.626 0.079 -0.045
5 0.99 0.071 -0.0

In [None]:
# Error-analysis run: m arrival steps, n offline nodes, 100 sampled instances.
m = 14
n = 8
num_trials = 100

# Graph family under inspection. Alternative to swap in:
#   {'graph_type': 'ER', 'p': 0.75, 'weighted': True}
config = dict(graph_type='GEOM', threshold=0.2, scaling=1 / np.sqrt(2))
def error_code(chosen_index, pred_index, length):
    """Label a decision ``"Correct"`` when both indices agree, else ``"Incorrect"``.

    ``length`` is kept in the signature for callers; it does not change the
    label, since every disagreeing case maps to ``"Incorrect"`` and every
    agreeing case to ``"Correct"``.
    """
    indices_agree = chosen_index == pred_index
    return "Correct" if indices_agree else "Incorrect"
    
# Per-step records, each a list of m lists indexed by arrival step t:
#   record[0][t] — "Correct"/"Incorrect" labels from error_code
#   record[1][t] — hints[opt_index] - hints[chosen_index] after the regret transform
#   record[2][t] — number of entries flagged in the neighbors mask
record = ([[] for _ in range(m)], [[] for _ in range(m)], [[] for _ in range(m)])
greedy_vals = []
learned_vals = []
for i in range(num_trials):
    A = gg.sample_bipartite_graph(m, n, **config)
    # Arrival probability 1: every step occurs in this experiment.
    p = [1 for _ in range(m)]
    cache = dp.cache_stochastic_opt(A, p)
    coin_flips = [np.random.binomial(1, _p) for _p in p]
    all_nodes = np.arange(n + m + 1)
    offline_nodes = frozenset(np.arange(n))
    matching = []
    value = 0
    for t in range(m):
        if coin_flips[t]:
            # Named pyg_input so the builtin input() is not shadowed kernel-wide.
            pyg_input = gg._to_pyg_test(A, p, offline_nodes, t)
            # Pass `batch` as the fourth argument, matching every other model
            # call in this notebook; without it graph_features lands in the
            # batch position.
            with torch.no_grad():
                pred = trained_model(pyg_input.x, pyg_input.edge_index, pyg_input.edge_attr, pyg_input.batch, pyg_input.graph_features)

            hints = dp.one_step_stochastic_opt(A, offline_nodes, t, cache)
            # Turn hints into a gap to the best option: 0 marks the optimum.
            hints = np.max(hints) - hints
            # NOTE(review): this cell takes the argmin of the model output while
            # the other evaluation cells take the argmax — confirm which
            # convention the current model's regression target follows.
            chosen_index = np.argmin(pred[pyg_input.neighbors].detach().numpy())
            opt_index = np.argmin(hints)
            choice = all_nodes[pyg_input.neighbors][chosen_index]
            correct = (chosen_index == opt_index)
            reduction = hints[opt_index] - hints[chosen_index]
            record[0][t].append(error_code(opt_index, chosen_index, len(hints)))
            record[1][t].append(reduction)
            record[2][t].append(np.sum(pyg_input.neighbors.detach().numpy()))
            # Only choices below n add an edge weight to the running value.
            if choice < n:
                matching.append((t, choice))
                value += A[t, choice]

            offline_nodes = diff(offline_nodes, choice)

    opt_matching, opt_value = dp.stochastic_opt(A, coin_flips, cache)
    _, greed_value = dp.greedy(A, coin_flips, 0)
    # Skip instances with a non-positive optimum instead of dividing by zero,
    # as the test_model cells do.
    if opt_value > 0:
        learned_vals.append(value / opt_value)
        greedy_vals.append(greed_value / opt_value)

In [None]:
#Need to predict skip more often?
import matplotlib.pyplot as plt
# Per step t: fraction of "Correct" labels in record[0][t] and the mean of
# record[2][t] (the neighbor-mask counts).
for t in range(m):
  # labels/vals are only consumed by the commented-out bar chart below.
  labels, vals =  np.unique(record[0][t], return_counts=True)
  print(t, np.mean(np.array(record[0][t]) == 'Correct'), np.mean(record[2][t]))
  # plt.bar(labels, vals)
  # plt.xticks(rotation=45)
  # plt.show()

0 0.59 8.18
1 0.51 7.69
2 0.54 7.35
3 0.64 6.55
4 0.54 6.2
5 0.69 5.54
6 0.68 5.12
7 0.64 4.61
8 0.63 4.19
9 0.7 3.78
10 0.67 3.31
11 0.74 2.65
12 0.83 2.32
13 1.0 1.9

In [None]:
import matplotlib.pyplot as plt

# For every step index: print the mean of record[1] and record[2] at that step,
# then show a 10-bin histogram of the record[1] values.
for i in range(len(record[0])):
    step_reductions = record[1][i]
    step_neighbor_counts = record[2][i]
    print(f"Node: {i}, average reduction: {np.mean(step_reductions)}")
    print(f"Node: {i}, average neighbors: {np.mean(step_neighbor_counts)}")
    plt.hist(step_reductions, bins=10)
    plt.show()

In [None]:
# Displays `matching` as left in the kernel by the most recently run trial loop.
matching

In [None]:
# Displays `opt_matching` from the last dp.stochastic_opt call of the trial loop.
opt_matching

In [None]:
# Displays the learned value and the DP value of the last trial side by side.
value, opt_value

In [None]:
# Displays the greedy value of the last trial.
greed_value

In [None]:
def greedy_test(num_trials, node_configs, graph_configs, r):
    """Run ``dp.greedy`` on sampled instances and report its ratio to the offline optimum.

    For each ``(m, n)`` in ``node_configs`` and each config in ``graph_configs``,
    ``num_trials`` graphs are sampled with arrival probability 0.8 per step.
    The greedy value is divided by ``dp.offline_opt(A, coin_flips)[1]``; trials
    whose offline optimum is not positive are left out of the ratio statistics,
    and the ``±`` term uses the number of trials actually kept.

    Args:
        num_trials: number of sampled instances per configuration.
        node_configs: iterable of ``(m, n)`` pairs.
        graph_configs: iterable of keyword dicts for ``gg.sample_bipartite_graph``.
        r: third argument forwarded unchanged to ``dp.greedy``.

    Returns:
        ``np.ndarray`` holding, per trial, the weights of the greedily matched
        edges. When every trial matched the same number of edges this is a
        regular 2-D array; otherwise it is a 1-D object array of lists, because
        recent NumPy versions refuse to build a ragged array implicitly.
    """
    matched_weights = []
    for m, n in node_configs:
        for config in graph_configs:
            vals = []
            print(m, n, config)
            for _ in range(num_trials):
                A = gg.sample_bipartite_graph(m, n, **config)
                p = [0.8 for _ in range(m)]
                coin_flips = [np.random.binomial(1, _p) for _p in p]
                matching, val = dp.greedy(A, coin_flips, r)
                matched_weights.append([A[t, i] for (t, i) in matching])
                _, offline_opt = dp.offline_opt(A, coin_flips)
                # Guard against a zero optimum, as the test_model cells do.
                if offline_opt > 0:
                    vals.append(val / offline_opt)

            if not vals:
                print("No trials with a positive offline optimum; nothing to report.")
                print()
                continue

            mean_ratio = np.mean(vals)
            std_ratio = np.std(vals, ddof=1)
            # The ratio printed here is greedy / offline optimum, so it is labelled as such.
            print(f"Greedy competitive ratio: {np.round(mean_ratio, 3)} ± {np.round(2 * std_ratio / np.sqrt(len(vals)), 3)}")
            print()

    # Rectangular data converts as before; ragged data needs an explicit object dtype.
    if len({len(weights) for weights in matched_weights}) <= 1:
        return np.array(matched_weights)
    return np.array(matched_weights, dtype=object)


# Settings for the greedy_test cells below: 1000 trials on a single
# (m, n) = (50, 10) size with the GEOM graph family.
num_trials = 1000
node_configs = [(50, 10)]
graph_configs = [
    dict(graph_type='GEOM', threshold=0.2, scaling=1 / np.sqrt(2)),
]



In [None]:
# Greedy run with r = 0, then the column-wise mean of the matched edge weights.
# NOTE(review): .mean(axis=0) assumes greedy_test returned a rectangular array,
# i.e. every trial matched the same number of edges — confirm.
matched_weights = greedy_test(num_trials, node_configs, graph_configs, 0)
matched_weights.mean(axis=0)

In [None]:
# Repeat of the previous cell with freshly sampled instances (no seed is set).
matched_weights = greedy_test(num_trials, node_configs, graph_configs, 0)
matched_weights.mean(axis=0)

In [None]:
# Greedy run with r = 0; the returned array is displayed as the cell output.
greedy_test(num_trials, node_configs, graph_configs, 0)

In [None]:
# Greedy run with r = 0.25 (forwarded to dp.greedy as its third argument).
greedy_test(num_trials, node_configs, graph_configs, 0.25)

In [None]:
# Greedy run with r = 0.5 (forwarded to dp.greedy as its third argument).
greedy_test(num_trials, node_configs, graph_configs, 0.5)

In [None]:
# Greedy run with r = 0.75 (forwarded to dp.greedy as its third argument).
greedy_test(num_trials, node_configs, graph_configs, 0.75)

In [None]:
# Small GEOM instance; m and n set here are reused by the next two cells.
m = 5
n = 10
config = dict(graph_type='GEOM', threshold=0.2, scaling=1 / np.sqrt(2))

# Mean and standard deviation of each of the first m rows of the sampled matrix.
A = gg.sample_bipartite_graph(m, n, **config)
for t in range(m):
    row = A[t, :]
    print(np.mean(row), np.std(row))

In [None]:
# Same row statistics for the ER family; m and n come from the kernel state.
config = dict(graph_type='ER', p=0.75, weighted=True)

A = gg.sample_bipartite_graph(m, n, **config)
for t in range(m):
    row = A[t, :]
    print(np.mean(row), np.std(row))

In [None]:
# Same row statistics for the BA family; m and n come from the kernel state.
config = dict(graph_type='BA', ba_param=4, weighted=True)

A = gg.sample_bipartite_graph(m, n, **config)
# Flattened copy of all entries, kept available for ad-hoc inspection.
weights = A.flatten()
for t in range(m):
    row = A[t, :]
    print(np.mean(row), np.std(row))