In [None]:
import torch
import numpy as np
import os
# Step up to the parent directory (presumably the project root that holds the
# modules imported below) exactly once per kernel. The guard keeps a re-run of
# this cell from climbing one more directory each time it is executed.
if 'NOTEBOOK_START_DIR' not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
    os.chdir('..')

import torch_converter as tc
import instance_generator as ig
from torch_geometric.loader import DataLoader
from gnn_library.util import train, save, load
from util import NumpyDataset, Dataset
from evaluate import evaluate_model, pp_output
import evaluate as ev
from gnn_library.OBM_greedy import OBM_Greedy

%load_ext autoreload
%autoreload 2

In [None]:
# Prefer GPU index 2 when the machine really has that many GPUs; otherwise fall
# back to the first GPU, and to the CPU when CUDA is not available at all.
# (A bare 'cuda:2' is an invalid device on hosts with fewer than three GPUs.)
PREFERRED_GPU = 2
if torch.cuda.is_available():
    gpu_index = PREFERRED_GPU if torch.cuda.device_count() > PREFERRED_GPU else 0
    device = torch.device(f'cuda:{gpu_index}')
else:
    device = torch.device('cpu')
print(f"PyTorch has version {torch.__version__}")
print('Using device:', device)

In [None]:
# Load the two stored models (each returned together with its args) onto
# `device`; they are passed as `base_models` in the cells below.
(GNN1, args1), (GNN2, args2) = [load(name, device) for name in ('GNN1', 'GNN2')]
# GNN3, args3 = load('GNN3', device)
# Non-learned greedy model, used as a third base model further down.
GREEDY = OBM_Greedy()

### I. Meta GNN training/evaluation

In [None]:
# Settings for the first meta-model experiment (base models: GNN1 and GNN2).
# Only 'head' and 'batch_size' are read in this cell; the rest is consumed
# elsewhere (presumably by train()).
args = {
    'processor':         'TEST',
    'head':              'meta',
    'num_layers':        2,
    'num_mlp_layers':    2,
    'aggr':              'max',
    'batch_size':        6,
    'node_feature_dim':  4,
    'edge_feature_dim':  1,
    'graph_feature_dim': 2,
    'hidden_dim':        64,
    'output_dim':        2,
    'dropout':           0.25,
    'epochs':            25,
    'opt':               'adam',
    'opt_scheduler':     'none',
    'opt_restart':       0,
    'weight_decay':      5e-3,
    'lr':                0.0001,
    'device':            device
}

# Number of instances sampled per (size, graph family) combination.
train_num, test_num = 25, 5

# Keyword arguments forwarded to ig.sample_instances for each graph family.
er_config = dict(graph_type='ER', p=1, weighted=True)
ba_config = dict(graph_type='BA', ba_param=4, weighted=False)
geom_config = dict(graph_type='GEOM', threshold=0.2, scaling=1 / np.sqrt(2))

rng = np.random.default_rng()

# Three size pairs x three graph families, sampled in the same order as the
# original hand-written list (sizes outermost, families innermost).
train_instances = [
    instance
    for (m, n) in ((6, 10), (8, 8), (10, 6))
    for graph_config in (er_config, ba_config, geom_config)
    for instance in ig.sample_instances(m, n, train_num, rng, **graph_config)
]

test_instances = [
    instance
    for (m, n) in ((6, 10), (8, 8), (10, 6))
    for graph_config in (er_config, ba_config, geom_config)
    for instance in ig.sample_instances(m, n, test_num, rng, **graph_config)
]

# Convert the sampled instances into training samples for the meta model.
train_samples = tc._instances_to_train_samples(
    instances=train_instances,
    head=args['head'],
    meta_model_type='gnn',
    base_models=[GNN1, GNN2]
)
train_data = Dataset(train_samples)

test_samples = tc._instances_to_train_samples(
    instances=test_instances,
    head=args['head'],
    meta_model_type='gnn',
    base_models=[GNN1, GNN2]
)
test_data = Dataset(test_samples)

# Both loaders share the same batching options.
loader_options = dict(batch_size=args['batch_size'], shuffle=True, num_workers=4)
train_loader = DataLoader(train_data, **loader_options)
test_loader = DataLoader(test_data, **loader_options)

In [None]:
# Replacement settings used for the training run in the cells below.
# 'num_mlp_layers' is the key every other args dict in this notebook uses; this
# dict previously spelled it 'num_lp_layers', which would leave the intended
# setting unset for whatever reads it.
args = {
    'processor':         'TEST2',
    'head':              'meta',
    'num_layers':        4,
    'num_mlp_layers':    2,
    'aggr':              'max',
    'batch_size':        6,
    'node_feature_dim':  6,
    'edge_feature_dim':  1,
    'graph_feature_dim': 2,
    'hidden_dim':        4,
    'output_dim':        2,
    'dropout':           0,
    'epochs':            50,
    'opt':               'adam',
    'opt_scheduler':     'none',
    'opt_restart':       0,
    'weight_decay':      5e-3,
    'lr':                0.001,
    'device':            device
}

In [None]:
# Add up the `hint` tensors of all training samples and print the totals
# (presumably a check of how the targets are distributed over the two base
# models; verify against torch_converter).
# The accumulator is deliberately not called `sum`: rebinding that name would
# shadow the builtin for every later cell in the kernel.
hint_totals = torch.zeros(2)
for sample in train_data:
    hint_totals += sample.hint
print(hint_totals)

In [None]:
# Run training on the loaders built above with the current `args`; only the
# third element of train()'s return value is kept (presumably the trained model,
# since it is passed to save() in the next cell).
# NOTE(review): this unpacks four values, whereas the later training cell in
# this notebook unpacks five from the same train(). One of the two is probably
# stale; confirm train()'s current return arity.
_, _, META_GNN, _ = train(train_loader, test_loader, args)

In [None]:
# Store the model from the training cell, together with its args, under the
# name 'META_TEST'.
save(META_GNN, args, 'META_TEST')

In [None]:
# Load the model stored under the name 'META_GNN' and replace both `META_GNN`
# and `args` with what was stored.
# NOTE(review): the preceding cell saves under 'META_TEST', not 'META_GNN', so
# this loads a different artifact than the one just trained. Confirm that is
# intended.
META_GNN, args = load('META_GNN', device)

In [None]:
# Settings for the second meta-model experiment (base models: GNN1, GNN2 and
# GREEDY). Only 'head' is read in this cell.
args = {
    'processor':         'TEST2',
    'head':              'meta',
    'num_layers':        4,
    'num_mlp_layers':    2,
    'aggr':              'max',
    'batch_size':        6,
    'node_feature_dim':  7,
    'edge_feature_dim':  1,
    'graph_feature_dim': 2,
    'hidden_dim':        8,
    'output_dim':        3,
    'head_mlp_dim':      8,
    'dropout':           0,
    'epochs':            35,
    'opt':               'adam',
    'opt_scheduler':     'none',
    'opt_restart':       0,
    'weight_decay':      5e-3,
    'lr':                0.0005,
    'device':            device
}

# Base number of instances per (size, graph family) combination.
train_num, test_num = 150, 50

# Keyword arguments forwarded to ig.sample_instances for each graph family.
er_config = dict(graph_type='ER', p=1, weighted=True)
ba_config = dict(graph_type='BA', ba_param=4, weighted=False)
geom_config = dict(graph_type='GEOM', threshold=0.2, scaling=1 / np.sqrt(2))

rng = np.random.default_rng()

# (first size, second size, count multiplier, graph config), in sampling order.
# The 8x8 GEOM family is drawn four times as often as every other family.
family_plan = [
    (6, 10, 1, er_config),
    (6, 10, 1, ba_config),
    (6, 10, 1, geom_config),
    (8, 8, 1, er_config),
    (8, 8, 1, ba_config),
    (8, 8, 4, geom_config),
    (10, 6, 1, er_config),
    (10, 6, 1, ba_config),
    (10, 6, 1, geom_config),
]

train_instances = [
    instance
    for (m, n, multiplier, graph_config) in family_plan
    for instance in ig.sample_instances(m, n, multiplier * train_num, rng, **graph_config)
]

test_instances = [
    instance
    for (m, n, multiplier, graph_config) in family_plan
    for instance in ig.sample_instances(m, n, multiplier * test_num, rng, **graph_config)
]

# Convert the sampled instances into training samples for the meta model.
train_samples = tc._instances_to_train_samples(
    instances=train_instances,
    head=args['head'],
    meta_model_type='gnn',
    base_models=[GNN1, GNN2, GREEDY]
)
train_data = Dataset(train_samples)

test_samples = tc._instances_to_train_samples(
    instances=test_instances,
    head=args['head'],
    meta_model_type='gnn',
    base_models=[GNN1, GNN2, GREEDY]
)
test_data = Dataset(test_samples)

In [None]:
# Wrap both datasets in loaders that share the same batching options.
loader_options = dict(
    batch_size=args['batch_size'],
    shuffle=True,
    num_workers=4,
)
train_loader = DataLoader(train_data, **loader_options)
test_loader = DataLoader(test_data, **loader_options)

In [None]:
# Settings for the META_GNN_test training run below. This dict matches the
# `args` defined in the data-generation cell above key for key, except for
# 'lr' (0.001 here instead of 0.0005).
args = {
    'processor':         'TEST2',
    'head':              'meta',
    'num_layers':        4,
    'num_mlp_layers':    2,
    'aggr':              'max',
    'batch_size':        6,
    'node_feature_dim':  7,
    'edge_feature_dim':  1,
    'graph_feature_dim': 2,
    'hidden_dim':        8,
    'output_dim':        3,
    'head_mlp_dim':      8,
    'dropout':           0,
    'epochs':            35,
    'opt':               'adam',
    'opt_scheduler':     'none',
    'opt_restart':       0,
    'weight_decay':      5e-3,
    'lr':                0.001,
    'device':            device
}

In [None]:
# Train on the current loaders and `args`; only the fourth of the five returned
# values is kept (presumably the trained model, since it is saved and evaluated
# in the following cells).
_, _, _, META_GNN_test, _ = train(train_loader, test_loader, args)

In [None]:
# Store the model from the training cell above, together with the args it was
# trained with, under the name 'META_GNN_test'.
save(META_GNN_test, args, 'META_GNN_test')

In [None]:
# Keyword arguments for ig.sample_instances, one dict per graph family.
er_config = dict(graph_type='ER', p=1, weighted=True)
ba_config = dict(graph_type='BA', ba_param=4, weighted=False)
geom_config = dict(graph_type='GEOM', threshold=0.2, scaling=1 / np.sqrt(2))

In [None]:
# One-off evaluation of the meta model on 100 instances of a single size and
# graph family.
seed = np.random.randint(0, 500000)
m, n = 48, 16
config = geom_config

# Instances are drawn from a generator seeded with `seed` ...
eval_instances = ig.sample_instances(m, n, 100, np.random.default_rng(seed), **config)

# ... and the evaluation receives a fresh generator restarted from that seed.
rng = np.random.default_rng(seed)
eval_kwargs = dict(
    meta_model=META_GNN_test,
    meta_model_type='gnn',
    base_models=[GNN1, GNN2, GREEDY],
    instances=eval_instances,
    batch_size=500,
    rng=rng,
    num_realizations=5,
    baselines=['greedy', 'lp_rounding'],
)
ratios1, win_rates1 = ev.evaluate_model(**eval_kwargs)

pp_output(ratios1)
# Two blank lines after the report.
print('\n')

# rng = np.random.default_rng(seed)

# ratios2, win_rates2 = ev.evaluate_model(
#     meta_model=None,
#     meta_model_type=None,
#     base_models=[GNN1],
#     instances=eval_instances,
#     batch_size=500,
#     rng=rng,
#     num_realizations=10
# )

# pp_output(ratios2)

# print()
# print()

# rng = np.random.default_rng(seed)

# ratios, win_rates = ev.evaluate_model(
#     meta_model=None,
#     meta_model_type=None,
#     base_models=[GNN2],
#     instances=eval_instances,
#     batch_size=500,
#     rng=rng,
#     num_realizations=10
# )

# pp_output(ratios)

In [None]:
import matplotlib.pyplot as plt

# Plot one curve per entry of the win-rate mapping returned by the evaluation
# cell directly above. This reads `win_rates1` (assigned there) instead of
# `meta_win_rates`, which is only assigned by the sweep cell further down and
# is therefore undefined when the notebook is run top to bottom.
fig, ax = plt.subplots(figsize=(8, 6))
for name, vals in win_rates1.items():
    ax.plot(vals, label=f"{name}")
ax.legend()
ax.set_xlabel('Online node')
ax.set_ylabel('Agreement with OPT')
plt.show()

In [None]:
# Sweep over instance sizes and graph families, evaluating the meta model and
# the two baselines on freshly sampled instances for every combination.
num_trials = 200
node_configs = [(first_size, 16) for first_size in np.arange(8, 64, 4)]
# of nodes [20 -> 80]
# of nodes in batch [10,000 -> 40,000]
batch_size = 500 #[int(min(32, x + y)) for (x, y) in node_configs]
graph_configs = [
    # dict(graph_type='GM'),
    dict(graph_type='ER', p=0.75, weighted=True),
    dict(graph_type='BA', ba_param=4, weighted=True),
    dict(graph_type='GEOM', threshold=0.2, scaling=1 / np.sqrt(2)),
]

ratios = [x/y for (x,y) in node_configs]
print(ratios)

# One result list per graph family; each entry stacks the per-instance ratios
# as rows [learned, greedy, lp_rounding] for one node configuration.
data = {cfg['graph_type']: [] for cfg in graph_configs}
for graph_config in graph_configs:
    for node_config in node_configs:
        print(graph_config, node_config)
        seed = np.random.randint(0, 500000)

        # Sample with a generator seeded by `seed`, then restart a generator
        # from the same seed for the evaluation itself.
        instances = ig.sample_instances(
            *node_config, num_trials, np.random.default_rng(seed), **graph_config
        )

        rng = np.random.default_rng(seed)
        meta_ratios, meta_win_rates = evaluate_model(
            meta_model=META_GNN_test,
            meta_model_type='gnn',
            base_models=[GNN1, GNN2, GREEDY],
            instances=instances,
            batch_size=batch_size,
            rng=rng,
            num_realizations=5,
            baselines=['greedy', 'lp_rounding']
        )

        # (Disabled) per-base-model runs: the same evaluate_model call with
        # meta_model=None, meta_model_type=None and base_models=[GNN1] or
        # [GNN2], each after re-seeding `rng` from `seed`; their results would
        # slot in between the "learned" and "greedy" rows.

        stacked_ratios = np.array(
            [meta_ratios[key] for key in ("learned", "greedy", "lp_rounding")]
        )
        data[graph_config['graph_type']].append(stacked_ratios)

In [None]:
import os
import pickle

# Persist the sweep results. The target directory is created first so that a
# missing 'data/' folder does not abort the dump after the long sweep.
results_path = 'data/er_ba_geom_ratios_meta_test.pickle'
os.makedirs(os.path.dirname(results_path), exist_ok=True)
with open(results_path, 'wb') as handle:
    pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
import matplotlib.pyplot as plt

# x-axis: first size divided by second size for every node configuration.
ratios = [x/y for (x,y) in node_configs]
for graph_type, comp_ratios in data.items():
    # Each entry of comp_ratios has rows 0 = learned (meta), 1 = greedy,
    # 2 = LP rounding, as stacked by the sweep cell; average each row.
    meta_avg_ratios = [np.array(stacked[0]).mean() for stacked in comp_ratios]
    greedy_avg_ratios = [np.array(stacked[1]).mean() for stacked in comp_ratios]
    lp_match_avg_ratios = [np.array(stacked[2]).mean() for stacked in comp_ratios]
    # Kept for the disabled 'MAX' curve (per-instance best of rows 1:3);
    # the disabled GNN1/GNN2 curves would use their own rows the same way.
    max_avg_ratios = []

    print(graph_type)
    fig = plt.figure(figsize=(8,6))
    plt.title(graph_type)
    # Plot order is kept so the default colour assignment does not change.
    curves = (
        (lp_match_avg_ratios, 'LP ROUNDING'),
        (greedy_avg_ratios, 'Greedy'),
        (meta_avg_ratios, 'GNN'),
    )
    for series, label in curves:
        plt.plot(ratios, series, label=label)
    plt.xlabel('# online / # offline')
    plt.ylabel('Average competitive ratio')
    plt.legend()
    plt.show()


In [None]:
# Stack the three ratio sequences as rows and transpose, giving columns in the
# order GNN1, GNN2, greedy (one row per instance, assuming each input is a 1-D
# per-instance sequence; TODO confirm), then show the first ten rows.
# NOTE(review): gnn1_learned_ratios, gnn2_learned_ratios and greedy_ratios are
# not assigned anywhere in the visible notebook, so this cell depends on
# leftover kernel state and raises NameError on a fresh run.
comp = np.vstack([gnn1_learned_ratios, gnn2_learned_ratios, greedy_ratios]).T
comp[:10]

In [None]:
# `comp` is built in the previous cell with columns GNN1, GNN2, greedy
# (assuming 1-D per-instance inputs, so rows are instances). The per-instance
# best of the two GNNs is therefore a max over the first two *columns*;
# slicing `comp[:2, :]` would instead take only the first two instances.
print(np.max(comp[:, :2], axis=1).mean())
# Mean of each column, i.e. one average per method.
print(comp.mean(axis=0))

In [None]:
# Row indices of the largest gap in each direction between the first two
# columns of `comp` (column 0 = GNN1, column 1 = GNN2 per the vstack order).
gnn1_column, gnn2_column = comp[:, 0], comp[:, 1]
index1 = np.argmax(gnn1_column - gnn2_column)  # GNN1 furthest ahead of GNN2
index2 = np.argmax(gnn2_column - gnn1_column)  # GNN2 furthest ahead of GNN1

In [None]:
# Features of the instance at index1 (the row where column 0 of `comp` exceeds
# column 1 by the most).
# NOTE(review): `instances` is whatever the sweep loop sampled last; confirm it
# lines up row-for-row with `comp`.
tc._featurize(instances[index1])

In [None]:
# Features of the instance at index2 (the row where column 1 of `comp` exceeds
# column 0 by the most).
# NOTE(review): `instances` is whatever the sweep loop sampled last; confirm it
# lines up row-for-row with `comp`.
tc._featurize(instances[index2])

In [None]:
# Featurize every instance once, in order, and sort its features into one of
# two buckets: GNN2_features when column 1 of `comp` is strictly larger than
# column 0 for that row, GNN1_features otherwise (ties included).
GNN2_features, GNN1_features = [], []
gnn2_is_better = comp[:, 1] > comp[:, 0]
for row, gnn2_wins in enumerate(gnn2_is_better):
    bucket = GNN2_features if gnn2_wins else GNN1_features
    bucket.append(tc._featurize(instances[row]))

# Stack each bucket into a single array.
out1 = np.vstack(GNN1_features)
out2 = np.vstack(GNN2_features)


In [None]:
# Mean over the stacked feature rows in out1 (instances where column 1 of
# `comp` was not larger than column 0).
out1.mean(axis=0)

In [None]:
# Mean over the stacked feature rows in out2 (instances where column 1 of
# `comp` was larger than column 0).
out2.mean(axis=0)