In [None]:
import scipy
from matplotlib.colors import LogNorm
import networkx
import sklearn
import sklearn.cluster

In [None]:
import json
import matplotlib.pyplot as plt
import sys

import numpy as np
import sklearn
import keras

import os
# Switch to the parent directory; every data/model path used below is relative.
# NOTE(review): this is a relative chdir, so re-running this cell moves up one
# more level each time -- restart the kernel before re-running.
os.chdir("..")

# Make the "test" directory importable; presumably train_clustering lives there -- TODO confirm.
sys.path += ["test"]
from train_clustering import load_element_pairs, fill_target_matrix, fill_elem_pairs

In [None]:
# Load the training history (indexed below by "loss" and "val_loss").
# The context manager closes the file handle as soon as the JSON is parsed.
with open("clustering.json") as training_info_file:
    training_info = json.load(training_info_file)

In [None]:
# Plot the two recorded loss histories per epoch. The curves are labelled with
# their keys in training_info so they can be told apart in the legend.
plt.title("Edge classifier")
plt.plot(training_info["loss"], label="loss")
plt.plot(training_info["val_loss"], label="val_loss")
plt.ylabel("loss")
plt.legend()
plt.xlabel("epoch")

In [None]:
# Load a single event: the element table and the per-element block ids.
fn = "data/TTbar/191009_155100/step3_AOD_{0}_ev{1}.npz".format(1, 0)
# Indexing the archive materialises each array, so it can be closed right away.
with np.load(fn) as data:
    els = data["elements"]
    els_blid = data["element_block_id"]

# The distance matrix is stored in a companion file whose name is obtained by
# replacing "ev" with "dist" (note: str.replace affects every occurrence in the path).
with open(fn.replace("ev", "dist"), "rb") as fi:
    dm = scipy.sparse.load_npz(fi).todense()

# Create the matrix of elements that are connected according to the miniblock id
target_matrix = np.zeros((len(els_blid), len(els_blid)), dtype=np.int32)
fill_target_matrix(target_matrix, els_blid)

In [None]:
# Build a graph with one node per row of dm and one weighted edge per non-zero entry.
# from_numpy_matrix was removed in networkx 3.0; from_numpy_array is the supported
# equivalent. np.asarray converts the np.matrix returned by .todense() to a plain ndarray.
g = networkx.from_numpy_array(np.asarray(dm))

In [None]:
# Scatter the graph nodes only (edgelist=[] suppresses all edges), positioned by
# columns 2 and 3 of the element table, which the axis labels identify as eta and phi.
plt.figure(figsize=(5,5))
ax = plt.axes()
networkx.draw(
    g,
    pos=[(els[i, 2], els[i, 3]) for i in range(len(els))],
    # networkx's keyword for the target axes is `ax`; `axes` is not a recognised
    # drawing argument.
    node_size=1, ax=ax, alpha=0.2, edge_color="grey", edgelist=[])
# networkx.draw hides the axes, so switch them back on to show the coordinates.
plt.axis('on')
# Raw strings: "\e" and "\p" are invalid escape sequences in normal string literals.
plt.xlabel(r"$\eta$")
plt.ylabel(r"$\phi$")

In [None]:
# Same node scatter as above, additionally drawing the first (at most) 200 edges.
plt.figure(figsize=(5,5))
ax = plt.axes()
shown_edges = list(g.edges)[:200]
networkx.draw(
    g,
    pos=[(els[i, 2], els[i, 3]) for i in range(len(els))],
    # networkx's keyword for the target axes is `ax`; `axes` is not a recognised
    # drawing argument.
    node_size=1, ax=ax, alpha=0.2, edge_color="grey", edgelist=shown_edges)
# networkx.draw hides the axes, so switch them back on to show the coordinates.
plt.axis('on')
# Report the element count (previously the edge count was printed twice) and the
# number of edges actually drawn, which can be below 200 for sparse graphs.
plt.title("{0} elements,  {1} / {2} edges".format(len(els), len(shown_edges), len(g.edges)))
# Raw strings: "\e" and "\p" are invalid escape sequences in normal string literals.
plt.xlabel(r"$\eta$")
plt.ylabel(r"$\phi$")

In [None]:
# Per-event accumulators: pair features, dense distance matrices, true block ids
# and pair targets.
all_elem_pairs_X = []
all_dms = []
all_blockids_true = []
all_elem_pairs_y = []

for i in range(500):
    for j in range(6,7):
        fn = "data/TTbar/191009_155100/step3_AOD_{1}_ev{0}.npz".format(i, j)
        # Announce the file before touching it so a failing load is easy to locate.
        print("Loading {0}".format(fn))

        # Close each archive right after reading; otherwise every iteration
        # leaves an open file handle behind.
        with np.load(fn) as event_data:
            all_blockids_true.append(event_data["element_block_id"])

        elem_pairs_X, elem_pairs_y = load_element_pairs(fn)
        all_elem_pairs_X.append(elem_pairs_X)
        all_elem_pairs_y.append(elem_pairs_y)

        # Companion distance-matrix file ("ev" -> "dist" in the path).
        with open(fn.replace("ev", "dist"), "rb") as fi:
            dm = scipy.sparse.load_npz(fi).todense()
        all_dms.append(dm)

In [None]:
# Stack the per-event chunks row-wise into one feature array and one target array.
elem_pairs_X, elem_pairs_y = map(np.vstack, (all_elem_pairs_X, all_elem_pairs_y))

In [None]:
from collections import Counter

In [None]:
# Load the previously trained Keras model from clustering.h5 (path is relative
# to the current working directory).
model = keras.models.load_model("clustering.h5")

In [None]:
# Run the loaded model over every stacked element pair, 10000 rows per batch.
pp = model.predict(elem_pairs_X, batch_size=10000)

In [None]:
from sklearn.linear_model import LogisticRegression

# Values matched against feature columns 0 and 1 (presumably the element type
# codes of the two pair members -- TODO confirm against load_element_pairs).
t0, t1 = 5, 1
# Boolean row mask selecting only the (t0, t1) pairs.
sel = np.logical_and(elem_pairs_X[:, 0] == t0, elem_pairs_X[:, 1] == t1)

# Fit a logistic regression on the selected pairs.
# NOTE(review): dumdum is not referenced by any later cell visible in this notebook.
dumdum = LogisticRegression(solver="lbfgs").fit(elem_pairs_X[sel], elem_pairs_y[sel, 0])

In [None]:
# ROC curves for the selected pairs. The plots below use index 0 as the FPR,
# index 1 as the TPR and index 2 as the thresholds.
# r  : scored with the model output pp.
# r2 : scored with 1 - feature column 2 only. The fitted `dumdum` logistic
#      regression is NOT used here.
r = sklearn.metrics.roc_curve(elem_pairs_y[sel, 0], pp[sel])
r2 = sklearn.metrics.roc_curve(elem_pairs_y[sel, 0], 1.0 - elem_pairs_X[sel, 2])

In [None]:
# Locate the working point on the model's ROC curve for the chosen threshold.
threshold = 0.5
# r[2] holds the thresholds in decreasing order, so search the reversed
# (increasing) view and map the insertion position back. idx is the number of
# thresholds >= threshold, i.e. the index of the first one strictly below it.
# The length must be that of the thresholds array; len(r2) was the length of
# the (fpr, tpr, thresholds) tuple, i.e. always 3.
idx = len(r[2]) - np.searchsorted(r[2][::-1], threshold)

In [None]:
# False- and true-positive rate of the model as a function of the cut value.
roc_thresholds = r[2]
for rate_idx, rate_label in enumerate(("edge FPR", "edge TPR")):
    plt.plot(roc_thresholds, r[rate_idx], label=rate_label)
plt.xlabel("threshold")
plt.ylabel("rate")
plt.xlim(0,1)
plt.legend()
plt.title("Element {0}<->{1} linking".format(t0, t1))

In [None]:
# ROC comparison for the selected element pair: model output vs. the
# single-feature baseline.
plt.figure(figsize=(5,5))
plt.plot(r[0], r[1], label="simple feedforward DNN")
# r2 is computed from 1 - feature column 2 (the distance), not from a fitted
# logistic regression, so the curve is labelled as what it actually shows.
plt.plot(r2[0], r2[1], color="black", ls="--", label="1 - distance baseline")
plt.title("Element {0}<->{1} linking".format(t0, t1))
plt.xlabel("edge FPR")
plt.ylabel("edge TPR")
plt.xlim(0,1)
# The curve labels are only rendered once a legend is requested.
plt.legend()
plt.ylim(0,1)

In [None]:
# Inspect the raw model outputs computed above.
pp

In [None]:
# Inspect the feature row of the first element pair.
elem_pairs_X[0]

In [None]:
import numba
@numba.njit
def pred_to_adj_matrix(nelems, pred, dm, adj_matrix):
    """Copy per-pair predictions into the upper triangle of ``adj_matrix``.

    Pairs ``(i, j)`` with ``i < j`` are visited in row-major order and the
    running pair counter selects the matching row of ``pred``. Only pairs with
    ``dm[i, j] > 0`` are written; all other entries of ``adj_matrix`` are left
    untouched.

    Args:
        nelems: Number of elements; indices run over ``range(nelems)``.
        pred: 2D array of predictions, one row per ``i < j`` pair; column 0 is used.
        dm: 2D matrix indexed as ``dm[i, j]``; a positive entry marks a pair to fill.
        adj_matrix: 2D output array, modified in place.

    Raises:
        AssertionError: if ``pred`` has fewer rows than pairs visited.
    """
    n = 0
    for i in range(nelems):
        for j in range(i+1, nelems):
            # n is used as a row index into pred, so it must be strictly less
            # than len(pred); "<=" would let pred[len(pred)] through.
            assert(n < len(pred))
            if dm[i,j] > 0:
                adj_matrix[i,j] = pred[n, 0]
            n += 1

In [None]:
def predict_clustering(fn):
    """Load the element table and the dense distance matrix of one event file.

    Despite its name, this function currently performs no prediction; the
    model-based steps are disabled (see the commented-out sketch after the
    return statement).

    Args:
        fn: Path to an event ``.npz`` file holding an ``"elements"`` array.
            The distance matrix is read from the path obtained by replacing
            every occurrence of ``"ev"`` in ``fn`` with ``"dist"``.

    Returns:
        Tuple ``(els, dm)``: the ``"elements"`` array and the distance matrix
        converted to dense form.
    """
    # Indexing the archive materialises the array, so the archive can be
    # closed immediately instead of leaking the file handle.
    with np.load(fn) as data:
        els = data["elements"]

    #Load the distance matrix
    with open(fn.replace("ev", "dist"), "rb") as fi:
        dm = scipy.sparse.load_npz(fi).todense()

    print(els.shape, dm.shape)
    return els, dm
# Disabled prediction steps, kept for reference:
#     nelem = len(els)
#     elem_pairs_X, elem_pairs_y = load_element_pairs(fn)
#     pp = model.predict(elem_pairs_X)

#     adj_matrix = np.zeros((nelem, nelem), dtype=np.float32)
#     pred_to_adj_matrix(nelem, pp, dm, adj_matrix)
#     return adj_matrix

In [None]:
# Show the most recently assigned event file name.
fn

In [None]:
# Load elements and dense distance matrix for one event.
# Note: this rebinds the notebook-level `els` and `dm` set by earlier cells.
els, dm = predict_clustering('data/TTbar/191009_155100/step3_AOD_7_ev0.npz')

In [None]:
@numba.njit
def set_triu(i1, i2, vec, mat):
    """Scatter ``vec`` into ``mat`` in place: ``mat[i1[k], i2[k]] = vec[k]``.

    One assignment is made for every ``k`` in ``range(len(i1))``.
    """
    n_entries = len(i1)
    for k in range(n_entries):
        row = i1[k]
        col = i2[k]
        mat[row, col] = vec[k]

In [None]:
# Enumerate all (row, col) index pairs of the upper triangle, diagonal included.
i1, i2 = np.triu_indices(len(els))
# Fancy-indexing dm yields a single-row 2D result here; take that row as a flat vector.
dmv = np.array(dm[i1, i2])[0, :]
# One model input row per pair: column 0 of each of the two elements plus the dm value.
vec = np.vstack([els[i1, 0], els[i2, 0], dmv]).T
# Use Model.predict, as in the earlier prediction cell: predict_proba was a thin
# wrapper around predict and is no longer available in current Keras releases.
p2 = model.predict(vec, batch_size=100000)
# Pairs with a zero dm entry get a score of zero regardless of the model output.
p2[dmv==0]=0

# Write the per-pair scores into a dense (nelem x nelem) matrix, upper triangle only.
mat = np.zeros((len(els), len(els)))
set_triu(i1, i2, p2[:, 0], mat)

In [None]:
# g1: graph with an edge for every non-zero entry of the distance matrix.
# g2: graph with an edge wherever the predicted score exceeds 0.5.
# from_numpy_matrix was removed in networkx 3.0; from_numpy_array is the supported
# equivalent. np.asarray converts the np.matrix returned by .todense() to an ndarray.
g1 = networkx.from_numpy_array(np.asarray(dm))
g2 = networkx.from_numpy_array(mat>0.5)

In [None]:
# Number of connected components in the distance-matrix graph.
# connected_component_subgraphs was removed in networkx 2.4; counting the
# components directly gives the same number without building the subgraphs.
networkx.number_connected_components(g1)

In [None]:
# Number of connected components in the thresholded prediction graph.
# connected_component_subgraphs was removed in networkx 2.4; counting the
# components directly gives the same number without building the subgraphs.
networkx.number_connected_components(g2)

In [None]:
# Leftover inspection cell: evaluates to the sklearn.metrics module object.
sklearn.metrics