In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# import dependencies
from scipy.spatial import KDTree
import numpy as np
import plotly.express as px
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.datasets import make_circles

from gdeep.create_nets import Net, train_classification_nn

from fastai import *
from fastai.tabular import * 
from fastai.tabular.all import *

## Utility functions

These functions are only needed to reproduce the algorithm of the paper.

In [9]:
# label dataset
def _net_device(net):
    """Return the device of ``net``'s first parameter, or CPU if it exposes none."""
    parameters = getattr(net, "parameters", None)
    if callable(parameters):
        first = next(iter(parameters()), None)
        if first is not None:
            return first.device
    return torch.device("cpu")


def labelling(X, net):
    """Label every 2D point of ``X`` with the class predicted by ``net``.

    Each point is fed to the network on its own, as a ``(1, 2)`` float
    tensor, and the label is the index of the largest output along the last
    axis.

    Parameters
    ----------
    X : iterable of numpy arrays
        The points to label; each row must reshape to ``(1, -1)`` and the
        whole collection must fit a two-column (``x``, ``y``) DataFrame.
    net : object with ``forward(None, tensor)``
        The classifier. The first positional argument is always ``None``
        (presumably the categorical input of a tabular-style model --
        confirm against ``gdeep.create_nets.Net``).

    Returns
    -------
    pandas.DataFrame
        Columns ``x``, ``y`` and ``label``, where ``label`` holds the
        predicted class index as a string (e.g. ``"0"``, ``"1"``).

    Notes
    -----
    The input tensor is moved to the device holding the network's
    parameters, so CPU and GPU models are both supported without guessing.
    Errors raised by the forward pass propagate unchanged instead of being
    swallowed and retried on another device.
    """
    device = _net_device(net)
    labels = []
    for x in X:
        x_tensor = torch.from_numpy(x.reshape(1, -1)).float().to(device)
        # .cpu() is a no-op for CPU models and required before .numpy() on GPU.
        scores = net.forward(None, x_tensor).detach().cpu().numpy()
        labels.append(str(np.argmax(scores, axis=-1)[0]))
    df = pd.DataFrame(X, columns=["x", "y"])
    df["label"] = labels
    return df

# prepare computations of NN with KDtree, classwise
def build_trees(df):
    """Split the labelled points by class and index each class in a KD-tree.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``label`` column containing the strings ``"0"`` and
        ``"1"``; its first two columns are taken as the point coordinates.

    Returns
    -------
    tuple
        ``(Q0_tree, Q1_tree, Q_0, Q_1)``: the KD-trees built on the class-0
        and class-1 points, followed by the corresponding float64 coordinate
        arrays, one row per point.

    Notes
    -----
    The coordinate columns are sliced out *before* converting to numpy, so
    the string ``label`` column does not force the result to object dtype.
    """
    coords = df.iloc[:, :2]
    Q_0 = coords[df["label"] == "0"].to_numpy(dtype=float)
    Q_1 = coords[df["label"] == "1"].to_numpy(dtype=float)
    Q0_tree = KDTree(Q_0)
    Q1_tree = KDTree(Q_1)
    return Q0_tree, Q1_tree, Q_0, Q_1

# home made algorithm, definitely not very efficient...
def project(x, xA, xB, n_iter=400, step_scale=4, rng=None):
    """Slide ``x`` along the direction ``xA - xB`` towards equidistance.

    Runs a random search over ``alpha`` in ``x + alpha * (xA - xB)``, keeping
    a candidate only when it reduces ``| ||xA - q|| - ||xB - q|| |``. The
    proposal width shrinks as ``step_scale / (i + 1)`` at iteration ``i``.

    Parameters
    ----------
    x, xA, xB : numpy arrays of the same shape
        The point to move and the two anchor points.
    n_iter : int, optional
        Number of random proposals (default 400, the original constant).
    step_scale : float, optional
        Width of the first proposal interval (default 4, the original
        constant).
    rng : optional
        Source of uniform samples exposing ``random()``, e.g. a
        ``numpy.random.Generator``. When ``None`` the global ``numpy.random``
        state is used, exactly as before.

    Returns
    -------
    numpy array
        The best point found; equal to ``x + 0 * (xA - xB)`` when no proposal
        improves on the starting point.

    Notes
    -----
    NOTE(review): when ``xA != xB`` the exact minimiser is
    ``alpha = ((xA + xB) / 2 - x) . v / (v . v)`` with ``v = xA - xB``; the
    random search is kept here to stay faithful to the original procedure.
    """
    sampler = np.random if rng is None else rng
    v = xA - xB

    def gap(point):
        # How far `point` is from being equidistant to the two anchors.
        return np.abs(np.linalg.norm(xA - point) - np.linalg.norm(xB - point))

    alpha = 0
    xqf = x + alpha * v
    dist = gap(xqf)
    for i in range(n_iter):
        # Uniform proposal centred on the current best alpha.
        new_alpha = alpha + step_scale / (i + 1) * (sampler.random() - 0.5)
        new_dist = gap(x + new_alpha * v)
        if new_dist < dist:
            dist = new_dist
            alpha = new_alpha
            xqf = x + alpha * v
    return xqf

# algorithm 1
def move_x_around(X, Q0_tree, Q1_tree, Q_0, Q_1):
    """Move every point of ``X`` between its nearest neighbours of each class.

    For each point, the nearest class-0 point and the nearest class-1 point
    are looked up through the KD-trees, and the point is replaced by the
    result of ``project`` applied to it and that pair.

    Parameters
    ----------
    X : iterable of points
        The points to move.
    Q0_tree, Q1_tree : KD-trees
        Trees supporting ``query(point, k=1)`` for class 0 and class 1.
    Q_0, Q_1 : arrays
        The point arrays indexed by the positions the trees return.

    Returns
    -------
    numpy.ndarray
        The moved points as a float64 array, in the same order as ``X``.
    """
    def relocate(point):
        # Only the neighbour indices are needed; the distances are discarded.
        _, nearest_0 = Q0_tree.query(point, k=1)
        _, nearest_1 = Q1_tree.query(point, k=1)
        return project(point, Q_0[nearest_0], Q_1[nearest_1])

    return np.array([relocate(point) for point in X]).astype('float64')

# keep trying initialising until there is a boundary in the plotted region!
def initialisation(max_attempts=10000):
    """Create random networks until one assigns more than one label to the grid.

    Every attempt builds a fresh ``Net(0, [2,10,50,20,10])`` and a freshly
    jittered 11 x 11 grid over ``[-2, 2] x [-2, 2]`` (uniform noise of at
    most 0.15 per coordinate), then labels the grid with ``labelling``.

    Parameters
    ----------
    max_attempts : int, optional
        Maximum number of networks to try (default 10000, the original
        constant).

    Returns
    -------
    tuple
        ``(net, X)``: the first network producing at least two distinct
        labels, and the float64 ``(121, 2)`` point cloud it was checked on.

    Raises
    ------
    RuntimeError
        If no attempt yields two distinct labels. Previously the function
        fell through and returned ``None``, which only failed later at the
        caller's tuple unpacking.
    """
    # The regular grid is the same on every attempt; only the noise changes.
    axis = np.linspace(-2, 2, 11)
    xs, ys = np.meshgrid(axis, axis)
    grid = np.stack((xs, ys), axis=-1).reshape(-1, 2)

    for attempt in range(max_attempts):
        # initialise network:
        net = Net(0, [2,10,50,20,10])

        # add noise to the uniform point cloud
        X = grid + (np.random.random(grid.shape)-0.5)*0.3

        # check if there is a boundary
        if len(np.unique(labelling(X,net).label))>1:
            print("Found! We can proceed")
            return net, X.astype('float64')

    raise RuntimeError(
        "No network with a decision boundary inside the sampled region was "
        "found after {} attempts".format(max_attempts)
    )

In [10]:
# Run the initialisation, keeping both the network and the point cloud it
# was checked on, then display the labels the network gives those points.
net, X = initialisation()

px.scatter(
    labelling(X, net),
    x="x",
    y="y",
    color="label",
)

Found! We can proceed


In [11]:
# run the full code of the paper
# Each epoch: label the current points, index them per class, move them.
# The labelled frame of every 10th epoch (taken before the move) is kept.
NUM_EPOCHS = 30
store_plot = []
for epoch in range(NUM_EPOCHS):
    df = labelling(X, net)
    Q0_tree, Q1_tree, Q_0, Q_1 = build_trees(df)
    X = move_x_around(X, Q0_tree, Q1_tree, Q_0, Q_1)
    if not epoch % 10:
        store_plot.append(df)

In [12]:
# plotting the snapshot stored every 10 time steps
for plot_df in store_plot:
    fig = px.scatter(
        plot_df,
        x="x",
        y="y",
        color="label",
    )
    fig.show()

# With a fixed dataset 

We now train a model on the circles dataset and inspect its decision boundary.

In [13]:
# fix dataset
# Seed the generator so the sampled dataset is identical on every run;
# without random_state, make_circles draws a new dataset each time.
CIRCLES_SEED = 0
X_fix, y_fix = make_circles(
    n_samples=1000,
    shuffle=True,
    noise=0.05,
    factor=0.5,
    random_state=CIRCLES_SEED,
)
df_fix = pd.DataFrame(X_fix, columns=["x", "y"])
# Labels are stored as strings, like the frames produced by labelling().
df_fix["label"] = list(map(str, y_fix))

fig = px.scatter(df_fix, x="x", y="y", color="label")
fig.show()

df_fix.head()

Unnamed: 0,x,y,label
0,0.543107,-0.138679,1
1,0.417176,-0.235464,1
2,0.886819,-0.489722,0
3,0.491577,0.019813,1
4,-0.42586,-0.338952,1


In [14]:
# Fit a fresh network on the fixed circles dataset; the returned object is
# used below through its `.model` attribute.
learn = train_classification_nn(
    nn=Net(0, [2, 5, 10, 5]),
    X=X_fix,
    y=y_fix,
)

epoch,train_loss,valid_loss,accuracy,time
0,0.675005,0.663857,0.785,00:00
1,0.674564,0.663209,0.785,00:00
2,0.673997,0.66243,0.785,00:00
3,0.673238,0.661388,0.8,00:00
4,0.672208,0.65994,0.805,00:00
5,0.670818,0.658028,0.81,00:00
6,0.668986,0.655693,0.82,00:00
7,0.666598,0.652848,0.83,00:00
8,0.663556,0.649596,0.84,00:00
9,0.659801,0.645866,0.85,00:00


In [15]:
# start the initialisation of the uniform point cloud
# Only the point cloud is kept; the network returned alongside it is discarded.
_, X = initialisation()

# The trained model replaces the random network from here on.
net2 = learn.model

px.scatter(
    labelling(X, net2),
    x="x",
    y="y",
    color="label",
)

Found! We can proceed


In [16]:
# run the full code of the paper with the trained model, moving only the uniform points
# Each epoch: label the current points with net2, index them per class, move them.
# The labelled frame of every 10th epoch (taken before the move) is kept.
NUM_EPOCHS = 30
store_plot = []
for epoch in range(NUM_EPOCHS):
    df = labelling(X, net2)
    Q0_tree, Q1_tree, Q_0, Q_1 = build_trees(df)
    X = move_x_around(X, Q0_tree, Q1_tree, Q_0, Q_1)
    if not epoch % 10:
        store_plot.append(df)

In [17]:
# plotting the snapshot stored every 10 time steps
for plot_df in store_plot:
    fig = px.scatter(
        plot_df,
        x="x",
        y="y",
        color="label",
    )
    fig.show()