In [None]:
import csv
import time
import keras
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

import utils

In [None]:
# Base name (without extension) of the training CSV under ./data; it is also
# embedded in the output file name, the plot title and the saved figure name.
filename = "data_oneleak_train(stratify)_without_outliers"

In [None]:
# Load the training points (file selected by `filename`) together with the
# vacuum-port table and the hyperplane table, all via the project's reader.
df_points = utils.read_data(path=f"./data/{filename}.csv")
df_vacuumports = utils.read_data(path="./data/vacuumports.csv")
df_hyperplanes = utils.read_data(path="./data/hyperplanes.csv")

In [None]:
# Sanity check of the loaded training frame: (number of rows, number of columns).
df_points.shape

In [None]:
# outliers = [15,38,208,218,293]
# df_points = df_points.drop(index=outliers).reset_index().drop(["index"],axis=1)

In [None]:
# X holds every column except y1/y2; Y holds the (y1, y2) columns.
X, Y = df_points.drop(["y1","y2"],axis=1), df_points[["y1","y2"]]
# W (columns w1, w2 of the hyperplane table) stays fixed; b (column b) is the quantity being trained.
W, b = tf.constant(df_hyperplanes[["w1","w2"]], dtype=tf.float32), tf.Variable(df_hyperplanes["b"], dtype=tf.float32)
# theta is a second name for the very same tf.Variable as b, not a copy.
theta = b
# b is a tf.Variable because its value has to be optimized; a tf.constant tensor cannot be changed after creation.

In [None]:
# Per-region normals and offsets derived from the (untrained) hyperplane table.
# The literal 10 is the same value that is passed as total_regions=10 when this
# helper is called again after training.
voronoi_regions_normals, voronoi_regions_offsets = utils.create_voronoi_regions_normals_and_offsets(df_hyperplanes,10)

$$
\begin{gathered}
s_{j,k} \in \{-1, 0, 1\}, \qquad
s_k = \begin{pmatrix}
s_{1,k} \\
s_{2,k} \\
\vdots \\
s_{n,k}
\end{pmatrix} \\[6pt]
s_{j,k} = \begin{cases}
0 & \text{if } j \notin I_k \\
1 & \text{if point lies within the region} \\
-1 & \text{if point lies outside of the region}
\end{cases}
\end{gathered}
$$

where $n$ is the number of hyperplanes and $I_k$ is the set of hyperplanes related to region $k$.

In [None]:
def generate_s_k(region):
    """
    Build the sign vector s_k of one Voronoi region over all hyperplanes.

    Entry j of the result is
         1  if hyperplane j is listed under `region` in df_hyperplanes,
        -1  if hyperplane j is listed under a different region but
            utils.same_hyperplane reports it to be the same hyperplane as
            one of the hyperplanes listed under `region`,
         0  otherwise (the hyperplane is not related to `region`).

    Parameters
    ----------
    region : int-like
        Region label as stored in the "region" column of df_hyperplanes.
        It is converted with int() once, so anything int() accepts works
        (Python int, NumPy scalar, eagerly evaluated scalar tensor).

    Returns
    -------
    tf.Tensor
        float32 vector with one entry per row of df_hyperplanes, in row order.

    Notes
    -----
    Reads the notebook-level df_hyperplanes frame.
    """
    # Normalise once so the row filter and the per-row comparison below use
    # the same plain integer, whatever type the caller passed in.
    region = int(region)

    # Hyperplanes of the requested region, extracted once instead of being
    # re-iterated for every other row of the table.
    own_rows = df_hyperplanes[df_hyperplanes["region"] == region]
    own_hyperplanes = [(own["w1"], own["w2"], own["b"]) for _, own in own_rows.iterrows()]

    s_k = np.zeros(df_hyperplanes.shape[0])
    # Write by position, not by index label: the result must line up with the
    # row order of df_hyperplanes even if its index is not 0..n-1.
    for position, (_, row) in enumerate(df_hyperplanes.iterrows()):
        if row["region"] == region:
            s_k[position] = 1
            continue
        W_row = tf.constant([row["w1"], row["w2"]], dtype=tf.float32)
        b_row = tf.constant(row["b"], dtype=tf.float32)
        # any() stops at the first match, like the explicit break it replaces.
        if any(utils.same_hyperplane(W_row, b_row, w1, w2, offset) for w1, w2, offset in own_hyperplanes):
            s_k[position] = -1
    return tf.constant(s_k, dtype=tf.float32)

#### Creating a custom loss function

$$
s^{(i)} := s_{\tilde{k}^{(i)}}, \qquad \tilde{k}^{(i)} := \underset{k = 1, \ldots, d}{\arg \max} \ x^{(i)}_k \\
$$

$$
\min_{\theta} \ \frac{1}{m} \sum_{i=1}^{m} \mathbf{1}^T \text{ReLU}\left(s^{(i)} \circ \left[ \mathbf{W}y^{(i)} + \mathbf{b} \right]\right) \\
\text{Where } \mathbf{1} = (1, 1, 1, \ldots)^T
$$

We use a dictionary `s_k_dict` to store the value of s_k for each region. This reduces the overall run time, because s_k is computed only once per region instead of being recomputed every time a point of that region is processed.

In [None]:
# Cache: 1-based region label -> s_k tensor, one entry per row of df_vacuumports.
s_k_dict = {
    region: generate_s_k(region=region)
    for region in range(1, len(df_vacuumports.index) + 1)
}

In [None]:
def custom_cost_function(theta,df_points):
    """
    Average cost of the offsets `theta` over all rows of `df_points`.

    cost = (1/m) * sum_i 1^T ReLU(s^(i) o [W y^(i) + theta])

    For each row the region is the 1-based position of the largest value
    among the first ten columns, and s^(i) is the cached sign vector of that
    region taken from s_k_dict. W is rebuilt from the w1/w2 columns of the
    notebook-level df_hyperplanes.

    Parameters
    ----------
    theta : tensor-like
        Offsets added to every row of W y (one value per hyperplane).
    df_points : pandas.DataFrame
        Needs columns "y1" and "y2"; its first ten columns are the values the
        region is picked from (presumably the vacuum-port flows).

    Returns
    -------
    Scalar tensor: the summed ReLU terms divided by m = len(df_points).
    """
    normals = tf.constant(df_hyperplanes[["w1","w2"]], dtype=tf.float32)
    coordinates = tf.constant(df_points[["y1","y2"]].values, dtype=tf.float32)

    # Region of each row = position of its largest flow value, shifted to 1-based.
    flows = df_points.iloc[:,:10].values
    region_labels = (tf.argmax(flows, axis=1) + 1).numpy()
    signs = tf.constant(np.array([s_k_dict[label] for label in region_labels]))

    # Row i holds W y^(i) + theta, i.e. one value per hyperplane.
    normals_t = tf.transpose(normals)
    affine = tf.matmul(coordinates, normals_t) + theta
    penalties = tf.nn.relu(signs * affine)

    # Add up every penalty term, then average over the number of points.
    total = tf.reduce_sum(penalties)
    return total/len(df_points)

#### Creating a simple model

In [None]:
# Shapes of the feature frame X and of the coordinate frame Y.
X.shape, Y.shape

In [None]:
class VoronoiModel(tf.keras.Model):
    """
    Per-sample cost 1^T ReLU(s_k o (W y + b)) for one training point.

    Parameters
    ----------
    W : tensor
        Hyperplane normals, one row per hyperplane (kept as given).
    b : tensor or tf.Variable
        Hyperplane offsets, one per hyperplane. Pass the tf.Variable that is
        being trained so that gradients can flow to it.
    """

    def __init__(self,W,b):
        super().__init__()
        self.W = W
        self.b = b

    def call(self,inputs):
        """
        Return the scalar cost of a single (x, y) sample.

        `inputs` is a pair (x, y): x is the feature row whose arg-max position
        (1-based) selects the region, y is the point inserted into W y + b.
        The region index is converted to a Python int, so this must run
        eagerly (the sign-vector lookup is plain Python as well).
        """
        x, y = inputs
        flow_region = int(tf.argmax(x) + 1)      # feature extraction

        # Reuse the sign vector cached per region; rebuilding it walks the
        # whole hyperplane table and would be repeated for every sample.
        s_k = s_k_dict.get(flow_region)
        if s_k is None:
            # Region missing from the cache: compute it on demand.
            s_k = generate_s_k(region=flow_region)

        # Align y with W's dtype so a float64 row from the dataset does not
        # clash with the float32 normals (no-op when they already match).
        y = tf.cast(y, self.W.dtype)
        result = tf.tensordot(self.W,y,axes=1) + self.b # W.y + b
        # Summing the entries is the same as the dot product with a ones vector.
        return tf.reduce_sum(tf.nn.relu(s_k * result))

#### Preparing the data

In [None]:
# Slice X and Y row by row: every dataset element is one (x, y) pair.
train_dataset = tf.data.Dataset.from_tensor_slices((X,Y))

In [None]:
# Cost at the starting offsets (theta as loaded from the hyperplane table); baseline for the training below.
initial_cost = custom_cost_function(theta=theta,df_points=df_points)
print(f"The initial cost is {initial_cost}")

#### Training loop

In [None]:
# Training hyper-parameters.
epochs = 1
learning_rate = 1.01

# Empty per-step training log with the columns recorded during training.
# Defined here so that `result` exists on a freshly started kernel.
result = pd.DataFrame(columns=["Epoch","Learning rate","Optimizer","Step","Accuracy","Loss value","Theta(b)"])

In [None]:
# Initiating an optimizer
# optimizer_name is the label written into the output file name, the CSV rows
# and the plot titles; keep it in sync with the optimizer class chosen here.
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
optimizer_name = "SGD"

In [None]:
# @tf.function
def train_step(x, y, df_points):
    """
    Apply one optimizer update to theta using a single sample.

    The per-sample cost of (x, y) is evaluated by a VoronoiModel built from
    the notebook-level W and theta, its gradient with respect to theta is
    handed to the notebook-level optimizer, and the cost over all of
    `df_points` is then evaluated with the updated theta.

    Parameters
    ----------
    x, y : one element of the training dataset (feature row, coordinates).
    df_points : frame passed on to custom_cost_function.

    Returns
    -------
    The value of custom_cost_function after the update.
    """
    sample_model = VoronoiModel(W=W,b=theta)
    with tf.GradientTape() as tape:
        sample_cost = sample_model(inputs=(x,y))
    gradients = tape.gradient(sample_cost,[theta])
    optimizer.apply_gradients(zip(gradients, [theta]))
    return custom_cost_function(theta=theta, df_points=df_points)

In [None]:
start_time = time.time()
initial_accuracy = utils.calculate_accuracy(df_hyperplanes=df_hyperplanes,df_points=df_points,theta=theta)
print(f"Initial accuracy: {initial_accuracy}")

accuracy = initial_accuracy

# One log row per optimisation step. The rows are gathered in a plain list and
# turned into a DataFrame once after the loop: growing a frame with pd.concat
# on every step copies the whole frame each time. Building `result` from
# scratch also means re-running this cell does not pile rows onto an older run.
result_columns = ["Epoch","Learning rate","Optimizer","Step","Accuracy","Loss value","Theta(b)"]
result_rows = []

for epoch in range(epochs):
    for step, (x_train,y_train) in enumerate(train_dataset):
        loss_value = train_step(x=x_train,y=y_train,df_points=df_points)
        # Accuracy is re-evaluated on the full training frame after every update.
        accuracy = utils.calculate_accuracy(df_hyperplanes=df_hyperplanes,df_points=df_points,theta=theta)
        # theta.numpy() is a snapshot of the offsets at this step.
        result_rows.append([epoch,learning_rate,optimizer_name,step,accuracy,loss_value.numpy(),[theta.numpy()]])
    print(f"Accuracy: {accuracy}, Training loss: {loss_value} -> for epoch {epoch}")
result = pd.DataFrame(result_rows, columns=result_columns)
end_time = time.time()
print(f"Total time required: {end_time-start_time} seconds")


In [None]:
# The log file name encodes the run: <epochs>_<learning rate>_<optimizer>_<training file>.csv
output_filename = f"{epochs}_{learning_rate}_{optimizer_name}_{filename}.csv"

In [None]:
# Write the per-step training log; the ./Outputs directory has to exist already.
result.to_csv(f"./Outputs/{output_filename}")

In [None]:
# Accuracy on the training frame with the trained theta; kept as a fraction and shown as a percentage.
training_accuracy = utils.calculate_accuracy(df_hyperplanes,df_points,theta)
print(f"The accuracy for training data is {round(training_accuracy*100,2)}")

In [None]:
# Display the cost over the full training frame at the trained theta.
custom_cost_function(theta,df_points)

In [None]:
# loss_value is whatever the last executed training step returned.
print(f"Loss value after {epochs} epoch: {loss_value}")

In [None]:
# Show the trained offsets.
print(f"New theta: {theta}")

In [None]:
# Overwrite the offsets in the hyperplane table with the trained values
# (in place: the originally loaded "b" column is gone after this cell) ...
df_hyperplanes["b"] = theta.numpy()
# ... and rebuild the per-region normals/offsets from the updated table.
voronoi_regions_normals, voronoi_regions_offsets = utils.create_voronoi_regions_normals_and_offsets(df=df_hyperplanes,total_regions=10)

In [None]:
def marker_style(row):
    """Return "o" when the row's true and predicted regions agree, otherwise "x"."""
    return "o" if row["True region"] == row["Predicted region"] else "x"

In [None]:
# True region label of every training point, as determined by utils.true_regions.
true_region = utils.true_regions(df=df_points)

In [None]:
# Add three columns to the training frame (in place) for plotting.
df_points["True region"] = true_region
# Region the trained Voronoi partition assigns to each point, evaluated one point at a time from its (y1, y2).
df_points["Predicted region"] = [utils.which_region(voronoi_regions_normals,voronoi_regions_offsets,np.array([row["y1"], row["y2"]]))  for _, row in df_points.iterrows()]
# "o" where true and predicted region match, "x" where they differ.
df_points["Marker"] = df_points.apply(marker_style, axis=1)

In [None]:
# Point sets handed to utils.plot_contour, read with np.loadtxt's default settings.
points_fill = np.loadtxt("./data/fill.csv")
points_contour = np.loadtxt("./data/contour.csv")

In [None]:
# Background sample for the region plots: n_test_points locations drawn
# uniformly from the bounding box of the contour points, each labelled with
# the region the trained partition assigns to it.
n_test_points = 5000000

# Dedicated, seeded generator so the sample (and with it the saved figures)
# is identical on every run and the global NumPy random state is left alone.
test_points_rng = np.random.default_rng(0)
# y has shape (2, n_test_points): row 0 is the first coordinate, row 1 the second.
y = np.stack([
    test_points_rng.uniform(low=points_contour[:, axis].min(),
                            high=points_contour[:, axis].max(),
                            size=n_test_points)
    for axis in (0, 1)
], axis=0)

y_regions = np.zeros(n_test_points)
# Walk over the NumPy rows directly; routing the array through tf.transpose
# would create one tensor slice per point for no benefit.
for index, point in enumerate(y.T):
    y_regions[index] = utils.which_region(voronoi_regions_normals,voronoi_regions_offsets,np.array(point))

#### Visualisation for training data

In [None]:
# Training-data figure: contour outline, background sample coloured by
# predicted region (bottom layer), training points coloured by true region
# with the marker showing whether the prediction matches (top layer).
fig = plt.figure(figsize=(20,10))

utils.plot_contour(points=points_contour,points_fill=points_fill)

# zorder=1 keeps the training points above the background sample drawn below.
sns.scatterplot(df_points,x="y1",y="y2",hue="True region",palette="colorblind",s=30,style="Marker", markers={"x": "X", "o": "o"},linewidth=1,zorder=1)

# y is (2, n); transpose it so each row of the frame is one (y1, y2) point.
df_test = pd.DataFrame(data=np.concatenate([y.T], axis=1), columns=['y1', 'y2'])
df_test['region'] = y_regions
sns.scatterplot(df_test, x='y1', y='y2', hue='region', palette='colorblind', s=10, linewidth=0, zorder=0, legend=False)

# Replace the automatic legend with a two-entry key for the marker shapes.
plt.legend(title="Marker", handles=[
    plt.Line2D([], [], marker='X', linestyle='None', color='black', label='Point in wrong region'),
    plt.Line2D([], [], marker='o', linestyle='None', color='black', label='Point in correct region')
])
# NOTE(review): the title hard-codes "with outliers" although the training file
# name contains "without_outliers" - confirm which label is intended.
plt.title(f"Visualisation for Training data.\nOptimizer = {optimizer_name}, Epochs = {epoch+1},\nAccuracy = {round(training_accuracy*100,2)}, Loss = {round(float(loss_value.numpy()),2)}\nwith outliers({filename}.csv)")
plt.show()

In [None]:
# Save the training figure; the target directory has to exist already.
# NOTE(review): the path says "With outliers" while the training file name
# contains "without_outliers" - confirm the intended folder.
fig.savefig(f"./plots/Final/Stratify/With outliers/Adaptive_voronoi_training_{filename}.png")

In [None]:
# Append a one-row summary of this run to ./results.csv.
# newline="" is what the csv module requires for files it writes to; without
# it, platforms that translate "\n" to "\r\n" put a blank line after every row.
with open("./results.csv", "a", encoding="UTF-8", newline="") as csvfile:
    csvwrite = csv.writer(csvfile)
    csvwrite.writerow([epoch+1,learning_rate,optimizer_name,training_accuracy*100,loss_value.numpy(),[theta.numpy()],end_time-start_time])

In [None]:
# Append the training-set row of this run to ./Comparisons.csv.
# The epoch column uses epoch+1 (the number of epochs actually run), the same
# expression the results.csv row and the test row use, instead of a fixed 50.
# newline="" is what the csv module requires for files it writes to.
with open("./Comparisons.csv", "a", encoding="UTF-8", newline="") as csvfile:
    csvwrite = csv.writer(csvfile)
    csvwrite.writerow(["Adaptive Voronoi","Train",epoch+1,optimizer_name,learning_rate,training_accuracy*100,loss_value.numpy(),[theta.numpy()],f"{filename}.csv",True,end_time-start_time])

### Visualisation for testing data

In [None]:
# Base name (without extension) of the held-out CSV under ./data; also used in
# the test plot title, the saved figure name and the comparison row.
test_filename = "data_oneleak_test(stratify)_with_outliers"

In [None]:
# NOTE(review): the training frame is loaded through utils.read_data, this one
# through plain pd.read_csv - confirm both yield the same column layout.
df_points_test = pd.read_csv(f"./data/{test_filename}.csv")

In [None]:
# Evaluate the trained partition on the held-out frame: predicted region per
# point, accuracy, and the same cost function that was used for training.
predicted_region = utils.predicted_regions(voronoi_regions_normals=voronoi_regions_normals,voronoi_regions_offsets=voronoi_regions_offsets,df=df_points_test)
testing_accuracy = utils.calculate_accuracy(df_hyperplanes,df_points=df_points_test,theta=theta)
testing_loss = custom_cost_function(theta,df_points_test)

In [None]:
# testing_accuracy is a fraction; it is shown as a percentage with two decimals.
print(f"The accuracy on unseen data is {round(testing_accuracy*100,2)}")

In [None]:
# Cost of the trained theta on the held-out frame.
print(f"The loss on unseen data is {testing_loss.numpy()}")


In [None]:
# Add the same three plotting columns to the held-out frame (in place).
df_points_test["True region"] = utils.true_regions(df=df_points_test)
df_points_test["Predicted region"] = predicted_region
# "o" where true and predicted region match, "x" where they differ.
df_points_test["Marker"] = df_points_test.apply(marker_style, axis=1)

In [None]:
# Held-out-data figure, built the same way as the training figure: contour
# outline, background sample coloured by predicted region (bottom layer),
# test points coloured by true region with match/mismatch markers (top layer).
fig = plt.figure(figsize=(20,10))

utils.plot_contour(points=points_contour,points_fill=points_fill)

# zorder=1 keeps the test points above the background sample drawn below.
sns.scatterplot(df_points_test,x="y1",y="y2",hue="True region",palette="colorblind",s=30,style="Marker", markers={"x": "X", "o": "o"},linewidth=1,zorder=1)

# y is (2, n); transpose it so each row of the frame is one (y1, y2) point.
df_test = pd.DataFrame(data=np.concatenate([y.T], axis=1), columns=['y1', 'y2'])
df_test['region'] = y_regions
sns.scatterplot(df_test, x='y1', y='y2', hue='region', palette='colorblind', s=10, linewidth=0, zorder=0, legend=False)

# Replace the automatic legend with a two-entry key for the marker shapes.
plt.legend(title="Marker", handles=[
    plt.Line2D([], [], marker='X', linestyle='None', color='black', label='Point in wrong region'),
    plt.Line2D([], [], marker='o', linestyle='None', color='black', label='Point in correct region')
])
plt.title(f"Visualisation for Testing data.\nOptimizer = {optimizer_name}, Epochs = {epoch+1},\nAccuracy = {round(testing_accuracy*100,2)}, Loss = {round(float(testing_loss.numpy()),2)}\nwith outliers({test_filename}.csv)")
plt.show()

In [None]:
# Save the held-out-data figure; the target directory has to exist already.
fig.savefig(f"./plots/Final/Stratify/With outliers/Adaptive_voronoi_testing_{test_filename}.png")

In [None]:
# Append the held-out-set row of this run to ./Comparisons.csv.
# newline="" is what the csv module requires for files it writes to; without
# it, platforms that translate "\n" to "\r\n" put a blank line after every row.
with open("./Comparisons.csv", "a", encoding="UTF-8", newline="") as csvfile:
    csvwrite = csv.writer(csvfile)
    csvwrite.writerow(["Adaptive Voronoi","Test",epoch+1,optimizer_name,learning_rate,round(testing_accuracy*100,2),testing_loss.numpy(),[theta.numpy()],f"{test_filename}.csv",True,end_time-start_time])