In [1]:
# Regular Funcs
import os
import cv2
import glob
import shutil
import random

import pandas as pd
import pathlib
import numpy as np
import nibabel as nib
import matplotlib.pyplot as plt
from PIL import Image as ImagePIL
import plotly.express as px
import plotly.graph_objects as go

from numpy.random import randint
from sklearn.model_selection import train_test_split

In [2]:
# Statistics
from scipy import stats
from scipy import integrate

In [3]:
# Tensorflow
import tensorflow as tf
from tensorflow.keras.models import Sequential

from keras.optimizers import Adam
from keras.layers import Dense
from keras.layers import Conv2D
from keras.layers import Dropout
from keras.layers import LeakyReLU
from keras.utils.vis_utils import plot_model
from keras.layers import Conv2DTranspose
from keras.layers import Reshape
from keras import backend

from keras.layers import BatchNormalization
from keras.initializers import RandomNormal
from keras.constraints import Constraint

2023-07-25 11:40:23.416484: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Custom Funcs
from Unpack_Scaffold_Data import readAndOutputDataset, curveVisualization

# Data Read Utility

In [5]:
# Root folder of the scaffold print dataset and the modulus CSV stored inside it.
# NOTE(review): absolute, machine-specific location — consider a configurable data directory.
curve_path = "/Users/zacharyg/Documents/GitHub/fundemental-neural-nets/GANS/Scaffold_GAN/scaffold_dataset_WU_LAB/Prints"
modulus_path = curve_path + "/modulus_data_types.csv"

In [6]:
# Load the dataset through the project helper and unpack its four return values.
# The sanity check below reports X.shape and y.shape; what y_df and file_order hold
# is defined in Unpack_Scaffold_Data — TODO confirm against that module.
X, y, y_df, file_order = readAndOutputDataset(curve_path, modulus_path, reverse=True);

DOC COUNT: 675
Operation Finished.

     Index     Modulus  Spacing  Infill  Height  Speed  Temperature   Mass  \
0        1  358.528888      0.8       1     0.1     30          190  0.394   
1        2  301.639039      0.9       1     0.1     30          190  0.334   
2        3  292.501492      1.0       1     0.1     30          190  0.308   
3        4  258.539802      1.1       1     0.1     30          190  0.286   
4        5  238.213024      1.2       1     0.1     30          190  0.259   
..     ...         ...      ...     ...     ...    ...          ...    ...   
670    671  151.559731      0.8       3     0.2     50          230  0.428   
671    672   85.074096      0.9       3     0.2     50          230  0.341   
672    673   52.285252      1.0       3     0.2     50          230  0.290   
673    674   70.811230      1.1       3     0.2     50          230  0.292   
674    675   36.627466      1.2       3     0.2     50          230  0.244   

     Porosity    Type  
0  

In [7]:
# Sanity check: report the shapes of the loaded arrays.
for array_name, array in (("X", X), ("y", y)):
    print(array_name + " SHAPE:", array.shape)
print()


# Visualization (disabled)
# curveVisualization(X, y, file_order);

X SHAPE: (675, 2, 1803)
y SHAPE: (675, 10)



# Utility

In [8]:
def transposeStressData(X_Data):
    """Return a NumPy array holding the transpose (``.T``) of every item in ``X_Data``."""
    return np.array([sample.T for sample in X_Data])

def normalizeStressStrain(x):
    """
    Scale rows 0 and 1 of every curve by that row's own maximum, in place.

    Parameters
    -----------------
    x: indexable collection of curves; each curve exposes two rows, curve[0] and curve[1]
       (presumably stress and strain, going by the original variable names — TODO confirm)

    Returns
    -----------------
    x: the same object that was passed in, with both rows of each curve divided by their maxima
    """
    for curve in (x[position] for position in range(len(x))):
        # Read both peaks before either row is overwritten.
        row0_peak, row1_peak = np.max(curve[0]), np.max(curve[1])
        curve[0] = curve[0] / row0_peak
        curve[1] = curve[1] / row1_peak

    return x

def normalize(x):
    """
    Min-max scale the input into the range 0 to 1.

    Parameters
    -----------------
    x: array-like of numeric values (any shape)

    Returns
    -----------------
    (numpy array) ``(x - min) / (max - min)``, with min and max taken over all elements.
    When every element is equal (zero range) an all-zero float array of the same
    shape is returned instead of dividing by zero.
    """
    values = np.asarray(x)
    lowest = np.min(values)          # computed once and reused
    span = np.max(values) - lowest

    if span == 0:
        # Constant input: avoid 0/0, which would produce NaNs and a RuntimeWarning.
        return np.zeros(values.shape, dtype=float)

    return np.array((values - lowest) / span)

def stringtoCategorical(y):
    """
    Encode infill-type names as floats: "Cubic" -> 1.0, "Gyroid" -> 2.0, anything else -> 0.0.

    Returns a NumPy array with one single-element row per entry of ``y``.
    """
    names = (y[position] for position in range(len(y)))
    return np.array([
        [1.0 if name == "Cubic" else 2.0 if name == "Gyroid" else 0.0]
        for name in names
    ])

# Process Parameter Stripping

In [9]:
def parameterStrip(y):
    """
    Split a 10-column parameter table into its columns.

    Returns a 10-tuple in column order:
    Index, Modulus, Spacing, Infill, Height, Speed, Temp, Mass, Porosity, Type.
    """
    columns = y.T
    return tuple(columns[position] for position in range(10))

# Unpack the columns of the loaded parameter table `y` into module-level names.
Index, Modulus, Spacing, Infill, Height, Speed, Temp, Mass, Porosity, Type = parameterStrip(y);

def parameterStripInfill(y):
    """
    Split a 7-column per-infill table (as built by ``organizeParameters``) into named columns.

    ``organizeParameters`` emits its columns in the order
    Modulus, Porosity, Energy Absorption, Spacing, Height, Speed, Temp,
    so Spacing is column 3 and Height is column 4.

    Returns
    -----------------
    (Modulus, Porosity, Energy_Absorb, Height, Spacing, Speed, Temp) — the return order is
    unchanged; Height and Spacing are now read from the columns that actually hold them.
    """
    y_t = y.T;

    Modulus = y_t[0];
    Porosity = y_t[1];
    Energy_Absorb = y_t[2];
    Spacing = y_t[3];   # column 3 is Spacing in organizeParameters' output
    Height = y_t[4];    # column 4 is Height (first of the three printing parameters)
    Speed = y_t[5];
    Temp = y_t[6];

    return Modulus, Porosity, Energy_Absorb, Height, Spacing, Speed, Temp
    

# Energy Absorption Calculation

In [10]:
# Energy absorption per curve: Simpson's-rule integral of row 1 over row 0.
# `x` is passed by keyword because newer SciPy releases no longer accept it positionally.
# NOTE(review): assumes row 0 is the abscissa and row 1 the ordinate — confirm with the loader.
Energy_Absorption = np.array([
    integrate.simpson(curve[1], x=curve[0])
    for curve in X
]);

# Sanity Check
print(Energy_Absorption.shape);

(675,)


# Data Division based on Infill Type

In [11]:
def organizeParameters(_Data):
    """
    Select and reorder the columns of a 2-D parameter table.

    The output columns are, in order: Modulus (1), Porosity (8), Energy absorption (10),
    Spacing (2) and the three printing parameters (4-6), where the numbers are the
    source column indices in ``_Data``.

    Returns
    -----------------
    cut_params: new array made of the selected column slices concatenated along axis 1
    """
    column_slices = (
        np.s_[1:2],    # Modulus
        np.s_[8:9],    # Porosity
        np.s_[10:11],  # Energy absorption
        np.s_[2:3],    # Spacing
        np.s_[4:7],    # printing parameters
    )
    cut_params = np.concatenate([_Data[:, cols] for cols in column_slices], axis=1)
    return cut_params


# Append the energy-absorption values as an extra column of the parameter table.
# reshape(-1, 1) follows the actual number of curves instead of a hard-coded 675.
_y = cut_params = np.concatenate((
    y,
    np.reshape(Energy_Absorption, (-1, 1)),
), axis=1);

# Bucket every row by the infill name it contains; the first matching name wins,
# checked in the order Gyroid, Cubic, Line.
rows_by_type = {'Gyroid': [], 'Cubic': [], 'Line': []}

for curve in _y:
    for type_name, bucket in rows_by_type.items():
        if (type_name in curve):
            bucket.append(curve);
            break

Line_Data = np.array(rows_by_type['Line']);
Cubic_Data = np.array(rows_by_type['Cubic']);
Gyroid_Data = np.array(rows_by_type['Gyroid']);


# Reduce each group to the seven columns used downstream.
X_Line = organizeParameters(Line_Data);
X_Cubic = organizeParameters(Cubic_Data);
X_Gyroid = organizeParameters(Gyroid_Data);

# Sanity Check
print(X_Line.shape)
print(X_Cubic.shape)
print(X_Gyroid.shape)

(225, 7)
(225, 7)
(225, 7)


# Infill Parameter Stripping

In [12]:
# Unpack the seven Cubic parameter columns into named 1-D arrays.
# NOTE(review): organizeParameters emits Spacing (column 3) before Height (column 4);
# verify that parameterStripInfill returns them in the order unpacked here.
Modulus_Cubic, Porosity_Cubic, Energy_Absorb_Cubic, Height_Cubic, Spacing_Cubic, Speed_Cubic, Temp_Cubic = parameterStripInfill(X_Cubic);
    

# Plotting Utility

In [13]:
# Domain for the 675 stress-strain curves: ids 1..675, each id repeated 4 times in a row.
feature_domain_675 = np.repeat(np.arange(1, 675 + 1), 4, axis=0)

In [14]:
# x-axis positions 1..8, plus the same sequence tiled 675 times.
feature_domain_8 = [position for position in range(1, 8 + 1)];
feature_domain_8_rep = 675 * list(np.arange(1, 9))

In [15]:
# x-axis positions 1..7, plus the same sequence tiled 675 times.
feature_domain_7 = [position for position in range(1, 7 + 1)];
feature_domain_7_rep = 675 * list(np.arange(1, 8))

In [16]:
# x-axis positions 1..5, plus the same sequence tiled 675 times.
feature_domain_5 = [position for position in range(1, 5 + 1)];
feature_domain_5_rep = 675 * list(np.arange(1, 6))

In [17]:
# x-axis positions 1..4, plus the same sequence tiled 675 times.
feature_domain_4 = [position for position in range(1, 4 + 1)];
feature_domain_4_rep = 675 * list(np.arange(1, 5))

In [18]:
# x-axis positions 1..2, plus the same sequence tiled 675 times.
feature_domain_2 = [position for position in range(1, 2 + 1)];
feature_domain_2_rep = 675 * list(np.arange(1, 3))

# Parameter Cutting [Fixating on Cubic]

Since Temperature and Line Spacing have the highest Spearman correlation values, let's fixate on just these two values.

In [31]:
print(X_Cubic)
print()
print()

# Column groups of X_Cubic. Its column order (from organizeParameters) is
# Modulus, Porosity, Energy, Spacing, Height, Speed, Temp.
X_Cubic_Curve = X_Cubic[:, :3]          # Modulus, Porosity, Energy
X_Cubic_Printing_1 = X_Cubic[:, 6:]     # Temp
X_Cubic_Printing_2 = X_Cubic[:, 4:5]    # Height
X_Cubic_Printing_ALL = X_Cubic[:, 3:7]  # Spacing, Height, Speed, Temp

# 5-column table (curve metrics + Temp + Height) and 7-column table (all printing parameters).
X_Cubic_Data = np.concatenate([X_Cubic_Curve, X_Cubic_Printing_1, X_Cubic_Printing_2], axis=1)
X_Cubic_Data_ALL = np.concatenate([X_Cubic_Curve, X_Cubic_Printing_ALL], axis=1)

X_Cubic_Data_Reg = np.copy(X_Cubic_Data);

# In case we want the 5-column table on the same scale (disabled):
# for curve in X_Cubic_Data:
#     curve[1] = curve[1] * 1000
#     curve[2] = curve[2] / 10
#     curve[4] = curve[4] * 1000

# Bring the columns of the 7-column table onto a comparable scale, column by column.
# X_Cubic_Data_ALL is a fresh array from np.concatenate, so X_Cubic itself is untouched.
X_Cubic_Data_ALL[:, 1] = X_Cubic_Data_ALL[:, 1] * 1000
X_Cubic_Data_ALL[:, 2] = X_Cubic_Data_ALL[:, 2] / 10
X_Cubic_Data_ALL[:, 3] = X_Cubic_Data_ALL[:, 3] * 100
X_Cubic_Data_ALL[:, 4] = X_Cubic_Data_ALL[:, 4] * 1000
X_Cubic_Data_ALL[:, 5] = X_Cubic_Data_ALL[:, 5] * 10

print(X_Cubic_Data)
print()
print(X_Cubic_Data_ALL)

[[524.6123617 0.3224 1935.3610593872165 ... 0.2 50 230]
 [546.6644154 0.2976 2092.8618420592306 ... 0.2 50 230]
 [579.4849398 0.268 2194.978811659142 ... 0.2 50 230]
 ...
 [630.3584337 0.2968 2255.0932625144205 ... 0.1 30 190]
 [691.018674 0.244 2266.351168475735 ... 0.1 30 190]
 [763.3644578 0.1808 2220.0751979865336 ... 0.1 30 190]]


[[524.6123617 0.3224 1935.3610593872165 230 0.2]
 [546.6644154 0.2976 2092.8618420592306 230 0.2]
 [579.4849398 0.268 2194.978811659142 230 0.2]
 ...
 [630.3584337 0.2968 2255.0932625144205 190 0.1]
 [691.018674 0.244 2266.351168475735 190 0.1]
 [763.3644578 0.1808 2220.0751979865336 190 0.1]]

[[524.6123617 322.40000000000003 193.53610593872165 ... 200.0 500 230]
 [546.6644154 297.59999999999997 209.28618420592306 ... 200.0 500 230]
 [579.4849398 268.0 219.4978811659142 ... 200.0 500 230]
 ...
 [630.3584337 296.8 225.50932625144205 ... 100.0 300 190]
 [691.018674 244.0 226.6351168475735 ... 100.0 300 190]
 [763.3644578 180.79999999999998 222.0075197986

# Plotting

$$
d_n = \{\text{Modulus},\ \text{Porosity},\ \text{Energy Absorption},\ \text{Temperature},\ \text{Line Height}\}
$$

In [32]:
# Single-row chart: the five parameters of Cubic sample 200.
# NOTE(review): the y-axis label says "Normalized" but X_Cubic_Data is not divided
# by its max anywhere in this cell — confirm which array was meant.
single_curve_labels = {"x": "Parameters", "y":"Normalized values (Divided by Max)"}
fig_k = px.line(
    x=feature_domain_5,
    y=X_Cubic_Data[200],
    title="Single Parameter Curve",
    labels=single_curve_labels,
)
fig_k.show()


# Overlay chart: one trace per Cubic sample.
fig = go.Figure()
for row_index in range(len(X_Cubic)):
    fig.add_trace(go.Scatter(x=feature_domain_5, y=X_Cubic_Data[row_index]))
fig.show()

# Quick Normalization

In [21]:
# Divide each table by its own single global maximum (one scalar per table, not per column).
cubic_peak = np.max(X_Cubic_Data)
print("Max value:", cubic_peak);
X_Cubic_Data_N = X_Cubic_Data / cubic_peak;

cubic_all_peak = np.max(X_Cubic_Data_ALL)
X_Cubic_Data_ALL_N = X_Cubic_Data_ALL / cubic_all_peak

Max value: 827.9307229


In [22]:
# Overlay chart of every normalized 7-parameter Cubic row, opened in the browser.
fig = go.Figure()

# 5-parameter variant (disabled):
# for line in range(len(X_Cubic)):
#     data = X_Cubic_Data_N[line];
#     fig.add_trace(go.Scatter(x=feature_domain_5, y=data))

for row_index in range(len(X_Cubic_Data_ALL_N)):
    fig.add_trace(go.Scatter(x=feature_domain_7, y=X_Cubic_Data_ALL_N[row_index]))

fig.show(renderer = "browser")

# K-Means CGAN

In [None]:
from sklearn.neighbors import KernelDensity
from sklearn.cluster import KMeans

In [None]:
# Same global-max scaling as in the "Quick Normalization" section, followed by a dump of the result.
cubic_peak = np.max(X_Cubic_Data)
print("Max value:", cubic_peak);
X_Cubic_Data_N = X_Cubic_Data / cubic_peak;

print(X_Cubic_Data_N)

# CTGAN

In [23]:
from ctgan import CTGAN

In [33]:
# Show the CTGAN training table and its shape.
for shown in (X_Cubic_Data, X_Cubic_Data.shape):
    print(shown)

[[524.6123617 0.3224 1935.3610593872165 230 0.2]
 [546.6644154 0.2976 2092.8618420592306 230 0.2]
 [579.4849398 0.268 2194.978811659142 230 0.2]
 ...
 [630.3584337 0.2968 2255.0932625144205 190 0.1]
 [691.018674 0.244 2266.351168475735 190 0.1]
 [763.3644578 0.1808 2220.0751979865336 190 0.1]]
(225, 5)


In [34]:
# Train a CTGAN synthesizer for 2000 epochs on the 5-column Cubic table.
# NOTE(review): no random seed is set in the visible notebook, so samples may differ between runs.
ctgan = CTGAN(epochs=2000)
ctgan.fit(X_Cubic_Data)


Future versions of RDT will not support the 'model_missing_values' parameter. Please switch to using the 'missing_value_generation' parameter to select your strategy.


Future versions of RDT will not support the 'model_missing_values' parameter. Please switch to using the 'missing_value_generation' parameter to select your strategy.


Future versions of RDT will not support the 'model_missing_values' parameter. Please switch to using the 'missing_value_generation' parameter to select your strategy.


Future versions of RDT will not support the 'model_missing_values' parameter. Please switch to using the 'missing_value_generation' parameter to select your strategy.


Future versions of RDT will not support the 'model_missing_values' parameter. Please switch to using the 'missing_value_generation' parameter to select your strategy.



In [35]:
# Draw 255 synthetic rows from the fitted CTGAN and overlay them as line traces.
synthetic_data = ctgan.sample(255)
fig_k = go.Figure()

for line in range(len(synthetic_data)):
    data = synthetic_data[line];
    # Each synthetic row has 5 values (it mirrors X_Cubic_Data), so use the 5-point
    # domain; the 7-point domain did not match the row length.
    fig_k.add_trace(go.Scatter(x=feature_domain_5, y=data))

fig_k.show()

print(synthetic_data)

[[4.48702114e+02 4.94877979e-01 1.22673863e+03 1.93000000e+02
  1.97183959e-01]
 [9.87549622e+02 6.56260928e-02 2.04509109e+03 2.27000000e+02
  2.00939594e-01]
 [6.48701925e+02 4.76790252e-01 2.05955853e+03 1.93000000e+02
  1.43164774e-01]
 ...
 [8.30200275e+02 4.39632187e-02 2.20884839e+03 2.09000000e+02
  1.48550732e-01]
 [6.13704203e+02 1.61507548e-01 2.19953894e+03 2.30000000e+02
  1.95406697e-01]
 [6.37716926e+02 5.01929066e-01 9.95219086e+02 1.92000000e+02
  9.08020211e-02]]


In [39]:
import numpy as np
def trunc(values, decs=0):
    """Truncate ``values`` toward zero, keeping ``decs`` decimal places."""
    scale = 10**decs
    return np.trunc(values*scale)/(scale)

# Show the synthetic samples truncated to two decimal places.
print(trunc(synthetic_data, decs=2))

[[4.48700e+02 4.90000e-01 1.22673e+03 1.93000e+02 1.90000e-01]
 [9.87540e+02 6.00000e-02 2.04509e+03 2.27000e+02 2.00000e-01]
 [6.48700e+02 4.70000e-01 2.05955e+03 1.93000e+02 1.40000e-01]
 ...
 [8.30200e+02 4.00000e-02 2.20884e+03 2.09000e+02 1.40000e-01]
 [6.13700e+02 1.60000e-01 2.19953e+03 2.30000e+02 1.90000e-01]
 [6.37710e+02 5.00000e-01 9.95210e+02 1.92000e+02 9.00000e-02]]


In [None]:
import json

def write_list(a_list, path="fake_Gyroid.json"):
    """
    Serialize ``a_list`` as JSON to ``path``.

    Parameters
    -----------------
    a_list: JSON-serializable object (e.g. nested lists of floats)
    path: destination file; defaults to the file name the notebook has always used
    """
    print("Started writing list data into a json file")
    with open(path, "w") as fp:
        json.dump(a_list, fp)
    # Reported after the ``with`` block, i.e. once the file has been closed.
    print("Done writing JSON data into .json file")

# Read list to memory
def read_list(path='fake_Gyroid.json'):
    """
    Load and return the JSON content stored at ``path``.

    Parameters
    -----------------
    path: source file; defaults to the file name the notebook has always used
    """
    # json.load accepts a binary file object, so 'rb' is kept from the original.
    with open(path, 'rb') as fp:
        return json.load(fp)

# Round trip: dump the synthetic samples (converted to nested Python lists) to JSON,
# read them back, and print the result.
write_list(synthetic_data.tolist())
r_names = read_list()
print('List is', r_names)

In [None]:
def stripParams_5(y):
    """
    Split rows of five values into five column arrays.

    Returns (M, P, E, T, H): the values found at positions 0..4 of every row,
    each gathered into its own NumPy array.
    """
    rows = [(curve[0], curve[1], curve[2], curve[3], curve[4]) for curve in y]
    return tuple(np.array([row[position] for row in rows]) for position in range(5))

# Mod_N, Por_N, Engy_N, Temp_N, Height_N = stripParams_5(X_Cubic_Data_N);

def stripParams_7(y):
    """
    Split rows of seven values into seven column arrays.

    Returns (M, P, E, Spacing, H, Speed, T): the values found at positions 0..6 of
    every row, each gathered into its own NumPy array.
    """
    rows = [
        (curve[0], curve[1], curve[2], curve[3], curve[4], curve[5], curve[6])
        for curve in y
    ]
    return tuple(np.array([row[position] for row in rows]) for position in range(7))

# Per-parameter arrays of the normalized 7-column Cubic table.
Mod_N, Por_N, Engy_N, Spacing_N, Height_N, Speed_N, Temp_N, = stripParams_7(X_Cubic_Data_ALL_N);

In [None]:
# Curve_3D_Cluster = X_Cubic_Data_N[:, :3]

In [None]:
# First three columns of the normalized table (the X_Cubic_Curve part: Modulus, Porosity, Energy).
Curve_3D_Cluster = X_Cubic_Data_ALL_N[:, :3]

In [None]:
# Two-column table built from columns 0 and 2 of X_Cubic (Modulus and Energy absorption).
_modulus_column = X_Cubic[:, :1]
_energy_column = X_Cubic[:, 2:3]
Curve_2D_Cluster = np.concatenate([_modulus_column, _energy_column], axis=1)

In [None]:
# Two-column frame for clustering, taken from the Gyroid table (columns 2 and 0).
data = dict([
    ('Energy Absorption Capacity', X_Gyroid[:, 2].flatten()),
    ('Compression Modulus', X_Gyroid[:, 0].flatten()),
])

df = pd.DataFrame(data)

In [None]:
# NOTE(review): no random_state is passed, so cluster ids may differ between runs.
kmeans = KMeans(n_clusters=10)                   # Partition the rows into 10 clusters
label = kmeans.fit_predict(df)                      # Fit, then get one cluster id per row of df

In [None]:
# kmeans = KMeans(n_clusters=3)                   # Number of clusters == 3
# kmeans = kmeans.fit(Curve_3D_Cluster)                          # Fitting the input data
# label = kmeans.predict(Curve_3D_Cluster)                      # Getting the cluster labels
# centroids = kmeans.cluster_centers_             # Centroid values

In [None]:
# Split the frame into one sub-frame per cluster id (0..9).
cluster_frames = [df[label == cluster_id] for cluster_id in range(10)]
df1, df2, df3, df4, df5, df6, df7, df8, df9, df10 = cluster_frames

# Same sub-frames keyed "df1".."df10", in cluster-id order.
dfs = {
    "df" + str(number): frame
    for number, frame in enumerate(cluster_frames, start=1)
}

# One scatter trace per cluster.
fig = go.Figure()

for i, frame in dfs.items():
    fig = fig.add_trace(go.Scatter(x = frame['Compression Modulus'],
                                   y = frame['Energy Absorption Capacity'],
                                   name = i))
fig.show()


In [None]:
# Report the member count of three clusters and collect the counts in CLUS.
# NOTE(review): `x` and `z` are not assigned in any visible cell, and `y` still holds the
# parameter table from the loader. Presumably they were per-cluster index arrays produced
# by the commented-out 3-cluster KMeans cell — confirm; as written this cell depends on
# hidden kernel state.
print(Curve_3D_Cluster[x][:, 0].shape[0])
print(Curve_3D_Cluster[y][:, 0].shape[0])
print(Curve_3D_Cluster[z][:, 0].shape[0])

CLUS = [Curve_3D_Cluster[x][:, 0].shape[0], Curve_3D_Cluster[y][:, 0].shape[0], Curve_3D_Cluster[z][:, 0].shape[0]]

In [None]:
# One constant-valued float array per cluster (0, 1, 2), sized by the cluster member counts.
RED = np.full(CLUS[0], 0.0)
BLUE = np.full(CLUS[1], 1.0)
YELLOW = np.full(CLUS[2], 2.0)

# Cluster id for every sample, ordered RED block, then BLUE, then YELLOW.
Name_Arr = np.concatenate([RED, BLUE, YELLOW], axis=0)
print(Name_Arr)

In [None]:
# Stack columns 0, 1 and 2 of the three clusters into the flat arrays A, B and C.
_cluster_members = [Curve_3D_Cluster[member_index] for member_index in (x, y, z)]
A, B, C = (
    np.concatenate([members[:, column] for members in _cluster_members], axis=0)
    for column in range(3)
)

In [None]:
# fig = px.scatter_3d(x=A, y=B, z=C, color=Name_Arr)
# fig.show(renderer="browser")

In [None]:
# Spot check: find where the first value of member 6 of cluster `z` occurs in the table,
# then print that member next to row 8 of the normalized 5-column table.
# NOTE(review): `z` is not defined in any visible cell — confirm where it comes from.
Curve_3D_Cluster = np.array(Curve_3D_Cluster)
print("All index value is: ", np.where(Curve_3D_Cluster == Curve_3D_Cluster[z][6][0]))
print()
print(Curve_3D_Cluster[z][6])
print(X_Cubic_Data_N[8])

### Label Data based on K-Means Centroids

In [None]:
def replaceBasedOnKMeans(org, clustered, label):
    """
    Recover the full rows of ``org`` that correspond to the rows of ``clustered``.

    For every row of ``clustered`` its first value is searched for anywhere in ``org``
    (exact equality); a copy of the first row of ``org`` containing it is collected.

    Parameters
    -----------------
    org: 2-D array holding the complete rows
    clustered: rows whose first value is used as the lookup key
    label: unused (appending it to each row is commented out in the original)

    Returns
    -----------------
    NumPy array with one matched ``org`` row per row of ``clustered``
    """
    matched_rows = []

    for position in range(len(clustered)):
        lookup_value = clustered[position][0]
        row_hits, _ = np.where(org == lookup_value)
        matched_rows.append(np.copy(org[row_hits][0]))

    return np.array(matched_rows)

# Cluster_1 = replaceBasedOnKMeans(X_Cubic_Data_N, Curve_3D_Cluster[x], "A");
# Cluster_2 = replaceBasedOnKMeans(X_Cubic_Data_N, Curve_3D_Cluster[y], "B");
# Cluster_3 = replaceBasedOnKMeans(X_Cubic_Data_N, Curve_3D_Cluster[z], "C");

# Full 7-column rows for each of the three clusters.
Cluster_1, Cluster_2, Cluster_3 = (
    replaceBasedOnKMeans(X_Cubic_Data_ALL_N, Curve_3D_Cluster[member_index], tag)
    for member_index, tag in ((x, "A"), (y, "B"), (z, "C"))
)


In [None]:
# Prune the data: keep columns 3 onward of each cluster, then stack the clusters row-wise.
Cluster_A, Cluster_B, Cluster_C = (
    cluster_rows[:, 3:] for cluster_rows in (Cluster_1, Cluster_2, Cluster_3)
)

cluster_X = np.concatenate([Cluster_A, Cluster_B, Cluster_C], axis=0)
print(cluster_X)

In [None]:
def oneCategorical(y):
    """
    Wrap each class id in its own single-element row.

    Only the ids 0.0, 1.0 and 2.0 are kept; any other value is dropped.
    Returns a NumPy array with one ``[id]`` row per kept entry.
    """
    known_ids = (0.0, 1.0, 2.0)
    return np.array([
        [class_id]
        for value in y
        for class_id in known_ids
        if value == class_id
    ])

In [None]:
# One-hot alternative (disabled):
# hot_encoded_curves = tf.keras.utils.to_categorical(Name_Arr, num_classes = 3);
# Despite the name, oneCategorical yields a single class-id column, not a one-hot matrix.
hot_encoded_curves = oneCategorical(Name_Arr);
print(hot_encoded_curves)

### Pairwise Data Structure

In [None]:
# Pair every parameter row with its class id, both cast to float32.
# NOTE: this rebinds `X`, which previously held the loaded curve array.
X = [
    [cluster_X[param_index].astype('float32'), hot_encoded_curves[param_index].astype('float32')]
    for param_index in range(len(cluster_X))
]

# print(X)
    
# print(X)

### Tensorflow settings

In [None]:
# Displays whether TensorFlow is running in eager mode (the cell's last expression is shown).
tf.executing_eagerly()

### Discriminator Data Sampling Generator

In [None]:
def sample_real_samples(dataset, n_samples):
    """
    Draw real training pairs at random (with replacement).

    Parameters
    --------------
    dataset: sequence of [parameters, label] pairs; both members must support ``.astype``
    n_samples: number of pairs to draw

    Returns
    --------------
    [params, labels]: two lists of float32 arrays, one entry per drawn pair
    y: array of ones with shape (n_samples, 1) — the "real" target for the discriminator
    """
    drawn_pairs = [random.choice(dataset) for _ in range(n_samples)]

    params = [pair[0].astype('float32') for pair in drawn_pairs]
    labels = [pair[1].astype('float32') for pair in drawn_pairs]
    y = np.ones((n_samples, 1))

    return [params, labels], y

# Smoke test: draw 10 real pairs and print their labels (O).
[P, O], B = sample_real_samples(X, 10)
print(O)

### Custom Loss Functions

BCE_Regularized:
$$
L_{BCE} = -\dfrac{1}{n} \sum_{i=1}^{n} \left[ y_{i} \cdot \log \hat{y}_i + (1 - y_i) \cdot \log (1-\hat{y}_i) \right] + \left[ \lambda \cdot I \right]
$$ 

In [None]:
from tensorflow.keras import backend as K

In [None]:
def BCE_Regularized(y_true, y_pred, lambda_factor):
    """
    Binary cross-entropy reduced by ``lambda_factor`` times itself.

    Returns ``bce - lambda_factor * bce``.
    NOTE(review): the accompanying formula adds a ``lambda * I`` term; here the scaled
    loss is subtracted — confirm which is intended.
    """
    bce_loss = tf.keras.losses.BinaryCrossentropy()(y_true, y_pred)
    return bce_loss - lambda_factor * (bce_loss)

### Discriminator

Remember that the Objective Function this time is:

$$
\begin{equation}
\min_{G}\max_{D}V(D,G) = \mathbb{E}_{x \sim p_{data}(x)}[\log D(x | y)]
+ \mathbb{E}_{z \sim p_{z}(z)} [\log (1 - D(G(z | y)))]
\end{equation}
$$

Such that $y$ is auxiliary data. In this case, it's the infill type (one-hot encoded), which helps the model better learn the distribution.

In [None]:
def conditionalDiscriminator(in_shape=4, num_classes=3):
    """
    Build and compile the conditional discriminator.

    Inputs of the returned model: ``[parameters, label]``. The label goes through an
    Embedding(num_classes, 100), a Dense(4) and a Reshape to (4,), and is concatenated
    with the parameter vector. The result passes through Dense layers of 100, 100, 50,
    32 and 16 units (each followed by LeakyReLU(0.2)) and a single sigmoid output unit.

    Compiled with binary cross-entropy, Adam(learning_rate=0.0002) and accuracy.
    """
    layers = tf.keras.layers

    # Label branch: class id -> 4-value conditioning vector.
    in_label = tf.keras.Input(shape=(1,))
    cond_y = layers.Embedding(num_classes, 100)(in_label) # Keep the embedding layers low...
    cond_y = layers.Dense(4)(cond_y)
    cond_y = layers.Reshape((4,))(cond_y)

    # Parameter branch, joined with the conditioning vector.
    in_parameters = tf.keras.Input(shape=in_shape)
    x = layers.Concatenate()([in_parameters, cond_y])

    x = layers.Dense(100, input_dim=in_shape)(x)
    x = layers.LeakyReLU(alpha=0.2)(x)
    for units in (100, 50, 32, 16):
        x = layers.Dense(units)(x)
        x = layers.LeakyReLU(alpha=0.2)(x)
    out = layers.Dense(1, activation='sigmoid')(x) # Output layer

    model = tf.keras.Model([in_parameters, in_label], out)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate = 0.0002),
        metrics=['accuracy'],
    )

    return model

### Generator

In [None]:
def conditionalGenerator(in_shape=4, num_classes=3):
    """
    Build the conditional generator (returned uncompiled).

    Inputs of the returned model: ``[noise, label]``. The label goes through an
    Embedding(num_classes, 100), a Dense(4) and a Reshape to (4,), and is concatenated
    with the noise vector. The result passes through Dense layers of 100, 100, 32 and 16
    units (each followed by LeakyReLU(0.2)) and a 4-unit sigmoid output layer.
    """
    layers = tf.keras.layers

    # Label branch: class id -> 4-value conditioning vector.
    in_label = tf.keras.Input(shape=(1,))
    cond_y = layers.Embedding(num_classes, 100)(in_label) # Keep the embedding layers low...
    cond_y = layers.Dense(4)(cond_y)
    cond_y = layers.Reshape((4,))(cond_y)

    # Noise branch, joined with the conditioning vector.
    in_noise = tf.keras.Input(shape=in_shape)
    x = layers.Concatenate()([in_noise, cond_y])

    for units in (100, 100, 32, 16):
        x = layers.Dense(units)(x)
        x = layers.LeakyReLU(alpha=0.2)(x)
    out = layers.Dense(4, input_dim=in_shape, activation='sigmoid')(x)

    return tf.keras.Model([in_noise, in_label], out)

### Summary of Models

In [None]:
# Instantiate both networks; the generator is given a 4-value noise input.
discriminator = conditionalDiscriminator();
generator = conditionalGenerator(4);

In [None]:
# Print the layer-by-layer summary of each network.
discriminator.summary();
generator.summary();

### Latent Space

In [None]:
def latentDimensionalGenerator(latent_dimensions, n_samples, randomGaussian = False):
    """
    Sample latent noise vectors and random class labels.

    Parameters
    -----------------
    latent_dimensions: length of each noise vector
    n_samples: number of (noise, label) pairs to draw
    randomGaussian: unused

    Returns
    -----------------
    (noise, labels): ``noise`` stacks ``n_samples`` standard-normal vectors;
    ``labels`` holds one ``[float]`` per sample, drawn uniformly from {0, 1, 2}
    """
    # One randn call per sample, as in the original loop.
    noise = [np.random.randn(latent_dimensions) for _ in range(n_samples)]
    labels = [[float(random.randint(0, 2))] for _ in range(n_samples)]

    return np.array(noise), np.array(labels)

In [None]:
# Generator production
def generate_samples(g_model, latent_dim, n_samples):
    """
    Produce ``n_samples`` generated rows together with their "fake" targets.

    Returns
    -----------------
    [generated, conditions]: output of ``g_model.predict`` and the class labels it was conditioned on
    targets: zeros with shape (n_samples, 1) — the "fake" class for the discriminator
    """
    noise, conditions = latentDimensionalGenerator(latent_dim, n_samples)  # points in the latent space
    generated = g_model.predict([noise, conditions])
    targets = np.zeros((n_samples, 1))
    return [generated, conditions], targets

### Visualizing the latent dimensional space in 2D

In [None]:
# Smoke test of the untrained generator with 10 samples.
# Note: fake_X is the two-element list [generated rows, condition labels] returned by
# generate_samples, so len(fake_X) prints 2 rather than the sample count.
fake_X, fake_y = generate_samples(generator, 4, 10);
print(len(fake_X))

In [None]:
# for curve in fake_X[0]:
#     fig = go.Figure();
#     fig.add_trace(go.Scatter(x=feature_domain_2, y=curve));
#     fig.show();

### GAN: Putting it together

In [None]:
def define_gan(generator, discriminator):
    """
    Chain the generator into the discriminator to form the combined GAN model.

    The combined model takes ``[noise, label, regularization]`` and outputs the
    discriminator's verdict on the generator's output. The ``regularization`` input is
    declared but not wired into the output here (its only use is commented out below).
    Compiled with binary cross-entropy and Adam(learning_rate=0.0001).
    """
    # Freeze the discriminator before compiling the combined model.
    discriminator.trainable = False # We set the discriminator as not trainable so the generator updates
    
    lambda_factor = tf.keras.Input(shape=(3,), name='regularization')
    z, y_label = generator.input
    gen_output = generator.output
    # The discriminator judges the generated row using the same label the generator saw.
    gan_output = discriminator([gen_output, y_label])
    
    model = tf.keras.Model([z, y_label, lambda_factor], gan_output)
    
    opt = Adam(learning_rate = 0.0001)
#     opt = tf.keras.optimizers.RMSprop(learning_rate = 0.0001)
#     model.add_loss(BCE_Regularized(y_label, gan_output, lambda_factor[0]))
#     model.compile(loss=None, optimizer=opt)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model

# Evaluation Metrics

### Utility

In [None]:
def parameterStrip_4(y):
    """
    Split a 4-column table into its columns.

    Returns a 4-tuple named (Height, Spacing, Speed, Temp) in column order 0..3.
    NOTE(review): the training rows are built from columns ordered Spacing, Height,
    Speed, Temp — confirm that the first two names here are not swapped.
    """
    columns = y.T
    return tuple(columns[position] for position in range(4))

### Regularization Techniques

In [None]:
from sklearn.neighbors import KernelDensity
from scipy.integrate import quad

In [None]:
def integrand(x, kde):
    """Density of ``kde`` at the point ``x``: exp of ``kde.score_samples`` on a one-row, one-column array."""
    point = np.array([x]).reshape(-1, 1)
    log_density = kde.score_samples(point)
    return np.exp(log_density)

def analyzeLiklihood(g_model, latent_dim, n_samples):
    """
    Estimate how much generated probability mass lies above two thresholds and turn
    it into a regularization weight.

    ``n_samples`` rows are generated; Gaussian KDEs (bandwidth 0.01) are fitted to the
    columns that ``parameterStrip_4`` names Height and Speed. Each density is integrated
    from its threshold (0.14 for height, 0.60 for speed) to +infinity, both densities are
    plotted over [0, 1] with their sample means, and the larger of the two integrals is
    post-processed into the returned value.

    Parameters
    -----------------
    g_model: generator passed to ``generate_samples``
    latent_dim: latent-vector length passed to ``generate_samples``
    n_samples: number of rows to generate

    Returns
    -----------------
    res: the larger integral, replaced by 0 when below 0.001 and by 0.5 when above 0.80
    """
    [X_fake, _labels], _y_fake = generate_samples(g_model, latent_dim, n_samples)
    Height_fake, _Spacing_fake, Speed_fake, _Temp_fake = parameterStrip_4(X_fake)

    height_mean = np.mean(Height_fake)
    speed_mean = np.mean(Speed_fake)

    # KernelDensity expects a 2-D (n_samples, n_features) array.
    Height_fake = np.reshape(Height_fake, (n_samples, 1))
    Speed_fake = np.reshape(Speed_fake, (n_samples, 1))

    # Kernel density estimates of the two monitored columns.
    kde_HEI = KernelDensity(kernel='gaussian', bandwidth=0.01).fit(Height_fake)
    kde_SPE = KernelDensity(kernel='gaussian', bandwidth=0.01).fit(Speed_fake)

    # Evaluate the PDFs on a grid of points for plotting.
    x_grid = np.linspace(0, 1, 1000)[:, np.newaxis]
    log_dens_HEI = kde_HEI.score_samples(x_grid)
    log_dens_SPE = kde_SPE.score_samples(x_grid)

    # Probability mass ABOVE each threshold (the integral runs from the threshold to +inf).
    result_hei, error_hei = quad(integrand, 0.14, np.inf, args=(kde_HEI,))
    result_spe, error_spe = quad(integrand, 0.60, np.inf, args=(kde_SPE,))

    print(" ========== PROBABILITIES ==========")
    print("[HEIGHT] Integrated value above (0.14):", result_hei)
    print("[HEIGHT] Error estimate above (0.14):", error_hei)
    print()
    print("[SPEED] Integrated value above (0.60):", result_spe)
    print("[SPEED] Error estimate above (0.60):", error_spe)

    # Sample means as vertical lines, densities as curves (red = height, green = speed).
    plt.axvline(x=height_mean, color='r')
    plt.axvline(x=speed_mean, color='g')

    plt.plot(x_grid, np.exp(log_dens_HEI), 'r')
    plt.plot(x_grid, np.exp(log_dens_SPE), 'g')
    plt.show()

    # Keep the larger of the two tail masses.
    res = np.max([result_hei, result_spe]);

    if (res < 0.001):
        res = 0;
    elif (res > 0.80):
        res = 0.5;

    print("Calculated Lambda value:", res);
    return res
    
    

### Diagnostics

In [None]:
def summarize_performance(epoch, g_model, d_model, dataset, latent_dim, n_samples, save_path=""):
    """
    Report discriminator accuracy on real and generated samples and plot the generated rows.

    Parameters
    -----------------
    epoch: iteration number used in the printed banner, plot title and file name
    g_model, d_model: generator and discriminator
    dataset: real [parameters, label] pairs to sample from
    latent_dim: latent-vector length for generation
    n_samples: number of real and of generated samples to evaluate
    save_path: when non-empty, the figure is titled and saved there as "<epoch>.png"
    """
    # Discriminator accuracy on real samples.
    real_batch, y_real = sample_real_samples(dataset, n_samples)
    X_real, real_labels = real_batch
    _, acc_real = d_model.evaluate([tf.stack(X_real), tf.stack(real_labels)], y_real, verbose=0)

    # Discriminator accuracy on generated samples.
    fake_batch, y_fake = generate_samples(g_model, latent_dim, n_samples)
    X_fake, labels = fake_batch
    _, acc_fake = d_model.evaluate([tf.stack(X_fake), tf.stack(labels)], y_fake, verbose=0)

    print("============== CURVE GENERATION ON EPOCH", epoch,"==============");

    # One line per generated row, plus a horizontal reference line at 0.6.
    for generated_row in X_fake:
        plt.plot(feature_domain_4, generated_row)
    plt.axhline(y = 0.6, color='r')

    wants_file = (save_path != "")
    if wants_file:
        plt.title("Training in epoch: " + str(epoch))
        plt.savefig(os.path.join(save_path, str(epoch) + '.png'));

    plt.show()

    # summarize discriminator performance
    print('>Accuracy real: %.0f%%, fake: %.0f%%' % (acc_real*100, acc_fake*100));

# GAN Training

In [None]:
# train the generator and discriminator
def train_gan(g_model, d_model, gan_model, training_data, latent_dim, n_epochs, n_batch, save_path=""):
    """
    Alternate discriminator and generator updates for ``n_epochs`` iterations.

    Each iteration trains the discriminator on half a batch of real and half a batch of
    generated samples, trains the generator through ``gan_model`` on a full batch with
    targets of 1, recomputes the lambda value with ``analyzeLiklihood`` and prints and
    plots diagnostics.

    Parameters
    -----------------
    g_model, d_model, gan_model: generator, discriminator and combined model
    training_data: real [parameters, label] pairs
    latent_dim: latent-vector length
    n_epochs: number of training iterations (one batch each)
    n_batch: generator batch size; the discriminator uses int(n_batch / 2) per update
    save_path: forwarded to ``summarize_performance``

    Returns
    -----------------
    (d1Loss, d2Loss, gLoss): per-iteration losses on real samples, on fake samples,
    and of the generator
    """
    d1Loss = [];
    d2Loss = [];
    gLoss = [];

    # Regularization vector; only element 0 is updated during training.
    lambda_f = np.array([0, -1, -1]).astype('float32');

    half_batch = int(n_batch / 2);

    for i in range(n_epochs):
        # Discriminator update on real samples.
        [X_real, real_labels], y_real = sample_real_samples(training_data, half_batch) # Note X_Real is [data, labels]
        d_loss1, _ = d_model.train_on_batch([tf.stack(X_real), tf.stack(real_labels)], y_real) # Training on real

        # Discriminator update on generated samples.
        [X_fake, labels], y_fake = generate_samples(g_model, latent_dim, half_batch)
        d_loss2, _ = d_model.train_on_batch([tf.stack(X_fake), tf.stack(labels)], y_fake) # Training on fakes

        # Create a latent space and inverted labels
        noise_z, labels = latentDimensionalGenerator(latent_dim, n_batch) # Latent space generation
        y_gan = np.ones((n_batch, 1)) # Pretend that that they are all real.

        # The more the discrimintor selects (1), the better the generator is doing. (y_true = [1, 1, 1...])

        # Give the regularization input an explicit batch axis so its first dimension
        # matches the other inputs for any n_batch (previously only n_batch == 3 lined up).
        lambda_batch = np.tile(lambda_f, (n_batch, 1))

        # Update the generator via the discriminator's error
        g_loss = gan_model.train_on_batch([tf.stack(noise_z), tf.stack(labels), lambda_batch], y_gan)

        ######### OUTPUT BASED FUNCTIONS #########

        # Analyze Likelihood (uses the caller's latent_dim instead of a hard-coded 4)
        lambda_f[0] = analyzeLiklihood(g_model, latent_dim, 255)
        print("Lambda Regularizer:", lambda_f)

        # summarize loss on this batch
        print('>%d, d1=%.3f, d2=%.3f g=%.3f' % (i+1, d_loss1, d_loss2, g_loss))
        summarize_performance(i, g_model, d_model, training_data, latent_dim, 100, save_path)

        d1Loss.append(d_loss1);
        d2Loss.append(d_loss2);
        gLoss.append(g_loss);

    return d1Loss, d2Loss, gLoss;

In [None]:
# Latent-vector length (matches the generator built above with a 4-value noise input)
# and the combined generator -> discriminator model.
latent_dim = 4;
gan_model = define_gan(generator, discriminator);
# gan_model.summary()

In [None]:
# Number of training iterations (train_gan processes one batch per iteration).
n_epochs = 1000;

#Training
# NOTE: `X` must be the list of [parameters, label] pairs built in the
# "Pairwise Data Structure" section, not the raw curve array loaded at the top.
d1, d2, gloss = train_gan(
    generator, 
    discriminator, 
    gan_model, 
    X, 
    latent_dim, 
    n_epochs, # n_epochs
    3,  # batch size
#     save_path = image_save_path
);

# Loss Curves

# Prediction

In [None]:
# Generate 100 rows and map them back to physical units.
[X_fake, labels], y_fake = generate_samples(generator, 4, 100)

# The training rows come from X_Cubic_Data_ALL_N = X_Cubic_Data_ALL / max(X_Cubic_Data_ALL),
# so the same maximum undoes the normalization.
denormalization_scale = np.max(X_Cubic_Data_ALL)

for data in X_fake:
    data = data * denormalization_scale;
    # Undo the per-column scaling applied when X_Cubic_Data_ALL was built
    # (x100, x1000, x10, unchanged). Column 0 was previously skipped and column 1 printed twice.
    print("[", (data[0] / 100), (data[1] / 1000), (data[2] / 10), (data[3]), "]")

In [None]:
import numpy as np
from sklearn.neighbors import KernelDensity

# Stand-alone KDE demo on random data. It uses its own variable name so the GAN
# training data bound to `X` is not overwritten when this cell runs.
rng = np.random.RandomState(10)
kde_demo_samples = rng.randn(100).reshape(100, 1)

# Show the shape and a short preview instead of dumping all 100 rows.
print(kde_demo_samples.shape)
print(kde_demo_samples[:5])

# Instantiate a kernel density estimator
kde = KernelDensity(kernel='gaussian', bandwidth=0.5)

# Fit the estimator to the data
kde.fit(kde_demo_samples)

# Evaluate the PDF on a grid of points
x_grid = np.linspace(-5, 5, 1000)[:, np.newaxis]
log_dens = kde.score_samples(x_grid)

# Plot the estimated PDF
plt.plot(x_grid, np.exp(log_dens))
plt.show()
