In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
from scipy.stats import ortho_group
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn import linear_model

In [2]:
# Orthogonal/Inverted Matrix Based Homomorphic Encryption

def encryption_train(X, y):
    """Mask a training set with random matrices.

    The rows of ``X`` and ``y`` are mixed by a random orthogonal matrix
    ``U1`` (one row/column per sample).  The columns of ``X`` are mixed by
    ``U2``: a random orthogonal matrix when ``X`` has more than one column,
    otherwise a 1x1 matrix drawn from ``np.random.rand``.

    Args:
        X: 2-D feature matrix (anything exposing ``.shape`` that
            ``ndarray.dot`` accepts).
        y: Targets with one entry per row of ``X``.

    Returns:
        list: ``[X_enc, y_enc, U1, U2]`` with ``X_enc = U1 . X . U2`` and
        ``y_enc = U1 . y``.
    """
    # Draw U1 before U2 so the sequence of random draws is stable under a seed.
    U1 = ortho_group.rvs(dim=X.shape[0])

    n_features = X.shape[1]
    U2 = ortho_group.rvs(dim=n_features) if n_features > 1 else np.random.rand(1, 1)

    masked_features = U1.dot(X).dot(U2)
    masked_targets = U1.dot(y)
    return [masked_features, masked_targets, U1, U2]

def decryption_train(X, y, U1, U2):
    """Reverse the masking produced by ``encryption_train``.

    ``X`` is left-multiplied by ``U1.T`` (the inverse of ``U1`` when ``U1``
    is orthogonal) and right-multiplied by the inverse of ``U2``; ``y`` is
    left-multiplied by ``U1.T``.

    Args:
        X: Masked feature matrix.
        y: Masked targets.
        U1: Row-mixing matrix used during encryption.
        U2: Column-mixing matrix used during encryption; must be invertible.

    Returns:
        list: ``[X_dec, y_dec]``.
    """
    row_unmix = U1.T
    recovered_features = row_unmix.dot(X).dot(np.linalg.inv(U2))
    recovered_targets = row_unmix.dot(y)
    return [recovered_features, recovered_targets]

def encryption_test(X, U2):
    """Mask a test feature matrix.

    The rows of ``X`` are mixed by ``U3``: a random orthogonal matrix when
    ``X`` has more than one row, otherwise a 1x1 matrix drawn from
    ``np.random.rand``.  The columns are mixed by the inverse of ``U2``.

    NOTE(review): ``encryption_train`` right-multiplies by ``U2`` whereas
    this function right-multiplies by ``inv(U2)`` -- confirm that this
    pairing is the intended one for the downstream estimator.

    Args:
        X: 2-D test feature matrix.
        U2: Column-mixing matrix from training; must be invertible.

    Returns:
        list: ``[X_enc, U3]`` with ``X_enc = U3 . X . inv(U2)``.
    """
    n_samples = X.shape[0]
    U3 = ortho_group.rvs(dim=n_samples) if n_samples > 1 else np.random.rand(1, 1)

    row_mixed = U3.dot(X)
    masked_features = row_mixed.dot(np.linalg.inv(U2))
    return [masked_features, U3]

def decryption_test(y_enc, U3):
    """Recover predictions by left-multiplying with the inverse of ``U3``.

    Args:
        y_enc: Masked predictions (one entry per row of ``U3``).
        U3: Row-mixing matrix used when the test set was masked; must be
            invertible.

    Returns:
        ``inv(U3) . y_enc``.
    """
    row_unmix = np.linalg.inv(U3)
    return row_unmix.dot(y_enc)

def estimator_OLS(X, y):
    """Ordinary least-squares coefficients for ``y ~ X``.

    Solves the least-squares problem with ``np.linalg.lstsq`` instead of
    explicitly inverting ``X.T X``.  For a full-rank ``X`` the result matches
    the normal-equation solution ``inv(X.T X) X.T y`` up to floating-point
    rounding.  For a rank-deficient ``X`` (e.g. collinear columns), where the
    explicit inverse would raise ``LinAlgError`` or return meaningless
    values, the minimum-norm least-squares solution is returned.

    Args:
        X: 2-D design matrix of shape ``(n_samples, n_features)``.
        y: Targets of shape ``(n_samples,)`` or ``(n_samples, n_targets)``.

    Returns:
        numpy.ndarray: Coefficients of shape ``(n_features,)`` or
        ``(n_features, n_targets)``, matching the dimensionality of ``y``.
    """
    design = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float)
    # rcond=None selects the machine-precision based cutoff for small
    # singular values and avoids the legacy-default FutureWarning.
    beta_hat, _residuals, _rank, _singular_values = np.linalg.lstsq(
        design, targets, rcond=None)
    return beta_hat

def predict(beta_hat, X):
    """Apply fitted linear coefficients to a feature matrix.

    Args:
        beta_hat: Coefficient vector (or matrix).
        X: Feature matrix exposing a ``.dot`` method.

    Returns:
        The product ``X . beta_hat``.
    """
    fitted_values = X.dot(beta_hat)
    return fitted_values


In [3]:
def dataload(cci_data, input_cols, output_cols, n_rows=5000, n_rows_short=50):
    """Slice the dataset and build the masked ("encrypted") training subset.

    Row selection uses ``.loc`` with a label slice, which includes both
    endpoints: with the defaults the full split covers index labels
    ``0..5000`` and the shortened split covers ``0..50``.
    (Assumes ``cci_data`` carries integer index labels starting at 0, as
    produced by a plain ``pd.read_csv`` -- TODO confirm for other callers.)

    Only the shortened split is passed to ``encryption_train``.

    Args:
        cci_data: Source DataFrame.
        input_cols: Column labels used as features.
        output_cols: Column label (or labels) used as the target.
        n_rows: Last index label (inclusive) of the full split.
        n_rows_short: Last index label (inclusive) of the shortened split
            that gets masked.

    Returns:
        tuple: ``(x_data, y_data, x_data_shortened, y_data_shortened,
        X_enc, y_enc, H_enc)`` where ``H_enc`` is the list returned by
        ``encryption_train`` and ``X_enc`` / ``y_enc`` are its first two
        entries.
    """
    x_data = cci_data[input_cols].loc[0:n_rows, :]
    y_data = cci_data[output_cols].loc[0:n_rows]

    x_data_shortened = cci_data[input_cols].loc[0:n_rows_short, :]
    y_data_shortened = cci_data[output_cols].loc[0:n_rows_short]

    # The round-trip decryption that used to follow this call was never used
    # (and cost a matrix inverse), so it has been dropped.
    H_enc = encryption_train(x_data_shortened, y_data_shortened)
    X_enc, y_enc = H_enc[0], H_enc[1]

    return x_data, y_data, x_data_shortened, y_data_shortened, X_enc, y_enc, H_enc


In [4]:
import pandas as pd
import tensorflow as tf
import numpy as np

def vanillaModel(x_data, y_data):
    """Train a two-class DNN estimator on the plain data and print its accuracy.

    One numeric feature column is created per key of ``x_data``.  The
    classifier is trained for 2000 steps and then evaluated on the *same*
    ``x_data`` / ``y_data`` it was trained on; the resulting accuracy is
    printed.  Nothing is returned.

    Args:
        x_data: Mapping-like table of features (e.g. a DataFrame) whose keys
            name the feature columns.
        y_data: Labels aligned with ``x_data``.
    """

    def input_fn(features, labels, training=True, batch_size=256):
        # Batched tf.data pipeline; shuffled and repeated only when training.
        pipeline = tf.data.Dataset.from_tensor_slices((dict(features), labels))
        if not training:
            return pipeline.batch(batch_size)
        return pipeline.shuffle(1000).repeat().batch(batch_size)

    def make_optimizer():
        # Adam with an exponentially decaying learning rate; passed to the
        # estimator as a callable.
        schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=0.01, decay_steps=1000, decay_rate=0.96)
        return tf.keras.optimizers.legacy.Adam(learning_rate=schedule)

    my_feature_columns = [
        tf.feature_column.numeric_column(key=name) for name in x_data.keys()
    ]

    classifier = tf.estimator.DNNClassifier(
        feature_columns=my_feature_columns,
        hidden_units=[64, 32, 16],
        n_classes=2,
        optimizer=make_optimizer,
        dropout=0.3,
    )

    early_stopping_hook = tf.estimator.experimental.stop_if_no_decrease_hook(
        classifier, metric_name='loss', max_steps_without_decrease=500, min_steps=1000)

    def train_input():
        return input_fn(x_data, y_data, training=True)

    def eval_input():
        return input_fn(x_data, y_data, training=False)

    classifier.train(input_fn=train_input, steps=2000, hooks=[early_stopping_hook])

    eval_result = classifier.evaluate(input_fn=eval_input, steps=100)

    print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

def homomorphicEncryptionModel(X_enc, y_enc, x_data, H_enc, tolerance=0.95):
    """Train a DNN regressor on the masked data and print accuracy and RMSE.

    The regressor is trained for 2000 steps on the masked features/targets
    and then evaluated and used for prediction on that same masked data.
    Two numbers are printed: the fraction of predictions whose absolute error
    is below ``tolerance`` ("HE accuracy") and the root-mean-squared error.
    Nothing is returned.

    Args:
        X_enc: Accepted for interface compatibility.  The masked features are
            always taken from ``H_enc[0]``, as in the earlier implementation,
            which overwrote this argument.
        y_enc: Accepted for interface compatibility.  The masked targets are
            always taken from ``H_enc[1]``.
        x_data: DataFrame whose column labels name the feature columns.
        H_enc: Sequence whose first two entries are the masked feature matrix
            and the masked target vector.
        tolerance: A prediction counts as correct when
            ``abs(prediction - target) < tolerance``.
    """
    feature_names = list(x_data.columns)

    X_enc = pd.DataFrame(H_enc[0])
    X_enc.columns = feature_names
    y_enc = pd.Series(H_enc[1])

    my_feature_columns = [
        tf.feature_column.numeric_column(key=key) for key in feature_names
    ]

    opti = tf.keras.optimizers.legacy.Adam(learning_rate=0.01)

    def input_fn():
        dataset = tf.data.Dataset.from_tensor_slices((dict(X_enc), y_enc))
        return dataset.batch(10).repeat()

    def eval_input_fn():
        # Not repeated, so one pass yields exactly one prediction per row.
        dataset = tf.data.Dataset.from_tensor_slices((dict(X_enc), y_enc))
        return dataset.batch(10)

    estimator = tf.estimator.DNNRegressor(
        hidden_units=[64, 32, 16],
        feature_columns=my_feature_columns,
        optimizer=opti,
        dropout=0.5
    )

    estimator.train(input_fn=input_fn, steps=2000)

    # The evaluation call is kept as before; its return value is not used here.
    estimator.evaluate(input_fn=eval_input_fn)

    # Each prediction dict carries a length-1 array under 'predictions';
    # take its single entry.
    predictions = np.array([
        np.asarray(pred['predictions'], dtype=float).ravel()[0]
        for pred in estimator.predict(input_fn=eval_input_fn)
    ])
    targets = y_enc.to_numpy(dtype=float)
    errors = predictions - targets

    rmse = np.sqrt(np.mean(errors ** 2))

    # Compare the absolute error with the tolerance.  The previous test,
    # abs(pred) - abs(target) < 0.95, compared magnitudes, so any prediction
    # of small magnitude was counted as correct regardless of the target.
    accuracy = float(np.mean(np.abs(errors) < tolerance))

    print("HE accuracy: ", accuracy)
    print("RMSE: ", rmse)


In [5]:
import pandas as pd

# Load the dataset, then train and report on both models.
cci_data = pd.read_csv("/content/uci_cci.csv")

# Every column except the last is an input feature; the last one is the target.
*input_cols, output_cols = cci_data.columns

(
    x_data,
    y_data,
    x_data_shortened,
    y_data_shortened,
    X_enc,
    y_enc,
    H_enc,
) = dataload(cci_data, input_cols, output_cols)

# Baseline classifier on the plain data, then the regressor on the masked subset.
vanillaModel(x_data, y_data)
homomorphicEncryptionModel(X_enc, y_enc, x_data, H_enc)


Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead


Test set accuracy: 0.779



Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.


HE accuracy:  1.0
RMSE:  0.5970361989630348
