Lineare Regression
---

In [52]:
# Import Dataset from sklearn
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.preprocessing import StandardScaler
import datetime

iris = load_iris()
X = iris['data']
y = iris['target']
names = iris['target_names']
feature_names = iris['feature_names']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=2)

Bibliotheken Beispiel
---

In [53]:
# Instantiating LinearRegression() Model
lr = LinearRegression()

# Training/Fitting the Model
lr.fit(X_train, y_train)

# Making Predictions
lr.predict(X_test)
pred = lr.predict(X_test)

# Evaluating Model's Performance
print('Mean Absolute Error:', mean_absolute_error(y_test, pred))
print('Mean Squared Error:', mean_squared_error(y_test, pred))
print('Mean Root Squared Error:', np.sqrt(mean_squared_error(y_test, pred)))

Mean Absolute Error: 0.1584601230068247
Mean Squared Error: 0.043573599004634006
Mean Root Squared Error: 0.20874290168682144


From Scratch
---

In [62]:
def fit(x, y):
	regression = np.c_[x, np.ones(len(x))]

	weights = np.ones(regression.shape[1])

	norma = 1
	learning_rate = 0.00001
	epsilon = 0.9
	while(norma > epsilon):
		y_pred = regression @ weights.T
		partial = regression.T @ (y - y_pred)
		norma = np.sum(np.sqrt(np.square(partial)))

		weights = weights.T + (learning_rate * partial)

	return weights

def predict(w, x):
	return w[:-1] @ (np.array(x).T) + w[-1]

weights = fit(X_train, y_train)

s_predict = datetime.datetime.now()
pred = predict(weights, X_test)
e_predict = datetime.datetime.now()

def meanError(y_test, pred):
	print('Mean Absolute Error:', mean_absolute_error(y_test, pred))
	print('Mean Squared Error:', mean_squared_error(y_test, pred))
	print('Mean Root Squared Error:', np.sqrt(mean_squared_error(y_test, pred)))

meanError(y_test, pred)

print(f"Prediction time for original: {int((e_predict-s_predict).total_seconds() * 1000)}ms")

def timer(encryptTime, predictTime, decryptTime):
	print(f"Encryption time: {int(encryptTime.total_seconds() * 1000)}ms")
	print(f"Prediction time: {int(predictTime.total_seconds() * 1000)}ms")
	print(f"Decryption time: {int(decryptTime.total_seconds() * 1000)}ms")


Mean Absolute Error: 0.15791782248674335
Mean Squared Error: 0.042219357807520355
Mean Root Squared Error: 0.2054734966060595
Prediction time for original: 0ms


Verschlüsselung mit Pyfhel
---
TODO Untersuchen wieso Gewichte verschlüsselt sein müssen

In [71]:
from Pyfhel import Pyfhel

pyfhel = Pyfhel()
pyfhel.contextGen(p=65537)

pyfhel.keyGen()

def encrypt(data):
    return [[pyfhel.encryptFrac(feature) for feature in item] if not np.isscalar(item) else pyfhel.encryptFrac(item) for item in data]

def decrypt(data):
    return [pyfhel.decryptFrac(item) for item in data]

s_encrypt = datetime.datetime.now()
eX_test = encrypt(X_test)
eWeight = encrypt(weights)
e_encrypt = datetime.datetime.now()

s_predict = datetime.datetime.now()
ePred = predict(eWeight, eX_test)
e_predict = datetime.datetime.now()

s_decrypt = datetime.datetime.now()
cryptPred = decrypt(ePred)
e_decrypt = datetime.datetime.now()

meanError(y_test, cryptPred)
timer(e_encrypt-s_encrypt, e_predict-s_predict, e_decrypt-s_decrypt)

Mean Absolute Error: 0.1579178223464017
Mean Squared Error: 0.04221935761486088
Mean Root Squared Error: 0.20547349613724122
Encryption time: 124ms
Prediction time: 129ms
Decryption time: 3ms


* Funktionalität: Ergebnisse sind fast identisch zum Original (Abweichungen in der Gleitkommarepräsentation)
* Laufzeit: Fast gleich schnell wie die Implementierung

Verschlüsselung mit Paillier
---
Hier müssen nur die Daten verschlüsselt werden.

In [67]:
from phe import paillier

public_key, private_key = paillier.generate_paillier_keypair()

def encrypt(data):
    return [[public_key.encrypt(feature) for feature in item] for item in data]

def decrypt(data):
    return [private_key.decrypt(item) for item in data]

s_encrypt = datetime.datetime.now()
eX_test = encrypt(X_test)
e_encrypt = datetime.datetime.now()

s_predict = datetime.datetime.now()
ePred = predict(weights, eX_test)
e_predict = datetime.datetime.now()

s_decrypt = datetime.datetime.now()
cryptPred = decrypt(ePred)
e_decrypt = datetime.datetime.now()

meanError(y_test, cryptPred)
timer(e_encrypt-s_encrypt, e_predict-s_predict, e_decrypt-s_decrypt)

Mean Absolute Error: 0.15791782248674335
Mean Squared Error: 0.042219357807520355
Mean Root Squared Error: 0.2054734966060595
Encryption time: 9756ms
Prediction time: 502ms
Decryption time: 691ms


* Funktionalität: Auch wieder fast identisches Ergebnisse (Gleitkommastellen)
* Laufzeit: Benötigt deutlich länger

Verschlüsselung mit TenSEAL
---

In [68]:
import tenseal as ts

ckksContext = ts.context(
            ts.SCHEME_TYPE.CKKS,
            poly_modulus_degree=8192,
            coeff_mod_bit_sizes=[60, 40, 40, 60]
          )
ckksContext.generate_galois_keys()
ckksContext.global_scale = 2**40

def encrypt(data, function, localContext):
  return [[function(localContext, [feature]) for feature in item] for item in data]

def decrypt(data):
  return [item.decrypt() for item in data]

s_encrypt = datetime.datetime.now()
ccks_eX_test = encrypt(X_test, ts.ckks_vector, ckksContext)
e_encrypt = datetime.datetime.now()

s_predict = datetime.datetime.now()
ccks_ePred = predict(weights, ccks_eX_test)
e_predict = datetime.datetime.now()

s_decrypt = datetime.datetime.now()
ccks_cryptPred = decrypt(ccks_ePred)
e_decrypt = datetime.datetime.now()

meanError(y_test, ccks_cryptPred)
timer(e_encrypt-s_encrypt, e_predict-s_predict, e_decrypt-s_decrypt)

Mean Absolute Error: 0.15791779883915014
Mean Squared Error: 0.042219352759008164
Mean Root Squared Error: 0.2054734843209901
Encryption time: 614ms
Prediction time: 114ms
Decryption time: 24ms


* Funktionalität: Auch wieder fast identisches Ergebnisse (Gleitkommastellen)
* Laufzeit: Langsamer als Original aber trotzdem schneller als Paillier