# Create Sample Data & Encrypt it

## Preparations

In [1]:
# import modules
import pickle  # saving files as pickles
import time  # for stopping runtimes
import pandas as pd  # data mangling and transforming
import numpy as np  # handling vectors and matrices
from datetime import datetime  # for timestamps
from secret_sharing import encrypt  # import additive secret sharing encryption

In [2]:
# start timer: wall-clock reference point; the final cell subtracts it from
# its own time.time() reading to obtain this notebook's runtime in seconds
start = time.time()

In [3]:
# large prime number (key, hardcoded for now)
# Passed as the second argument to every encrypt(...) call in this notebook.
# NOTE(review): presumably the modulus of the additive sharing scheme, and its
# primality is not checked anywhere in this notebook -- confirm both.
Q = 1006940912373947

In [4]:
# bounds for the randomly generated secrets
# NOTE: both values are fed to np.random.randint(min_value, max_value, ...),
# whose upper bound is exclusive, so the secrets actually range from
# min_value to max_value - 1 (max_value itself is never drawn).
min_value = 0
max_value = 100

In [5]:
# define number of parties and rows per party
# n_parties: how many shares each secret is split into (one column per party)
# n_secrets: how many random secrets are generated (one row per secret)
n_parties = 3  # columns
n_secrets = 100  # rows

In [6]:
# name columns alphabetically: one capital letter per party ('A', 'B', 'C', ...)
cnames = [chr(ord('A') + offset) for offset in range(n_parties)]

## Dataset

### random values

In [7]:
# define random seed
# Seeds NumPy's legacy global RNG so that the np.random.randint draw used to
# build the dataset yields the same values on every run.
np.random.seed(8888)

In [8]:
# create centralised dataset: n_secrets random integers drawn from
# min_value (inclusive) up to max_value (exclusive)
odat = np.random.randint(low=min_value, high=max_value, size=n_secrets)
odat[:5]  # peek at the first five secrets

array([ 3, 54, 99, 72, 41])

In [9]:
# save as pickle: persist the plaintext secrets next to the encrypted output
with open('data/originalData.pickle', mode='wb') as pickle_file:
    pickle.dump(odat, pickle_file)

### secret sharing

In [10]:
# encrypted dataframe: one row per secret, one share column per party
# Collect all share vectors first and build the frame in a single call:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# re-copies it on every iteration (quadratic in the number of secrets).
# Assumes encrypt(...) returns n_parties share values per secret, in party
# order -- TODO confirm against secret_sharing.encrypt.
# NOTE(review): column dtypes are now inferred by pandas from the share
# values; pass dtype=object if downstream code relies on Python ints.
share_rows = [encrypt(secret, Q, n_shares=n_parties) for secret in odat]
edat = pd.DataFrame(share_rows, columns=cnames)

In [11]:
# preview the first rows: one row per secret, one share column per party
edat.head()

Unnamed: 0,A,B,C
0,128551022592284,698979257840362,179410631941304
1,711099077448633,529175475894403,773607271404912
2,673330536630493,335642224095234,1004909064022266
3,556574385666995,77986090937608,372380435769416
4,394272659512662,449627879225688,163040373635638


The shares look plausible (every displayed value is a non-negative integer below Q), so let's save them!

### Save data and time

In [12]:
# data: persist the share table (one column per party) with pandas' pickle writer
edat.to_pickle('data/encryptedData.pickle')

In [13]:
# timer: measure this notebook's wall-clock runtime and append it to the log
end = time.time()
# NOTE: datetime.utcnow() is deprecated since Python 3.12; it is kept here so
# the naive "YYYY-MM-DD HH:MM:SS.ffffff" timestamp format stays unchanged.
df_timer = pd.DataFrame(data={'phase': '0: Data Creation and Encryption',
                              'timestamp': str(datetime.utcnow()),
                              'n_secrets': n_secrets,
                              'seconds': [round(end-start,2)]})

# Hand pandas the path instead of an already-open text handle: a handle opened
# without newline='' has its line endings translated a second time on Windows,
# which leaves a blank line after every record. mode='a' appends one
# header-less row per run; the row index is still written as the first field.
df_timer.to_csv('data/timer.csv', mode='a', header=False)