# Create Sample Data & Encrypt it

## Preparations

In [None]:
# import modules
import pickle  # saving files as pickles
import time  # for stopping runtimes
import pandas as pd  # data mangling and transforming
import numpy as np  # handling vectors and matrices
from datetime import datetime  # for timestamps
from secret_sharing import encrypt  # import additive secret sharing encryption

In [None]:
# start timer: wall-clock reference; the elapsed seconds are computed from it
# in the final cell and written to the timing log
start = time.time()

In [None]:
# large prime number (key, hardcoded for now); it is passed to encrypt() as
# the second positional argument when the shares are generated
# NOTE(review): primality of this constant is not verified in this notebook
Q = 1006940912373947

In [None]:
# bounds for the randomly generated secrets
# NOTE: these are handed to np.random.randint as (low, high), and `high` is
# exclusive there, so generated secrets lie in [min_value, max_value - 1]
min_value = 0
max_value = 10000

In [None]:
# define number of parties and rows per party
n_parties = 3  # columns of the share table (one share per party)
n_secrets = 10000  # rows of the share table (one row per secret)

In [None]:
# name columns alphabetically: one capital letter per party, starting at 'A'
cnames = [chr(ord('A') + party) for party in range(n_parties)]

## Dataset

### random values

In [None]:
# define random seed so that the generated dataset is reproducible across runs
np.random.seed(8888)

In [None]:
# create centralised dataset: n_secrets random integers drawn from
# [min_value, max_value) -- the upper bound is exclusive
odat = np.random.randint(low=min_value, high=max_value, size=n_secrets)
# peek at the first five values
odat[:5]

In [None]:
# persist the plaintext dataset as a pickle file
with open('data/originalData.pickle', 'wb') as pickle_file:
    pickle.dump(odat, pickle_file)

### secret sharing

In [None]:
# encrypted dataframe: one row per secret, one column (share) per party.
# The rows are collected first and the frame is built in a single call:
# DataFrame.append was removed in pandas 2.0, and calling it once per row
# copied the whole frame on every iteration (quadratic runtime).
share_rows = [encrypt(secret, Q, n_shares=n_parties) for secret in odat]
# NOTE(review): dtype=object stores every share exactly as encrypt() returned
# it (no down-cast to a fixed-width integer). The former append-onto-an-empty-
# frame approach is believed to have produced object columns as well --
# confirm against the notebooks that consume this table.
edat = pd.DataFrame(share_rows, columns=cnames, dtype=object)

In [None]:
# preview the first rows of the share table
edat.head()

Looks legit, let's save it!

### Save data and time

In [None]:
# data: persist the share table as a pickle file
edat.to_pickle('data/encryptedData.pickle')

In [None]:
# timer: record how long data creation and encryption took
end = time.time()
df_timer = pd.DataFrame(data={'phase': '0: Data Creation and Encryption',
                              'timestamp': str(datetime.utcnow()),
                              'n_secrets': n_secrets,
                              'seconds': [round(end-start,2)]})

# Append the single timing row (no header) to the shared log. The path is
# handed to pandas directly so that it opens the file with the newline
# handling its CSV writer expects; a text-mode handle opened without
# newline='' leads to blank lines between rows on Windows.
df_timer.to_csv('data/timer.csv', mode='a', header=False)