# Create Sample Data & Encrypt it

## Preparations

In [1]:
# import modules
import os  # operating system operations
import pickle  # saving files as pickles
import time  # for stopping runtimes
import pandas as pd  # data mangling and transforming
import numpy as np  # handling vectors and matrices
from datetime import datetime  # for timestamps
from secret_sharing import encrypt  # import additive secret sharing encryption

In [2]:
# start timer: wall-clock reading from time.time() (seconds since the epoch);
# the last cell of this notebook subtracts it from a second time.time()
# reading to log the elapsed runtime
start: float = time.time()

In [3]:
# Q: large prime number handed to encrypt() as its `Q` argument
# (acts as the key; still hardcoded for now, primality not re-checked here)
Q = 1_006_940_912_373_947

In [4]:
# bounds for the generated secrets
# NOTE: the sampling cell passes these to np.random.randint, whose upper
# bound is exclusive, so max_value itself is never drawn
min_value, max_value = 0, 10_000

In [5]:
# dimensions of the shared dataset
n_parties = 3          # number of parties -> one column of shares each
n_secrets = 1_000_000  # number of secrets -> one row each

In [6]:
# one column name per party, counted up alphabetically from 'A'
cnames = [chr(ord('A') + offset) for offset in range(n_parties)]

## Dataset

### random values

In [7]:
# fix the seed of numpy's global random state so reruns draw the same values
SEED = 8888
np.random.seed(SEED)

In [8]:
# create the centralised (plaintext) dataset: n_secrets random integers
# drawn from the half-open range [min_value, max_value) -- randint's `high`
# is exclusive
odat = np.random.randint(low=min_value, high=max_value, size=n_secrets)
# peek at the first five values
odat[:5]

array([4483, 2120, 6825, 5612, 2224])

In [9]:
# save the plaintext dataset as a pickle
# exist_ok=True creates the 'data' directory only when it is missing, without
# a separate exists() check that another process could race against
os.makedirs('data', exist_ok=True)
with open('data/originalData.pickle', 'wb') as f:
    pickle.dump(odat, f)

In [10]:
# secret-share the dataset: one encrypt() call per entry of odat, each asked
# for n_parties shares under the prime Q
elist = [encrypt(secret, Q=Q, n_shares=n_parties) for secret in odat]
# collect the results in a dataframe: one row per secret, columns named
# after the parties (assumes encrypt() returns one value per party -- verify)
edat = pd.DataFrame(data=elist, columns=cnames)

### secret sharing

In [11]:
# preview the first five rows of the encrypted dataframe
edat.head(n=5)

Unnamed: 0,A,B,C
0,514617663176651,623262597775810,876001563799916
1,238578557594815,5591690999913,762770663781339
2,120452427130016,219338951913990,667149533336766
3,876214611308548,453090361354719,684576852090239
4,918163228303103,694728499072282,400990097374733


Each row holds one share per party (columns A–C). The shares look plausible, so let's save them!

### Save data and time

In [None]:
# data: persist the encrypted dataframe as a pickle in the data directory
edat.to_pickle(path='data/encryptedData.pickle')

In [None]:
# timer: stop the clock and build a one-row log entry for this phase
end = time.time()
# the scalar entries are broadcast against the single-element 'seconds' list,
# so the frame has exactly one row
# NOTE(review): datetime.utcnow() is deprecated since Python 3.12; kept as-is
# so the timestamp format written to the log does not change
df_timer = pd.DataFrame(data={'phase': '0: Data Creation and Encryption',
                              'timestamp': str(datetime.utcnow()),
                              'n_secrets': n_secrets,
                              'seconds': [round(end-start,2)]})

# append the row to the shared timing log without a header line; the
# dataframe index (0) is written as the first field, as before.
# newline='' is what pandas asks for when it is given an open file handle:
# without it, text-mode newline translation on Windows turns each line ending
# into '\r\r\n' and leaves blank rows in the csv
with open('data/timer.csv', 'a', newline='') as f:
    df_timer.to_csv(f, header=False)