# Create Sample Data & Encrypt it

## Preparations

In [1]:
# import modules
import pandas as pd  # data wrangling and transforming
import numpy as np  # handling vectors and matrices
import pickle  # saving files as pickles
from secret_sharing import encrypt  # import additive secret sharing encryption

In [2]:
# Large prime number (key, hardcoded for now).
# Passed to encrypt() as its second argument when the shares are created;
# presumably the modulus of the sharing scheme -- confirm against secret_sharing.
# NOTE(review): primality is stated by the original author, not checked here.
Q = 1006940912373947

In [3]:
# Bounds for the randomly drawn secrets.
# NOTE: max_value is handed to np.random.randint as `high`, which is
# exclusive, so the largest secret actually drawn is max_value - 1.
min_value = 0
max_value = 10000

In [4]:
# Dimensions of the share table built further down:
# one column per party and one row per secret.
n_parties = 3  # columns (number of shares requested per secret)
n_secrets = 100  # rows (number of random secrets drawn)

In [5]:
# Label the parties with consecutive characters starting at capital 'A'
# (A, B, C, ...). Past 26 parties the labels continue with the characters
# that follow 'Z' in the code table.
cnames = [chr(ord('A') + party) for party in range(n_parties)]

## Dataset

### random values

In [6]:
# Seed NumPy's global random generator so that the sample drawn with
# np.random.randint is reproducible from run to run.
np.random.seed(8888)

In [7]:
# Centralised (plain-text) dataset: n_secrets integers drawn uniformly from
# the half-open range [min_value, max_value).
odat = np.random.randint(low=min_value, high=max_value, size=n_secrets)
# show the first five secrets as a sanity check
odat[:5]

array([4483, 2120, 6825, 5612, 2224])

In [8]:
# Write the plain-text secrets to disk as a pickle.
# NOTE: open() does not create directories, so 'data' must already exist.
with open('data/originalData.pickle','wb') as pickle_file:
    pickle.dump(odat, pickle_file)

### secret sharing

In [9]:
# Encrypted dataframe: one row per secret, one column per party.
# encrypt() is asked for n_parties shares per secret, so every row must line
# up with the n_parties labels in cnames.
# All rows are collected first and the frame is built in a single step:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies the whole frame on every iteration.
share_rows = [encrypt(secret, Q, n_shares=n_parties) for secret in odat]
edat = pd.DataFrame(share_rows, columns=cnames)

In [10]:
# Preview the first five rows of the share table (one column per party).
edat.head()

Unnamed: 0,A,B,C
0,652785541339293,664216480691521,696879802721563
1,247859138410370,891129324909782,874893361429862
2,51053520443953,484962661525521,470924730411298
3,262370079745870,985526007409221,765985737598415
4,844204732288373,525462031525918,644215060935827


Looks legit, let's save it!

In [11]:
# Persist the share table using pandas' own pickle writer.
# NOTE: the 'data' directory must already exist.
edat.to_pickle('data/encryptedData.pickle')