In [1]:
import pandas as pd
from google.cloud import bigquery
from google.cloud import storage
import io
import hashlib
import time

In [2]:
from src.customer_request import CustomerRequest
from src.hasher import Hasher
from src.customer import Customer

In [3]:
# Create the customer organisation that will request and receive data.
# Customer is constructed with the organisation name only; the recorded
# output below shows that `password` starts out as None.
customer = Customer("MOD")
# Show the instance attributes as a dict (last expression -> cell output).
vars(customer)

{'org_name': 'MOD', 'password': None}

In [4]:
# Bucket that holds the mock datasets served to customers.
bucket_name = "rdmf_mock_data"
# Google Cloud Storage client used to reach that bucket
# (per the original note, the backing store could be BigQuery instead).
client = storage.Client()

In [5]:
# The customer requests data: datasets AI and BI, joined on UPRN.
# Arguments are positional; going by the attribute dump below they map to
#   org_name, dataset1, dataset1_cols, dataset2, dataset2_cols, merge_col.
# The same dump shows req_id, epoch, password_prefix and password_suffix
# already populated right after construction.
cus_req = CustomerRequest(
    customer.org_name, 
    "AI", 
    ["AI_ID", "UPRN"], 
    "BI", 
    ["BI_ID", "UPRN", "SIC_code"],
    "UPRN"
)
# Show the request's attributes as a dict (last expression -> cell output).
vars(cus_req)

{'org_name': 'MOD',
 'dataset1': 'AI',
 'dataset1_cols': ['AI_ID', 'UPRN'],
 'dataset2': 'BI',
 'dataset2_cols': ['BI_ID', 'UPRN', 'SIC_code'],
 'merge_col': 'UPRN',
 'req_id': 0,
 'epoch': 1709809219.9857605,
 'password_prefix': '7779680',
 'password_suffix': '844'}

In [6]:
# Attach the requested datasets to the request object, using the storage
# client and bucket name defined above.
# NOTE(review): presumably this reads the AI and BI tables from the bucket
# into `cus_req.data` (a later cell indexes `cus_req.data["AI"]`) — confirm
# against CustomerRequest.set_data.
cus_req.set_data(client, bucket_name)

In [7]:
# Generate the password for this request; the customer keeps it.
customer_password = cus_req.create_user_password()

# Report the password alongside the two request-side fragments in a single
# print call (one line per item, same text and order as three separate prints).
# In the recorded output the password begins with the prefix.
# NOTE(review): this writes the password and the stored suffix into the saved
# notebook output — acceptable for mock data only; confirm before sharing.
print(
    f"customer password: {customer_password}",
    f"note the password prefix (which we use for an identifier): {cus_req.password_prefix}",
    f"but not the password suffix (which we store for security): {cus_req.password_suffix}",
    sep="\n",
)

# Store the password on the customer object so later cells can read it back.
customer.set_password(customer_password)

customer password: 7779680112464
note the password prefix (which we use for an identifier): 7779680
but not the password suffix (which we store for security): 844


In [8]:
# Produce the data set to hand over, protected with the combination of our
# keys. get_data receives:
#   - Hasher, passed as imported (it is not instantiated in this cell),
#   - the password previously stored on the customer,
#   - the list of columns ["AI_ID", "BI_ID"].
# NOTE(review): presumably those two columns are the ones that get hashed —
# in the recorded outputs below they appear as hex strings while UPRN and
# SIC_code stay numeric; confirm against CustomerRequest.get_data.
encrypted_data = cus_req.get_data(Hasher, customer.get_password(), ["AI_ID", "BI_ID"])
# Hand the result to the customer object.
customer.set_data(encrypted_data)

In [9]:
# The customer now has data: dump every attribute of the customer object.
# NOTE(review): the recorded output includes the plain-text password and the
# full repr of the `data` frame; prefer showing only what is needed
# (e.g. customer.data.head()) before sharing the notebook.
vars(customer)

{'org_name': 'MOD',
 'password': '7779680112464',
 'data':                                                  AI_ID          UPRN  \
 0    0008732f01f2d4255031586ea2a4c3407f9631ee58cb54...  950930887871   
 1    001fa7a1088d4df518c8563db2a9a13427db896a4dbb4e...  950305795301   
 2    0075383b12edaf268c18047c48f197b232f2773eabd2f7...  950301370654   
 3    009d3d323a407a93843c66451fa9f5d521be631c738ab5...  950298826255   
 4    00d0d1e90cb23c6407db04df032fe3c5062e61d68274b2...  950572492129   
 ..                                                 ...           ...   
 995  fedc3d3c4677530497cc41f69434d5d0432cd1cf42dd50...  950051056730   
 996  ff23c964bea01347d014db45b2f9bd2b838fcd5411683e...  950483936664   
 997  ff3871bf4daa2c46c0d7f5e7a7b3ebab2b72fb9fa91a62...  950489234984   
 998  ff6cb7e46ad5678dc4f2d15ed0047da76d1a9c40bbc717...  950507373301   
 999  ffc062dcd7b5117c9db0f3622cf3067f006cc55863bf89...  950368771438   
 
                                                  BI_ID  SIC_cod

In [12]:
# Display the frame the customer received. In the recorded output AI_ID and
# BI_ID are long hex strings, while UPRN and SIC_code are plain numbers.
customer.data

Unnamed: 0,AI_ID,UPRN,BI_ID,SIC_code
0,0008732f01f2d4255031586ea2a4c3407f9631ee58cb54...,950930887871,ccaf3a6345192d037b444a6f90893503769fb4ab4b60ac...,83343
1,001fa7a1088d4df518c8563db2a9a13427db896a4dbb4e...,950305795301,850a7a5e5b42353bcc35d99d9209ae584075792b958262...,15845
2,0075383b12edaf268c18047c48f197b232f2773eabd2f7...,950301370654,204953fd0e1e80ef4c14a952dec41ac4d2bee57291182d...,64152
3,009d3d323a407a93843c66451fa9f5d521be631c738ab5...,950298826255,9a83c002054504b760d56bc5ebce701d7b052fc7b406eb...,678
4,00d0d1e90cb23c6407db04df032fe3c5062e61d68274b2...,950572492129,c0adc1b99c52e0753afcaea09c95fb282609338d7202a0...,26475
...,...,...,...,...
995,fedc3d3c4677530497cc41f69434d5d0432cd1cf42dd50...,950051056730,4763e8a96385212018be3ea016dbb743aeeadff61dfcb0...,14805
996,ff23c964bea01347d014db45b2f9bd2b838fcd5411683e...,950483936664,432aad23c3e1b04cd44e3d25b28af122aec4310140772a...,90194
997,ff3871bf4daa2c46c0d7f5e7a7b3ebab2b72fb9fa91a62...,950489234984,e0787aa2485c3d6f60510632e21825387f1a2daa606220...,75608
998,ff6cb7e46ad5678dc4f2d15ed0047da76d1a9c40bbc717...,950507373301,f0eaaa01c831b350750e21b55ea55019764b72fa75fa73...,14368


In [11]:
# Display the AI dataset held on the request side, for comparison with the
# customer's copy: in the recorded output AI_ID is still the original
# numeric identifier here.
cus_req.data["AI"]

Unnamed: 0,AI_ID,UPRN
0,109883574750268,950698168954
1,109256760223223,950320937277
2,109893804250876,950186421264
3,109805082504690,950680091532
4,109403802151520,950396831655
...,...,...
100995,109984358090240,950178626481
100996,109065983079912,950672927828
100997,109186696116433,950696064737
100998,109788394420272,950334094218
