In [1]:
from lusidtools.jupyter_tools import toggle_code

"""Structured Results Store for storage of Portfolio data

Attributes
----------
structured_results_store
virtual_document
luminesce
"""

# Helper from lusidtools.jupyter_tools; presumably renders a "Toggle Docstring"
# control that shows/hides this cell's code — TODO confirm against lusidtools
toggle_code("Toggle Docstring")

# Structured Results Store Example

This notebook demonstrates loading a custom Portfolio dataset into the LUSID Structured Results Store.

For more context on what the Structured Result Store and Structured Result Data are, see KB article <a href="https://support.lusid.com/knowledgebase/article/KA-01893/en-us">KA-01893</a>.

Once loaded, the dataset can be retrieved as a single document, or its individual fields can be accessed.

In [2]:
import os
import pandas as pd
from datetime import datetime, timezone
import io
import json
from IPython.core.display import HTML
from itertools import chain

# Then import the key modules from the LUSID package (i.e. The LUSID SDK)
import lusid as lu
import lusid.models as lm

# And use absolute imports to import key functions from Lusid-Python-Tools and other helper packages
from lusid.utilities import ApiClientFactory

# Pandas rendering: never truncate rows or columns, and show floats with
# thousands separators and two decimal places
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", "{:,.2f}".format)

# Widen the notebook container so wide tables fit on screen
display(HTML("<style>.container { width:90% !important; }</style>"))

# The secrets file location is read from the environment (None when unset)
secrets_path = os.environ.get("FBN_SECRETS_PATH")

# Factory used to build every LUSID API client in this notebook
api_factory = ApiClientFactory(
    app_name="LusidJupyterNotebook",
    api_secrets_filename=secrets_path,
)

# Fetch the LUSID version information and show it as a table
api_status = pd.DataFrame(
    api_factory.build(lu.ApplicationMetadataApi).get_lusid_versions().to_dict()
)
display(api_status)

Unnamed: 0,api_version,build_version,excel_version,links
0,v0,0.6.9404.0,0.5.2887,"{'relation': 'RequestLogs', 'href': 'http://se..."


In [7]:
# Client for the Structured Result Data API; every SRS call in this notebook goes through it
srs_api = api_factory.build(lu.StructuredResultDataApi)

<h2>Scope, Effective Date, Result Type</h2>

<ul>
<li>Scope: The scope in which to create or update data maps.</li>
<li>Effective Date: Date at which the structured result is effective from.</li>
<li>Result Type: The Class of the Result, has to be one of [UnitResult/Analytic, UnitResult/Grouped, UnitResult/Holding]</li>
</ul>

In [8]:
# Settings shared by every Structured Results Store call in this notebook
scope = "srs"
sample_data_result_type = "UnitResult/Custom"
effective_date = datetime(year=2021, month=1, day=27, tzinfo=timezone.utc)

## Define functions to create the Data Map


<h3>Data Mapping</h3>

The Structured Result Store allows us to upsert structured results, but to give them structure we must first define a Data Mapping.

You can think of a Data Mapping as defining the columns and the column types for each row that gets upserted into the SRS using a specific Data Map

Each "row" can have multiple keys, and when defining the data map there are 4 different key types: Unique, PartOfUnique, Leaf, and CompositeLeaf.

<h4>Unique</h4>
<blockquote>A primary key that will throw a unique key constraint if multiple equal values are upserted for a single specific result type</blockquote>
<h4>Part of Unique</h4>
<blockquote>A key that will be considered as part of the composite primary key for an entity</blockquote>
<h4>Leaf</h4>
<blockquote>A Leaf defines a value that won't be considered part of the primary key or the composite primary key.</blockquote>
<h4>Composite Leaf</h4>
<blockquote>
CompositeLeaf is an abstraction that lets the user specify which Leaves belong together. A CompositeLeaf defines a data type rather than the name of an entity, because it represents a group of entities.
E.g. for an Accrual we can define a composite leaf made up of two leaves: an amount and a currency.
<p></p>
<blockquote>
"UnitResult/Accrual"

DataDefinition(address="UnitResult/Accrual", dataType= "Result0D", keyType="CompositeLeaf") 
</blockquote>

<blockquote>
"UnitResult/Accrual/Amount"

DataDefinition(address="UnitResult/Accrual/Amount", name="Accrual", dataType= "decimal", keyType="Leaf")
</blockquote>
<blockquote>
"UnitResult/Accrual/AmountCcy"

DataDefinition(address="UnitResult/Accrual/AmountCcy", name="AccrualCcy", dataType= "string", keyType="Leaf")
</blockquote>
</blockquote>

In [9]:
def gen_srs_data_map(leaf_columns, unique_columns, data_types):
    """Build the ``DataMapping`` that describes every column of the dataset.

    Parameters
    ----------
    leaf_columns
        Column names registered with key type ``"Leaf"``.
    unique_columns
        Column names registered with key type ``"PartOfUnique"``.
    data_types
        Mapping of column name to data type; looked up for every column.

    Returns
    -------
    lm.DataMapping
        Leaf definitions first, followed by the part-of-unique definitions.

    Notes
    -----
    Addresses are built as ``UnitResult/{scope}/{column}`` using the
    notebook-level ``scope`` variable.
    """
    data_definitions = []
    for key_type, columns in (("Leaf", leaf_columns), ("PartOfUnique", unique_columns)):
        for column in columns:
            data_definitions.append(
                lm.DataDefinition(
                    address=f"UnitResult/{scope}/{column}",
                    name=column,
                    data_type=data_types[column],
                    key_type=key_type,
                )
            )

    return lm.DataMapping(data_definitions=data_definitions)

def create_data_map(df, code, version, leaf_columns, unique_columns, data_types):
    """Create a data map in the Structured Results Store and return its key.

    Parameters
    ----------
    df
        Not used by this function; kept so existing callers keep working.
    code
        Code of the data map; also used as the key of the request body.
    version
        Version of the data map.
    leaf_columns, unique_columns, data_types
        Passed straight through to ``gen_srs_data_map``.

    Returns
    -------
    lm.DataMapKey
        The key (version + code) identifying the data map.

    Raises
    ------
    lu.ApiException
        Re-raised unchanged for any API failure whose error code is not in the
        ignorable set below, or whose body carries no readable error code.
    """
    # Error codes that are swallowed rather than re-raised.
    # NOTE(review): 461 is presumably the "data map already exists" response,
    # which would make re-running the notebook safe — confirm against the LUSID error codes.
    ignorable_error_codes = (461,)

    srs_data_map = gen_srs_data_map(leaf_columns, unique_columns, data_types)
    srs_data_map_key = lm.DataMapKey(version=version, code=code)

    try:
        srs_api.create_data_map(
            scope=scope,
            request_body={
                code: lm.CreateDataMapRequest(
                    id=srs_data_map_key,
                    data=srs_data_map,
                )
            },
        )
    except lu.ApiException as e:
        # A missing, non-JSON or unexpectedly shaped body must not hide the
        # original API error behind a parsing exception.
        try:
            error_code = json.loads(e.body)["code"]
        except (TypeError, ValueError, KeyError):
            error_code = None

        if error_code not in ignorable_error_codes:
            # Bare raise re-raises the ApiException with its original traceback
            raise

    return srs_data_map_key

## Read in sample data

In [10]:
# Load the sample dataset, rounding to 0 decimal places, and preview the first five rows
df = pd.read_csv("data/srs_custom_data.csv").round(0)
display(df.head())

Unnamed: 0,Port,Currency,Date,Sys,Value,Class
0,PortA,AUD,20210713,FSC,50000000,C
1,PortA,CAD,20210713,FSC,138234286,C
2,PortA,CHF,20210713,FSC,203402,C
3,PortA,EUR,20210713,FSC,235402950,C
4,PortA,EUR,20210715,FTC,22342054,C


## Upsert data into the Structured Results Store

In [11]:
def insert_and_load_data(dataFrame):
    """Create the data map, then upsert one CSV document per portfolio into the SRS.

    Parameters
    ----------
    dataFrame : pd.DataFrame
        Dataset to load. It is grouped by its ``"Port"`` column and each group
        is upserted as a separate structured result.

    Returns
    -------
    list of lm.StructuredResultDataId
        The id of every structured result that was upserted, in group order.
    """
    srs_ids = []

    version = "1.01"
    portfolios = dataFrame.groupby("Port")
    unique_columns = ["Currency", "Date", "Sys", "Class"]
    # Every column that is not part of the composite key is registered as a leaf
    leaf_columns = [column for column in dataFrame.columns if column not in unique_columns]

    sample_data_map_key = create_data_map(
        # Pass the argument of this function, not the notebook-level `df`
        df = dataFrame,
        code = 'sample_data_map',
        version = "0.0009",
        leaf_columns = leaf_columns,
        unique_columns = unique_columns,
        data_types = { "RowId": "string",
                       "Port": "string",
                       "Currency": "string",
                       "Date": "string",
                       "Sys": "string",
                       "Value": "decimal",
                       "Class": "string"}
    )

    for portfolio_id, pf_df in portfolios:
        srs_id = lm.StructuredResultDataId(
            source = "Client",
            code = portfolio_id,
            effective_at = effective_date,
            result_type = sample_data_result_type)

        srs_ids.append(srs_id)

        # Serialise this portfolio's rows to CSV text in memory.
        # NOTE(review): the DataFrame index is written too, so the stored document
        # gains an unnamed leading column — confirm whether that is intended.
        csv_data = io.StringIO()
        pf_df.to_csv(csv_data)

        request_body = {
            portfolio_id: lm.UpsertStructuredResultDataRequest(
                id = srs_id,
                data = lm.StructuredResultData(
                    document_format = "csv",
                    version = version,
                    name = "Data file",
                    document = csv_data.getvalue(),
                    data_map_key = sample_data_map_key
                )
            )
        }

        result = srs_api.upsert_structured_result_data(
            scope=scope,
            request_body=request_body)

        # Show the key/value pairs returned by the upsert
        display(pd.DataFrame(result.values.items()))

    return srs_ids


srs_ids = insert_and_load_data(df)

Unnamed: 0,0,1
0,PortA,2022-06-09 17:16:13.542668+00:00


Unnamed: 0,0,1
0,PortB,2022-06-09 17:16:14.778689+00:00


Unnamed: 0,0,1
0,PortC,2022-06-09 17:16:15.494980+00:00


## Retrieve the raw data from the Structured Results Store

In [12]:
def retrieve_data():
    """Fetch and display the stored CSV document of every portfolio in ``df``.

    For each distinct value of ``df['Port']`` the structured result is read
    back from the SRS, parsed into a DataFrame, and shown under its portfolio code.
    """
    for portfolio_id in df['Port'].unique():
        srs_id = lm.StructuredResultDataId(
            source="Client",
            code=portfolio_id,
            effective_at=effective_date,
            result_type=sample_data_result_type,
        )

        # The request is keyed by "key", so the response is read back under the same key
        response = srs_api.get_structured_result_data(
            scope=scope,
            request_body={"key": srs_id},
        )

        # Parse the stored CSV text before displaying anything
        document_text = response.values["key"].document
        doc = pd.read_csv(io.StringIO(document_text))

        display(srs_id.code)
        display(doc)

retrieve_data()

'PortA'

Unnamed: 0.1,Unnamed: 0,Port,Currency,Date,Sys,Value,Class
0,0,PortA,AUD,20210713,FSC,50000000,C
1,1,PortA,CAD,20210713,FSC,138234286,C
2,2,PortA,CHF,20210713,FSC,203402,C
3,3,PortA,EUR,20210713,FSC,235402950,C
4,4,PortA,EUR,20210715,FTC,22342054,C
5,5,PortA,EUR,20210727,FTC,2349130942,C
6,6,PortA,EUR,20210812,FTC,23489234,C
7,7,PortA,GBP,20210713,FSC,83425656,C
8,8,PortA,GBP,20210812,FTC,54354564,C
9,9,PortA,JPY,20210713,FSC,654659,C


'PortB'

Unnamed: 0.1,Unnamed: 0,Port,Currency,Date,Sys,Value,Class
0,26,PortB,AUD,20210713,FSC,5590075,B
1,27,PortB,AUD,20210812,FTC,5453435,B
2,28,PortB,SGD,20210713,FSC,43545554,B
3,29,PortB,USD,20210713,FSC,543543435453,B
4,30,PortB,EUR,20210713,FSC,54545435,B


'PortC'

Unnamed: 0.1,Unnamed: 0,Port,Currency,Date,Sys,Value,Class
0,31,PortC,AUD,20210713,FSC,75174369,C
1,32,PortC,AUD,20210715,FTC,261361,C
2,33,PortC,AUD,20210716,FTC,73055,C
3,34,PortC,AUD,20210719,FTC,4161646,C
4,35,PortC,AUD,20210720,FTC,787878787,C
5,36,PortC,AUD,20210722,FTC,878787878,C
6,37,PortC,AUD,20210723,FTC,3349533,C
7,38,PortC,AUD,20210728,FTC,8748787,C
8,39,PortC,AUD,20210812,FTC,4864848616,C
9,40,PortC,CAD,20210713,FSC,854684848,C


## Extract a 'Virtual Document' from the Structured Results Store

In [13]:
def retrieve_virtual_document():
    """Fetch the virtual document of every portfolio in ``df`` as one DataFrame.

    For each distinct value of ``df['Port']`` the virtual document is requested
    from the SRS and each of its rows is converted to a one-row DataFrame
    (row data columns plus the row id fields).

    Returns
    -------
    pd.DataFrame
        All rows of all portfolios concatenated; an empty DataFrame when no
        rows were returned.
    """
    portfolio_ids = df['Port'].unique()

    # Accumulated across ALL portfolios: returning from inside the loop would
    # stop after the first portfolio.
    result_dfs = []

    for portfolio_id in portfolio_ids:
        # Retrieve document from SRS
        srs_id = lm.StructuredResultDataId(
            source = "Client",
            code = portfolio_id,
            effective_at = effective_date,
            result_type = sample_data_result_type)

        result = srs_api.get_virtual_document(
            scope = scope,
            request_body = {
                "key": srs_id
            }
        )

        # Convert each virtual-document row to a one-row DataFrame
        for item in result.values["key"].data:
            columns = item.row_data.columns
            values = [i.value for i in item.row_data.values]

            # The values form a single column; transpose so they become a single row
            row_df = pd.DataFrame(values).T
            row_df.columns = columns

            # Add every row id field as an extra column
            for key, value in item.row_id.items():
                row_df[key] = [value]

            result_dfs.append(row_df)

    # pd.concat raises ValueError when given nothing to concatenate
    if not result_dfs:
        return pd.DataFrame()

    return pd.concat(result_dfs)

retrieve_virtual_document()

Unnamed: 0,UnitResult/srs/Port,UnitResult/srs/Value,UnitResult/srs/Class,UnitResult/srs/Currency,UnitResult/srs/Date,UnitResult/srs/Sys
0,PortA,50000000.0,C,AUD,20210713,FSC
0,PortA,534343.0,C,USD,20210719,FTC
0,PortA,4534.0,C,USD,20210726,FTC
0,PortA,3535454.0,C,USD,20210729,FTC
0,PortA,22342054.0,C,EUR,20210715,FTC
0,PortA,18406.0,C,USD,20210810,FTC
0,PortA,454654.0,C,USD,20210715,FTC
0,PortA,-174328.0,C,USD,20210713,FSC
0,PortA,454534135453.0,C,USD,20210716,FTC
0,PortA,83425656.0,C,GBP,20210713,FSC


In [7]:
# Imported under its own alias: `lm` is already bound to `lusid.models`, which the
# cleanup cell at the end of the notebook still needs.
import lumipy as lumi

# Separate name so the LUSID secrets path set earlier is not overwritten
lumi_secrets_path = os.getenv("FBN_LUMI_SECRETS_PATH")
atlas = lumi.get_atlas(api_secrets_filename=lumi_secrets_path)

# One lookup row per portfolio code
tv_df = pd.DataFrame([
    {'Scope': scope, 'Code': portfolio_code, 'Source': 'Client', 'ResultType': sample_data_result_type, 'EffectiveAt': effective_date}
    for portfolio_code in ('PortA', 'PortB', 'PortC')
])

sres = atlas.lusid_unitresult_structuredresult(to_look_up=lumi.from_pandas(tv_df))
qry = sres.select('*')

# Stored under its own name: `df` must keep holding the sample data, because the
# cleanup cell reads `df['Port']`.
sres_df = qry.go()
sres_df


Getting atlas🌎


ValueError: The fields ['api_url'] on the ApiConfiguration are set to None, please ensure that you have provided them directly, via a secrets file or environment variables

## Cleanup - delete the data from the Structured Results Store

In [None]:
def delete_data():
    """Delete the structured result of every portfolio listed in ``df['Port']``.

    One delete request is sent per distinct portfolio code, addressed by the
    same source, effective date and result type used elsewhere in the notebook.
    """
    for portfolio_id in df['Port'].unique():
        # Identify the structured result to remove
        srs_id = lm.StructuredResultDataId(
            source="Client",
            code=portfolio_id,
            effective_at=effective_date,
            result_type=sample_data_result_type,
        )

        srs_api.delete_structured_result_data(
            scope=scope,
            request_body={"key": srs_id},
        )

delete_data()