# Computation of repartition keys, valuation weights and dedicated values

### Necessary imports

In [1]:
import sys
import os
sys.path.append(os.path.abspath('..'))

import pandas as pd
import numpy as np
from pathlib import Path
from TPT_generator_python import Data_Bucket, TPT_Fetcher

### Variables definition

In [2]:
DATE = pd.to_datetime("2020-12-31").date()
CLIENT = "Dynasty"
ISIN = "LU1280365476"
SOURCE_DIR = Path("./data")

In [3]:
fetcher = TPT_Fetcher(DATE, CLIENT, ISIN, SOURCE_DIR)
bucket = Data_Bucket(CLIENT, fetcher)

### Acquiring datas from database
#### Instruments infos
Using a Data_Bucket object which abstract database accesses through the fetcher object (see documentation), we get all necessary data from the database and store them in a dataframe.

We get the list of instruments in the subfund's portfolio, their market values and the dedication indicator which define the group of shareclasses, the instrument must be distributed to.

In [32]:
instruments = bucket.get_instruments(indicator="all", info=["hedge_indicator",
                                                            "market_and_accrued_fund"])
instruments = instruments.sort_index()
instruments.head()

Unnamed: 0_level_0,hedge_indicator,market_and_accrued_fund
14_Identification code of the financial instrument,Unnamed: 1_level_1,Unnamed: 2_level_1
BE6286986284,LU6517-NH,6501000.0
CA01CHF,LU6517-NH,206.58
CA01CHFHA,LU6517-HA-C2-C5-D1,4355919.83
CA01EUR,LU6517-NH,25423090.51
CA01EURHA,LU6517-HA-C2-C5-D1,17557.09


#### Shareclasses infos
Here we get the list of shareclasses in the subfund, then acquire their NAV and list of groups they're included into.

In [5]:
shareclasses = bucket.get_subfund_shareclasses()
print(shareclasses)

['LU1280365633', 'LU1280365476', 'LU1280365393', 'LU1280365559', 'LU1483663818', 'LU1840818220', 'LU1586705938', 'LU1508332993']


In [6]:
NAVs = pd.DataFrame(index=shareclasses, 
                    columns=["shareclass_total_net_asset_sf_curr",
                             "subfund_total_net_asset",
                             "indicators"],
                    dtype=object)

NAVs["indicators"] = NAVs["indicators"].astype(object)
for isin in shareclasses:
    NAVs.loc[isin, "shareclass_total_net_asset_sf_curr"] = \
        bucket.get_shareclass_nav(isin=isin, info="shareclass_total_net_asset_sf_curr")
    NAVs.loc[isin, "subfund_total_net_asset"] = \
        bucket.get_shareclass_nav(isin=isin, info="subfund_total_net_asset")
    NAVs.at[isin, "indicators"] = \
        [bucket.get_subfund_infos("subfund_indicator"),
         bucket.get_shareclass_infos(isin=isin, info="shareclass"),
         bucket.get_shareclass_infos(isin=isin, info="shareclass_id")
                                   ]
NAVs

Unnamed: 0,shareclass_total_net_asset_sf_curr,subfund_total_net_asset,indicators
LU1280365633,133117000.0,330155000.0,"[LU6517-NH, B, LU6517-NH]"
LU1280365476,23861800.0,330155000.0,"[LU6517-NH, A, LU6517-HA-C2-C5-D1]"
LU1280365393,103270000.0,330155000.0,"[LU6517-NH, A, LU6517-NH]"
LU1280365559,13477100.0,330155000.0,"[LU6517-NH, A, LU6517-HB-C3-C7]"
LU1483663818,37483300.0,330155000.0,"[LU6517-NH, B, LU6517-HA-C2-C5-D1]"
LU1840818220,1223150.0,330155000.0,"[LU6517-NH, B, LU6517-HC-C8]"
LU1586705938,5830890.0,330155000.0,"[LU6517-NH, B, LU6517-HB-C3-C7]"
LU1508332993,11892100.0,330155000.0,"[LU6517-NH, D, LU6517-NH]"


## Computation of the distributions matrices
To compute the repartition keys, valuation weights and dedicated values for each instruments and shareclasses in a subfund, we proceed by computing a set of distributions matrices at the subfund level. This approache allows us to perform more checks and assert the coherence of our calculations at the subfund level. 

###  Indicator matrix (BETAS)
The first distribution matrice we will use is the BETAS, it is a binary matrice built by doing the cartesian product of the shareclasses with the instruments.

Each $\beta_{i,j} = BETAS(i,j)$ is defined as follows:
$$
\beta_{i,j} = \left\{
    \begin{array}\\
        1 & \mbox{if } \ \text{instrument i is distributed to shareclass j}\\
        0 & \mbox{if } \ \text{not} \\
    \end{array}
\right.
$$

We then append a column "fund" to the matrix BETAS which is the product of all column for each row.

In [7]:
BETAS = pd.DataFrame(1, index=instruments.index, columns=NAVs.index)
for isin in shareclasses:
    BETAS[isin].where(
        instruments["hedge_indicator"].isin(NAVs.loc[isin, "indicators"]),
        0,
        inplace=True)
BETAS.sort_index(inplace=True)
BETAS["fund"] = 1
for isin in shareclasses:
    BETAS["fund"] = BETAS["fund"] * BETAS[isin]

BETAS.head()

Unnamed: 0_level_0,LU1280365633,LU1280365476,LU1280365393,LU1280365559,LU1483663818,LU1840818220,LU1586705938,LU1508332993,fund
14_Identification code of the financial instrument,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
BE6286986284,1,1,1,1,1,1,1,1,1
CA01CHF,1,1,1,1,1,1,1,1,1
CA01CHFHA,0,1,0,0,1,0,0,0,0
CA01EUR,1,1,1,1,1,1,1,1,1
CA01EURHA,0,1,0,0,1,0,0,0,0


### Shareclasses Key matrix (SK)
The second distribution matrice we will use is the Shareclasses Key, it is built in the same way as the BETAS matrix (excluding the added colun "fund"). Where each SK(i,j) is defined as follow:
$$
SK(i,j) = \left\{
    \begin{array}\\
        \text{NAV of shareclass j} & \mbox{if } \ \beta_{i,j} = 1 \\
        0 & \mbox{if } \ \beta_{i,j} = 0 \\
    \end{array}
\right.
$$
the NAV of the shareclass j if instrument j is distributed to j. 

In [8]:
SK = pd.DataFrame(0, index=instruments.index, columns=NAVs.index)
for isin in shareclasses:
    SK[isin].where(
        ~(instruments["hedge_indicator"].isin(NAVs.loc[isin, "indicators"])),
        NAVs.loc[isin, "shareclass_total_net_asset_sf_curr"].astype('float64'),
        inplace=True)
SK.sort_index(inplace=True)
SK.head()

Unnamed: 0_level_0,LU1280365633,LU1280365476,LU1280365393,LU1280365559,LU1483663818,LU1840818220,LU1586705938,LU1508332993
14_Identification code of the financial instrument,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BE6286986284,133117100.0,23861795.52,103269900.0,13477144.15,37483345.33,1223148.8,5830894.41,11892129.69
CA01CHF,133117100.0,23861795.52,103269900.0,13477144.15,37483345.33,1223148.8,5830894.41,11892129.69
CA01CHFHA,0.0,23861795.52,0.0,0.0,37483345.33,0.0,0.0,0.0
CA01EUR,133117100.0,23861795.52,103269900.0,13477144.15,37483345.33,1223148.8,5830894.41,11892129.69
CA01EURHA,0.0,23861795.52,0.0,0.0,37483345.33,0.0,0.0,0.0


### Dedicated values matrix (D)
The third matrice we will use is the dedicated value matrice, where each $d_{i,j}$ is defined as the value of instrument i associated to the shareclass j.

In the general case, the value of instrument i is distributed over each shareclass it is associated with proportionately to the respective NAV of the shareclasses.

The general definition of $d_{i,j}$ is as follow:
$$
d_{i,j} = I_i \cdot \frac{\beta_{i,j} S_j}{\sum_{k=0}^{m} \beta_{i,k} S_k}
$$

#### Repartition keys matrix (ALPHAS)

We can then define the repartition key $\alpha_{i,j}$ of each instrument i to each shareclass j such that:

$$
d_{i,j} = \alpha_{i,j} \cdot I_i
$$

which gives:

$$
\alpha_{i,j} = \frac{\beta_{i,j} S_j}{\sum_{k=0}^{m} \beta_{i,k} S_k}
$$

In [9]:
ALPHAS = pd.DataFrame(0, index=instruments.index, columns=NAVs.index)
for isin in shareclasses:
    ALPHAS[isin] = SK[isin] * 1 / SK.sum(axis=1)
ALPHAS.sort_index(inplace=True)
ALPHAS.head()

Unnamed: 0_level_0,LU1280365633,LU1280365476,LU1280365393,LU1280365559,LU1483663818,LU1840818220,LU1586705938,LU1508332993
14_Identification code of the financial instrument,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BE6286986284,0.403195,0.072274,0.312792,0.040821,0.113532,0.003705,0.017661,0.03602
CA01CHF,0.403195,0.072274,0.312792,0.040821,0.113532,0.003705,0.017661,0.03602
CA01CHFHA,0.0,0.388976,0.0,0.0,0.611024,0.0,0.0,0.0
CA01EUR,0.403195,0.072274,0.312792,0.040821,0.113532,0.003705,0.017661,0.03602
CA01EURHA,0.0,0.388976,0.0,0.0,0.611024,0.0,0.0,0.0


#### Computing D (first version)
Once we have the ALPHAS matrix we should be able to compute the value $d_{i,j}$ of each instrument i dedicated to each shareclass j by multiplying each instrument value $I_i$ by $\alpha_{i,j}$.

In [10]:
D = pd.DataFrame(0, index=instruments.index, columns=NAVs.index)
for isin in shareclasses:
    D[isin].where(
        ~(instruments["hedge_indicator"].isin(NAVs.loc[isin, "indicators"])),
        instruments["market_and_accrued_fund"]*ALPHAS[isin],
        inplace=True)
D.sort_index(inplace=True)
D.head()

Unnamed: 0_level_0,LU1280365633,LU1280365476,LU1280365393,LU1280365559,LU1483663818,LU1840818220,LU1586705938,LU1508332993
14_Identification code of the financial instrument,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BE6286986284,2621173.0,469856.0,2033459.0,265374.7,738074.2,24084.684049,114814.525907,234164.630051
CA01CHF,83.29209,14.93045,64.6165,8.432719,23.45352,0.765331,3.648421,7.440967
CA01CHFHA,0.0,1694349.0,0.0,0.0,2661571.0,0.0,0.0,0.0
CA01EUR,10250470.0,1837439.0,7952130.0,1037786.0,2886345.0,94186.602442,448998.628519,915734.284577
CA01EURHA,0.0,6829.289,0.0,0.0,10727.8,0.0,0.0,0.0


#### Checking the results
To check the results, we simply have to sum the dedicated values $d_{i,j}$ for each shareclass j and we should find the NAV of each shareclass.

In [11]:
for isin in shareclasses:
    print(f"NAV {isin} :", NAVs.loc[isin, "shareclass_total_net_asset_sf_curr"])
    print("computed NAV: ", D[isin].sum(), "\n")

NAV LU1280365633 : 133117141.86
computed NAV:  130113493.42920852 

NAV LU1280365476 : 23861795.52
computed NAV:  23230896.67926649 

NAV LU1280365393 : 103269877.17
computed NAV:  100939700.90437728 

NAV LU1280365559 : 13477144.15
computed NAV:  13094936.580016136 

NAV LU1483663818 : 37483345.33
computed NAV:  36492296.72698563 

NAV LU1840818220 : 1223148.8
computed NAV:  1208117.4325740454 

NAV LU1586705938 : 5830894.41
computed NAV:  5665532.078153265 

NAV LU1508332993 : 11892129.69
computed NAV:  11623796.279418634 



#### Erroneous values
As we can see, the computed NAVs does not match the NAVs given by the clients, which means that we are doing something wrong.

###  Investigation
Let's investigate our datas to find where is our error.

#### Check that we have all the instruments
First, we begin by checking that the values of all instruments and the NAVs of all shareclasses both sum to the total NAV of the subfund.

In [12]:
print("total NAV of the subfund: ", NAVs["subfund_total_net_asset"].iloc[0])
print("sum of shareclasses' NAV: ", NAVs["shareclass_total_net_asset_sf_curr"].sum())
print("sum of instruments values: ", instruments["market_and_accrued_fund"].sum())

total NAV of the subfund:  330155476.93
sum of shareclasses' NAV:  330155476.93
sum of instruments values:  322368770.1099999


As we can see, there is no mismatch in the datas we are given, this means that the errors comes from our computations.

#### Check for value loss

Now let's check that we are not ommitting any amount and that we do not lose (or create) any money along the way. In order to do that we will check that the sum of distributed values for each instrument sum to 100% of the instrument value.

In [13]:
percentage = D.sum(axis=1) / instruments["market_and_accrued_fund"]
print(percentage.head())
print("erroneous instruments: ", percentage.loc[percentage.round(5)!=1])

14_Identification code of the financial instrument
BE6286986284    1.0
CA01CHF         1.0
CA01CHFHA       1.0
CA01EUR         1.0
CA01EURHA       1.0
dtype: float64
erroneous instruments:  14_Identification code of the financial instrument
DEEUR          NaN
FREUR          NaN
GB00BYWTW061   NaN
ITEUR          NaN
XSEUR          NaN
dtype: float64


All distributions indeed sum to 100% meaning that we do not lose any value during the distribution. Rather, this means that we do not correctly distribute our instruments over the shareclasses.

#### Computing D (correct version)
In fact the discrepencies between our computed NAVs and the NAVs provided by the client comes from our definition of our $d_{i,j}$ values. 

To correctly distribute the values of the instruments over the shareclasses we have to split our instruments into two categories: 
- the instruments which are at the subfund's level and must be distributed over all shareclasses of the subfund
- the instruments which are dedicated to a strict sub-set of shareclasses in the subfund.

let's denote :
- $\mathcal{I}_s$ the set of instruments dedicated to the whole subfund
- $\mathcal{I}_d$ the set of instruments dedicated to a strict subset of shareclasses

For $I_i \in \mathcal{I}_d$ we keep the same definition of $d_{i,j}$:

$$
d_{i,j} = I_i \cdot \frac{\beta_{i,j} S_j}{\sum_{k=0}^{m} \beta_{i,k} S_k}
$$

However for $I_i \in \mathcal{I}_s$ we exclude the values of all instruments $I_k \in \mathcal{I}_d$ which leads to:

$$
d_{i,j} = I_i \cdot \frac{S_j - (\beta_{i,j} \cdot \sum_{k | I_k \in \mathcal{I}_d} d_{k,j})}
                         {\sum_{k | I_k \in \mathcal{I}_s} S_k - \sum_{k | I_k \in \mathcal{I}_d} I_k} 
$$

In [14]:
D = pd.DataFrame(0, index=instruments.index, columns=NAVs.index)
for isin in shareclasses:
    D.loc[BETAS["fund"]==0, isin] = \
        instruments.loc[BETAS["fund"]==0, "market_and_accrued_fund"] \
        * SK.loc[BETAS["fund"]==0, isin] / SK.loc[BETAS["fund"]==0].sum(axis=1)

for isin in shareclasses:
    D.loc[BETAS["fund"]==1, isin] = \
        instruments.loc[BETAS["fund"]==1, "market_and_accrued_fund"] \
        * (SK.loc[BETAS["fund"]==1, isin] \
           - D.loc[((BETAS[isin]==1) & (BETAS["fund"]==0)), isin].sum()) \
        / (SK.loc[BETAS["fund"]==1].sum(axis=1) \
           - instruments.loc[BETAS["fund"]==0, "market_and_accrued_fund"].sum())

D.sort_index(inplace=True)
D.head()

Unnamed: 0_level_0,LU1280365633,LU1280365476,LU1280365393,LU1280365559,LU1483663818,LU1840818220,LU1586705938,LU1508332993
14_Identification code of the financial instrument,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BE6286986284,2618499.0,471195.9,2031384.0,266640.5,740179.0,23812.903496,115362.173517,233925.787105
CA01CHF,83.20713,14.97303,64.55059,8.472942,23.52041,0.756694,3.665823,7.433378
CA01CHFHA,0.0,1694349.0,0.0,0.0,2661571.0,0.0,0.0,0.0
CA01EUR,10240020.0,1842679.0,7944019.0,1042736.0,2894576.0,93123.765711,451140.282843,914800.255068
CA01EURHA,0.0,6829.289,0.0,0.0,10727.8,0.0,0.0,0.0


#### Checking the results
To check the results, we simply have to sum the dedicated values $d_{i,j}$ for each shareclass j and we should find the NAV of each shareclass.

In [15]:
for isin in shareclasses:
    print(f"NAV {isin} :", NAVs.loc[isin, "shareclass_total_net_asset_sf_curr"])
    print("computed NAV: ", D[isin].sum(), "\n")

NAV LU1280365633 : 133117141.86
computed NAV:  129980780.43114108 

NAV LU1280365476 : 23861795.52
computed NAV:  23297410.85411699 

NAV LU1280365393 : 103269877.17
computed NAV:  100836744.55467069 

NAV LU1280365559 : 13477144.15
computed NAV:  13157769.983776249 

NAV LU1483663818 : 37483345.33
computed NAV:  36596780.64074525 

NAV LU1840818220 : 1223148.8
computed NAV:  1194626.4037443872 

NAV LU1586705938 : 5830894.41
computed NAV:  5692716.987557543 

NAV LU1508332993 : 11892129.69
computed NAV:  11611940.254247762 



As we can see, we are able to retrieve the NAVs of each shareclass by summing the distributed values of all instruments.

In [17]:
Valuation_weight = D.apply(lambda x: x/NAVs.loc[x.name, "shareclass_total_net_asset_sf_curr"]) 
#pd.DataFrame(index=D.index, columns=D.columns)
Valuation_weight["LU1280365476"]


14_Identification code of the financial instrument
BE6286986284    1.974688e-02
CA01CHF         6.274896e-07
CA01CHFHA       7.100676e-02
CA01EUR         7.722299e-02
CA01EURHA       2.862018e-04
                    ...     
XS2198575271    3.397801e-03
XS2239639433    1.817972e-02
XS2257580857    3.212604e-03
XS2269112863    1.721248e-02
XSEUR           0.000000e+00
Name: LU1280365476, Length: 150, dtype: float64