# Computation of repartition keys, valuation weights and dedicated values

### Necessary imports

In [1]:
import sys
import os
sys.path.append(os.path.abspath('..'))

import pandas as pd
import numpy as np
from pathlib import Path
from TPT_generator_python import DataBucket, TPTFetcher

### Variables definition

In [2]:
# Run parameters shared by every cell below.
# DATE is converted to a plain `datetime.date` (no time component).
DATE = pd.to_datetime("2020-12-31").date()
# CLIENT and ISIN are passed, together with DATE and SOURCE_DIR, to DataBucket / TPTFetcher.
CLIENT = "Pictet"
ISIN = "LU1787059465"
# Relative path: resolved against the notebook's working directory.
SOURCE_DIR = Path("../data")

In [3]:
# Build the data bucket for the run parameters; every later cell reads its data through it.
bucket = DataBucket(DATE, CLIENT, ISIN, SOURCE_DIR)
# get_groups() is called without an ISIN here; the recorded output is a `group_id` Series.
# NOTE(review): presumably these are the groups of the reference shareclass ISIN - confirm.
groups = bucket.get_groups()
groups

0     1
10    5
11    6
12    7
Name: group_id, dtype: int64

In [23]:
# Cross-check: walk the raw fetcher directly and sum the market values of the
# subfund's instruments, to compare with the figure obtained through the bucket.
fetcher = TPTFetcher(DATE, CLIENT, ISIN, SOURCE_DIR)

# Infos of the reference shareclass; its first row gives the subfund identifier.
shareclass_infos = fetcher.fetch_shareclass_infos(ISIN)
id_subfund = shareclass_infos["id_subfund"].iloc[0]

# All shareclasses of that subfund, their infos and their database ids.
isins = fetcher.fetch_subfund_shareclasses(id_subfund)
shareclasses_infos = fetcher.fetch_shareclass_infos(isins)
id_list = shareclasses_infos["id"].tolist()

# Group membership for those shareclass ids (kept for interactive inspection).
group_map = fetcher.fetch_group_map(id_list)

# Only the last expression of a cell is displayed, so intermediate results are
# not echoed here; inspect them in a separate cell when needed.
inst = fetcher.fetch_instruments(id_subfund, DATE)
inst["market_value_fund"].sum()

93072487.53

### Acquiring data from the database
#### Instruments infos
Using a `DataBucket` object, which abstracts database access through the fetcher object (see documentation), we get all the necessary data from the database and store it in a dataframe.

We get the list of instruments in the subfund's portfolio, their market values and the dedication indicator, which defines the group of shareclasses the instrument must be distributed to.

In [22]:
# Every instrument of the subfund (idx=slice(None) selects all rows), with its
# dedication group and market value, ordered by index.
instruments = bucket.get_instruments_by_index(
    idx=slice(None),
    info=["id_group", "market_value_fund"],
).sort_index()
instruments["market_value_fund"].sum()

93072487.53000002

#### Shareclasses infos
Here we get the list of shareclasses in the subfund, then acquire their NAVs and the list of groups each of them belongs to.

In [6]:
# ISINs of the shareclasses of the subfund; used below as the column labels of
# every distribution matrix and as the index of the NAVs frame.
shareclasses = bucket.get_subfund_shareclasses()
print(shareclasses)

['LU1787059465', 'LU1787059036', 'LU1787061107', 'LU1787061362', 'LU1787061529', 'LU1787060471']


In [7]:
# One row per shareclass: its NAV, the subfund NAV, and the list of group ids
# the shareclass belongs to. The frame is kept as object dtype so that a cell
# of the "groups" column can hold a Python list.
nav_columns = ["shareclass_total_net_asset_sf_curr",
               "subfund_total_net_asset",
               "groups"]
NAVs = pd.DataFrame(index=shareclasses, columns=nav_columns, dtype=object)

NAVs["groups"] = NAVs["groups"].astype(object)
for isin in shareclasses:
    # The two NAV figures are fetched with the same accessor, one field at a time.
    for nav_field in ("shareclass_total_net_asset_sf_curr", "subfund_total_net_asset"):
        NAVs.loc[isin, nav_field] = bucket.get_shareclass_nav(isin=isin, info=nav_field)
    # .at writes the list as a single cell value.
    NAVs.at[isin, "groups"] = bucket.get_groups(isin).tolist()
NAVs["groups"]

LU1787059465    [1, 5, 6, 7]
LU1787059036       [2, 5, 9]
LU1787061107      [3, 5, 10]
LU1787061362             [5]
LU1787061529      [4, 5, 11]
LU1787060471       [4, 5, 8]
Name: groups, dtype: object

## Computation of the distributions matrices
To compute the repartition keys, valuation weights and dedicated values for each instrument and shareclass in a subfund, we proceed by computing a set of distribution matrices at the subfund level. This approach allows us to perform more checks and to assert the coherence of our calculations at the subfund level.

###  Indicator matrix (BETAS)
The first distribution matrix we will use is BETAS: a binary matrix built over the cartesian product of the instruments (rows) and the shareclasses (columns).

Each $\beta_{i,j} = BETAS(i,j)$ is defined as follows:
$$
\beta_{i,j} = \left\{
    \begin{array}{ll}
        1 & \text{if instrument } i \text{ is distributed to shareclass } j \\
        0 & \text{otherwise}
    \end{array}
\right.
$$

We then append a column "fund" to the BETAS matrix, which is the product of all columns for each row: it is 1 only for instruments distributed to every shareclass of the subfund.

In [8]:
# Indicator matrix: one row per instrument, one column per shareclass.
# BETAS[i, j] is 1 when the dedication group of instrument i is one of the
# groups of shareclass j, and 0 otherwise.
BETAS = pd.DataFrame(1, index=instruments.index, columns=NAVs.index)
for isin in shareclasses:
    in_group = instruments["id_group"].isin(NAVs.loc[isin, "groups"])
    # Assign the whole column: an in-place method called on the selection
    # BETAS[isin] is chained assignment and does not update BETAS under pandas
    # Copy-on-Write. to_numpy() keeps the write positional, because the
    # instrument index contains duplicate labels.
    BETAS[isin] = in_group.astype("int64").to_numpy()
BETAS = BETAS.sort_index()

# "fund" is the row-wise product of the shareclass columns: 1 only when the
# instrument is distributed to every shareclass of the subfund.
BETAS["fund"] = BETAS[shareclasses].prod(axis=1)

BETAS.loc[BETAS["fund"]==1]

Unnamed: 0_level_0,LU1787059465,LU1787059036,LU1787061107,LU1787061362,LU1787061529,LU1787060471,fund
instrument,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
12100,1,1,1,1,1,1,1
15000,1,1,1,1,1,1,1
15000,1,1,1,1,1,1,1
15000,1,1,1,1,1,1,1
15000,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...
US8716071076,1,1,1,1,1,1,1
US87612E1064,1,1,1,1,1,1,1
US92220P1057,1,1,1,1,1,1,1
US92826C8394,1,1,1,1,1,1,1


### Shareclasses Key matrix (SK)
The second distribution matrix we will use is the Shareclasses Key matrix. It is built in the same way as the BETAS matrix (excluding the added column "fund"), where each SK(i,j) is defined as follows:
$$
SK(i,j) = \left\{
    \begin{array}{ll}
        \text{NAV of shareclass } j & \text{if } \beta_{i,j} = 1 \\
        0 & \text{if } \beta_{i,j} = 0
    \end{array}
\right.
$$
In other words, SK(i,j) is the NAV of shareclass j if instrument i is distributed to shareclass j, and 0 otherwise.

In [11]:
# Shareclasses Key matrix: SK[i, j] is the NAV of shareclass j when instrument i
# is distributed to it (its group is one of the shareclass groups), else 0.
# Initialised with 0.0 so that the columns are float from the start.
SK = pd.DataFrame(0.0, index=instruments.index, columns=NAVs.index)
for isin in shareclasses:
    # NAVs is an object-dtype frame: convert the stored value to a plain float.
    shareclass_nav = float(NAVs.loc[isin, "shareclass_total_net_asset_sf_curr"])
    in_group = instruments["id_group"].isin(NAVs.loc[isin, "groups"]).to_numpy()
    # Assign the whole column: an in-place method called on the selection
    # SK[isin] is chained assignment and does not update SK under pandas
    # Copy-on-Write. The array is positional, which is safe because SK shares
    # the (duplicate-labelled) index of instruments.
    SK[isin] = np.where(in_group, shareclass_nav, 0.0)
SK = SK.sort_index()
SK.head()

Unnamed: 0_level_0,LU1787059465,LU1787059036,LU1787061107,LU1787061362,LU1787061529,LU1787060471
instrument,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
12100,7248761.65,1529583.13,28005844.68,14538855.37,18324115.82,33209810.28
15000,7248761.65,1529583.13,28005844.68,14538855.37,18324115.82,33209810.28
15000,0.0,0.0,0.0,0.0,18324115.82,0.0
15000,0.0,0.0,0.0,0.0,18324115.82,0.0
15000,7248761.65,1529583.13,28005844.68,14538855.37,18324115.82,33209810.28


### Dedicated values matrix (D)
The third matrix we will use is the dedicated values matrix, where each $d_{i,j}$ is defined as the value of instrument i associated with shareclass j.

In the general case, the value of instrument i is distributed over each shareclass it is associated with proportionately to the respective NAV of the shareclasses.

Writing $I_i$ for the value of instrument i and $S_j$ for the NAV of shareclass j, the general definition of $d_{i,j}$ is as follows:
$$
d_{i,j} = I_i \cdot \frac{\beta_{i,j} S_j}{\sum_{k=0}^{m} \beta_{i,k} S_k}
$$

#### Repartition keys matrix (ALPHAS)

We can then define the repartition key $\alpha_{i,j}$ of each instrument i to each shareclass j such that:

$$
d_{i,j} = \alpha_{i,j} \cdot I_i
$$

which gives:

$$
\alpha_{i,j} = \frac{\beta_{i,j} S_j}{\sum_{k=0}^{m} \beta_{i,k} S_k}
$$

In [12]:
# Repartition keys: each SK row divided by its own total, so that the keys of an
# instrument sum to 1 over the shareclasses it is distributed to.
sk_row_total = SK.sum(axis=1)
ALPHAS = SK.div(sk_row_total, axis=0)
ALPHAS = ALPHAS.sort_index()
ALPHAS.head()

Unnamed: 0_level_0,LU1787059465,LU1787059036,LU1787061107,LU1787061362,LU1787061529,LU1787060471
instrument,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
12100,0.070474,0.014871,0.27228,0.14135,0.178151,0.322874
15000,0.070474,0.014871,0.27228,0.14135,0.178151,0.322874
15000,0.0,0.0,0.0,0.0,1.0,0.0
15000,0.0,0.0,0.0,0.0,1.0,0.0
15000,0.070474,0.014871,0.27228,0.14135,0.178151,0.322874


#### Computing D (first version)
Once we have the ALPHAS matrix we should be able to compute the value $d_{i,j}$ of each instrument i dedicated to each shareclass j by multiplying each instrument value $I_i$ by $\alpha_{i,j}$.

In [15]:
# Dedicated values, first (naive) version: d[i, j] = alpha[i, j] * I_i for every
# instrument i distributed to shareclass j, and 0 elsewhere.
# Initialised with 0.0 so that the columns are float from the start.
D = pd.DataFrame(0.0, index=instruments.index, columns=NAVs.index)
for isin in shareclasses:
    in_group = instruments["id_group"].isin(NAVs.loc[isin, "groups"]).to_numpy()
    distributed_value = (instruments["market_value_fund"] * ALPHAS[isin]).to_numpy()
    # Assign the whole column: an in-place method called on the selection
    # D[isin] is chained assignment and does not update D under pandas
    # Copy-on-Write. The array is positional, which is safe because D shares
    # the (duplicate-labelled) index of instruments.
    D[isin] = np.where(in_group, distributed_value, 0.0)
D = D.sort_index()
D.head()

Unnamed: 0_level_0,LU1787059465,LU1787059036,LU1787061107,LU1787061362,LU1787061529,LU1787060471
instrument,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
12100,-487.985839,-102.971369,-1885.35,-978.754149,-1233.577467,-2235.681
15000,-499.565453,-105.414818,-1930.089,-1001.979403,-1262.849527,-2288.732
15000,0.0,0.0,0.0,0.0,257400.55,0.0
15000,0.0,0.0,0.0,0.0,-289684.32,0.0
15000,-392430.328293,-82807.911039,-1516168.0,-787098.274346,-992023.070165,-1797898.0


#### Checking the results
To check the results, we simply have to sum the dedicated values $d_{i,j}$ for each shareclass j and we should find the NAV of each shareclass.

In [16]:
# Compare, shareclass by shareclass, the reported NAV with the sum of the
# values distributed to that shareclass.
nav_column = "shareclass_total_net_asset_sf_curr"
for isin in shareclasses:
    reported_nav = NAVs.loc[isin, nav_column]
    print(f"NAV {isin} :", reported_nav)
    computed_nav = D[isin].sum()
    print("computed NAV: ", computed_nav, "\n")

NAV LU1787059465 : 7248761.65
computed NAV:  6711182.465265324 

NAV LU1787059036 : 1529583.13
computed NAV:  1564946.1181890455 

NAV LU1787061107 : 28005844.68
computed NAV:  25446541.142824866 

NAV LU1787061362 : 14538855.37
computed NAV:  13046766.907595254 

NAV LU1787061529 : 18324115.82
computed NAV:  16419455.847428689 

NAV LU1787060471 : 33209810.28
computed NAV:  29883595.04869681 



#### Erroneous values
As we can see, the computed NAVs do not match the NAVs given by the client, which means that we are doing something wrong.

###  Investigation
Let's investigate our data to find where our error is.

#### Check that we have all the instruments
First, we begin by checking that the values of all instruments and the NAVs of all shareclasses both sum to the total NAV of the subfund.

In [20]:
# Three totals that are expected to agree: the subfund NAV (read from the first
# shareclass row), the sum of the shareclass NAVs and the sum of instrument values.
subfund_nav = NAVs["subfund_total_net_asset"].iloc[0]
print("total NAV of the subfund: ", subfund_nav)
shareclasses_nav_total = NAVs["shareclass_total_net_asset_sf_curr"].sum()
print("sum of shareclasses' NAV: ", shareclasses_nav_total)
instruments_value_total = instruments["market_value_fund"].sum()
print("sum of instruments values: ", instruments_value_total)

total NAV of the subfund:  20900557.78
sum of shareclasses' NAV:  102856970.93
sum of instruments values:  93072487.53000002


As we can see, there is no mismatch in the data we are given; this means that the error comes from our computations.

#### Check for value loss

Now let's check that we are not omitting any amount and that we do not lose (or create) any money along the way. To do that, we check that the distributed values of each instrument sum to 100% of the instrument's value.

In [None]:
# Share of each instrument's value that has been distributed over the shareclasses.
# D was computed from "market_value_fund", so the same column is the denominator;
# it is also the only value column loaded into `instruments`.
percentage = D.sum(axis=1) / instruments["market_value_fund"]
print(percentage.head())
# Anything that does not round to exactly 1 (including NaN) is reported.
print("erroneous instruments: ", percentage.loc[percentage.round(5)!=1])

All distributions indeed sum to 100% meaning that we do not lose any value during the distribution. Rather, this means that we do not correctly distribute our instruments over the shareclasses.

#### Computing D (correct version)
In fact, the discrepancies between our computed NAVs and the NAVs provided by the client come from our definition of the $d_{i,j}$ values.

To correctly distribute the values of the instruments over the shareclasses we have to split our instruments into two categories: 
- the instruments which are at the subfund's level and must be distributed over all shareclasses of the subfund
- the instruments which are dedicated to a strict sub-set of shareclasses in the subfund.

Let's denote:
- $\mathcal{I}_s$ the set of instruments dedicated to the whole subfund
- $\mathcal{I}_d$ the set of instruments dedicated to a strict subset of shareclasses

For $I_i \in \mathcal{I}_d$ we keep the same definition of $d_{i,j}$:

$$
d_{i,j} = I_i \cdot \frac{\beta_{i,j} S_j}{\sum_{k=0}^{m} \beta_{i,k} S_k}
$$

However, for $I_i \in \mathcal{I}_s$ we exclude the values of all instruments $I_k \in \mathcal{I}_d$, which leads to:

$$
d_{i,j} = I_i \cdot \frac{S_j - \beta_{i,j} \cdot \sum_{k \mid I_k \in \mathcal{I}_d} d_{k,j}}
                         {\sum_{k=0}^{m} \beta_{i,k} S_k - \sum_{k \mid I_k \in \mathcal{I}_d} I_k}
$$

In [None]:
# Dedicated values, corrected version.
#
# Instruments are split in two sets using BETAS["fund"]:
#   * fund == 0: dedicated to a strict subset of shareclasses; distributed
#     proportionally to the NAV of the shareclasses they belong to;
#   * fund == 1: held at subfund level; distributed proportionally to what is
#     left of each shareclass NAV once its dedicated instruments are removed.

# `instruments` is loaded above with "id_group" and "market_value_fund" only, so
# "market_and_accrued_fund" is used when it is available and "market_value_fund"
# otherwise, instead of failing with a KeyError.
# NOTE(review): confirm which valuation must drive the distribution.
value_column = ("market_and_accrued_fund"
                if "market_and_accrued_fund" in instruments.columns
                else "market_value_fund")
instrument_values = instruments[value_column].astype("float64")

# Positional masks: BETAS, SK, D and instruments share the same row order, and
# the instrument index contains duplicate labels.
dedicated_rows = BETAS["fund"].eq(0).to_numpy()
shared_rows = BETAS["fund"].eq(1).to_numpy()

# Initialised with 0.0 so that the columns are float from the start.
D = pd.DataFrame(0.0, index=instruments.index, columns=NAVs.index)

# --- instruments dedicated to a subset of shareclasses ---------------------
dedicated_values = instrument_values[dedicated_rows]
sk_dedicated = SK.loc[dedicated_rows]
dedicated_nav_total = sk_dedicated.sum(axis=1)
for isin in shareclasses:
    D.loc[dedicated_rows, isin] = \
        (dedicated_values * sk_dedicated[isin] / dedicated_nav_total).to_numpy()

# --- instruments held at subfund level --------------------------------------
shared_values = instrument_values[shared_rows]
sk_shared = SK.loc[shared_rows]
# NAV left to distribute: all shareclass NAVs minus the dedicated instruments.
distributable_nav = sk_shared.sum(axis=1) - dedicated_values.sum()
for isin in shareclasses:
    # Value already allocated to this shareclass through dedicated instruments.
    in_shareclass = BETAS[isin].eq(1).to_numpy()
    already_dedicated = D.loc[in_shareclass & dedicated_rows, isin].sum()
    D.loc[shared_rows, isin] = \
        (shared_values * (sk_shared[isin] - already_dedicated) / distributable_nav).to_numpy()

D = D.sort_index()
D.head()

#### Checking the results
To check the results, we simply have to sum the dedicated values $d_{i,j}$ for each shareclass j and we should find the NAV of each shareclass.

In [None]:
# Compare, shareclass by shareclass, the reported NAV with the sum of the
# values distributed to that shareclass.
nav_column = "shareclass_total_net_asset_sf_curr"
for isin in shareclasses:
    reported_nav = NAVs.loc[isin, nav_column]
    print(f"NAV {isin} :", reported_nav)
    computed_nav = D[isin].sum()
    print("computed NAV: ", computed_nav, "\n")

As we can see, we are able to retrieve the NAVs of each shareclass by summing the distributed values of all instruments.

In [None]:
# Valuation weights: each dedicated value divided by the NAV of its shareclass.
# The NAV Series is indexed by ISIN, so axis=1 matches it against D's columns.
shareclass_navs = NAVs["shareclass_total_net_asset_sf_curr"].astype("float64")
Valuation_weight = D.div(shareclass_navs, axis=1)
# Show the weights of the reference shareclass, which is a column of D.
Valuation_weight[ISIN]
