In [2]:
import pandas as pd
import numpy as np
from pyesef.const import PATH_PROJECT_ROOT, CSV_SEPARATOR
from pyesef.helpers.read_facts import _get_statement_item_group, _get_is_total

In [3]:
def char_len(x, fixed_n):
    '''Force string x to exactly fixed_n characters.

    Strings longer than fixed_n are cut off after the first fixed_n
    characters; shorter strings are left-padded with spaces. Float inputs
    are returned unchanged (presumably so NaN placeholders for missing
    values pass through -- confirm against the data).
    '''
    if isinstance(x, float):
        return x

    shortfall = fixed_n - len(x)
    if shortfall < 0:
        # Too long: keep only the leading fixed_n characters.
        return x[:fixed_n]
    if shortfall > 0:
        # Too short: right-align by prepending spaces.
        return ' ' * shortfall + x
    return x
 

### Load and clean up data

In [4]:
# Read the CSV located at <project root>/output.csv.
df = pd.read_csv(f"{PATH_PROJECT_ROOT}/output.csv", sep=CSV_SEPARATOR)

# Derive the grouping columns from each row's local_name. Series.map calls the
# helper once per value, which avoids materialising a row object per record
# the way DataFrame.apply(axis=1) does.
df['statement_item_group'] = df['local_name'].map(
    lambda name: _get_statement_item_group(local_name=name)
)
df['is_total'] = df['local_name'].map(lambda name: _get_is_total(local_name=name))
# Truncate / left-pad legal names to a fixed width of 40 characters.
df['legal_name'] = df['legal_name'].apply(lambda x: char_len(x, 40))

# Drop rows that are exact duplicates across all columns (the default subset).
df = df.drop_duplicates()

# Scale the values down by a factor of one million.
df["value"] = df["value"] / 1e6


### Check column names to group

In [5]:
# Exploratory check, left disabled: lists the distinct statement_item_group
# values whose name contains "Cost".
#df.query(
#    'statement_item_group.str.contains("Cost")'
#).statement_item_group.unique()

#### Filter

In [6]:
# Filter parameters, named so they can be changed in one place.
# A list is used for the period ends because a parenthesised single string
# such as ("2021-12-31") is just a string, not a tuple; the list makes the
# membership test explicit and allows more periods to be added.
REPORT_PERIOD_ENDS = ["2021-12-31"]
TARGET_LEI = "2138001H6FCSZBP26351"
# We don't need this information
EXCLUDED_LOCAL_NAMES = [
    "ProfitLossAttributableToOwnersOfParent",
    "ProfitLossAttributableToNoncontrollingInterests",
]

# Keep only non-total facts of the selected entity and period(s), excluding
# the other-comprehensive-income statement and the unwanted local names.
# `@NAME` inside the query string refers to the Python variable NAME.
filtered_df = df.query(
    'period_end in @REPORT_PERIOD_ENDS'
    #' & statement_item_group in ("Revenue", "CashAndCashEquivalents", "CostOfSales", "Assets")'
    ' & lei == @TARGET_LEI'
    ' & is_total == False'
    ' & statement_type != "other_comprehensive_income"'
    ' & local_name not in @EXCLUDED_LOCAL_NAMES'
)


In [7]:
# Aggregate the filtered facts: one row per unique combination of the index
# columns, with `value` summed within each combination.
table = pd.pivot_table(
    filtered_df,
    values='value',
    index=['lei', "legal_name", "period_end", "statement_type", "statement_item_group", "local_name"],
    #columns=['statement_item_group'],
    # Pass the aggregation by name: handing numpy callables such as np.sum to
    # pandas aggregations is deprecated (FutureWarning in pandas >= 2.1), and
    # the string "sum" keeps the current behaviour.
    aggfunc="sum"
).reset_index()

# Ratios
#table["gross_margin"] = 1-table["CostOfSales"] / table["Revenue"]
#table["cash_to_ta"] = table["CashAndCashEquivalents"] / table["Assets"]
# Sort descending by statement type, then by item group, for display.
t = table.sort_values(by=['statement_type', "statement_item_group"], ascending=False, na_position='last')
print(t.to_markdown())


|    | lei                  | legal_name           | period_end   | statement_type      | statement_item_group                                                      | local_name                                                                |   value |
|---:|:---------------------|:---------------------|:-------------|:--------------------|:--------------------------------------------------------------------------|:--------------------------------------------------------------------------|--------:|
| 45 | 2138001H6FCSZBP26351 | ITAB Shop Concept AB | 2021-12-31   | income_statement    | SellingGeneralAdminExpense                                                | AdministrativeExpense                                                     |     338 |
| 46 | 2138001H6FCSZBP26351 | ITAB Shop Concept AB | 2021-12-31   | income_statement    | SellingGeneralAdminExpense                                                | OtherExpenseByFunction                                                    |   

### Analyse count per item name

In [8]:
# Count the non-null `lei` entries for every (statement_type, local_name)
# pair and list the pairs with the highest counts first.
a = (
    filtered_df
    .groupby(["statement_type", "local_name"])["lei"]
    .count()
    .reset_index(name="count")
    .sort_values(by="count", ascending=False)
)
print(a.to_markdown())

|    | statement_type      | local_name                                                                |   count |
|---:|:--------------------|:--------------------------------------------------------------------------|--------:|
| 13 | balance_sheet       | EquipmentToolsAndInstallation                                             |       2 |
|  0 | balance_sheet       | AccrualsAndDeferredIncome                                                 |       1 |
| 25 | balance_sheet       | NoncurrentPortionOfNoncurrentLoansReceived                                |       1 |
| 27 | balance_sheet       | OtherCurrentBorrowingsAndCurrentPortionOfOtherNoncurrentBorrowings        |       1 |
| 28 | balance_sheet       | OtherCurrentFinancialAssets                                               |       1 |
| 29 | balance_sheet       | OtherCurrentFinancialLiabilities                                          |       1 |
| 30 | balance_sheet       | OtherLongtermProvisions                            