In [18]:
import os
import re
import pandas as pd

# Load the KPI value table
values_df = pd.read_csv("../results-pipeline/kpi-value-table.csv", sep=";", encoding="utf-8")

# Load the wskaznik (indicator) dictionary
wskaznik_dict_df = pd.read_csv("../results-pipeline/wskaznik_dictionary.csv", sep=";", encoding="utf-8")

# Display basic info about the loaded data
print(f"Values DataFrame shape: {values_df.shape}")
print(f"Wskaznik Dictionary shape: {wskaznik_dict_df.shape}")
print("\nFirst few rows of values_df:")
print(values_df.head())
print("\nFirst few rows of wskaznik_dict_df:")
print(wskaznik_dict_df.head())

values_df.dtypes

Values DataFrame shape: (556911, 4)
Wskaznik Dictionary shape: (33, 2)

First few rows of values_df:
    rok       wartosc  WSKAZNIK_INDEX  PKD_INDEX
0  2005     46 396,00               3     1756.0
1  2005     35 860,00              15     1756.0
2  2005  1 679 774,30               5     1756.0
3  2005           NaN              16     1756.0
4  2005  1 614 314,24               4     1756.0

First few rows of wskaznik_dict_df:
   WSKAZNIK_INDEX                                           WSKAZNIK
0               0                   C Środki pieniężne i pap. wart. 
1               1                             CF Nadwyżka finansowa 
2               2                                  DEPR Amortyzacja 
3               3                 EN Liczba jednostek gospodarczych 
4               4  GS (I) Przychody netto ze sprzedaży i zrównane...


rok                 int64
wartosc            object
WSKAZNIK_INDEX      int64
PKD_INDEX         float64
dtype: object

In [20]:
# First, let's create a mapping of indicator codes to their indices
indicator_mapping = {
    'C': 0,      # Środki pieniężne i pap. wart.
    'CF': 1,     # Nadwyżka finansowa
    'DEPR': 2,   # Amortyzacja
    'EN': 3,     # Liczba jednostek gospodarczych
    'GS': 5,     # Przychody ogółem
    'INV': 6,    # Zapasy
    'IO': 7,     # Wartość nakładów inwestycyjnych
    'IP': 8,     # Odsetki do zapłacenia
    'LTC': 9,    # Długoterminowe kredyty bankowe
    'LTL': 10,   # Zobowiązania długoterminowe
    'NP': 11,    # Wynik finansowy netto (zysk netto)
    'NWC': 12,   # Kapitał obrotowy
    'OFE': 13,   # Pozostałe koszty finansowe
    'OP': 14,    # Wynik na działalności operacyjnej
    'PEN': 15,   # Liczba rentownych jednostek gospodarczych
    'PNPM': 16,  # Przychody netto
    'POS': 17,   # Wynik na sprzedaży
    'PPO': 18,   # Pozostałe przychody operacyjne
    'REC': 20,   # Należności krótkoterminowe
    'STC': 21,   # Krótkoterminowe kredyty bankowe
    'STL': 22,   # Zobowiązania krótkoterminowe
    'TC': 23     # Koszty ogółem
}

# Convert 'wartosc' column to numeric (handle Polish number format with commas and non-breaking spaces)
def parse_polish_number(value):
    """Convert Polish number format to float."""
    if pd.isna(value) or value == '':
        return None
    # Remove both regular spaces and non-breaking spaces (\xa0), then replace comma with dot
    return float(str(value).replace(' ', '').replace('\xa0', '').replace(',', '.'))

values_df['wartosc_numeric'] = values_df['wartosc'].apply(parse_polish_number)

# Pivot the data to have indicators as columns for easier calculation
pivot_df = values_df.pivot_table(
    index=['rok', 'PKD_INDEX'],
    columns='WSKAZNIK_INDEX',
    values='wartosc_numeric',
    aggfunc='first'
).reset_index()

# Define the 8 new indicators starting from index 1000
new_indicators = []

# 1. Marża netto = NP/PNPM (index 1000)
pivot_df['indicator_1000'] = pivot_df[indicator_mapping['NP']] / pivot_df[indicator_mapping['PNPM']]
new_indicators.append({'WSKAZNIK_INDEX': 1000, 'WSKAZNIK': 'Marża netto (NP/PNPM)'})

# 2. Marża operacyjna = OP/PNPM (index 1001)
pivot_df['indicator_1001'] = pivot_df[indicator_mapping['OP']] / pivot_df[indicator_mapping['PNPM']]
new_indicators.append({'WSKAZNIK_INDEX': 1001, 'WSKAZNIK': 'Marża operacyjna (OP/PNPM)'})

# 3. Wskaźnik bieżącej płynności = (C+REC+INV)/STL (index 1002)
pivot_df['indicator_1002'] = (
    pivot_df[indicator_mapping['C']] + 
    pivot_df[indicator_mapping['REC']] + 
    pivot_df[indicator_mapping['INV']]
) / pivot_df[indicator_mapping['STL']]
new_indicators.append({'WSKAZNIK_INDEX': 1002, 'WSKAZNIK': 'Wskaźnik bieżącej płynności ((C+REC+INV)/STL)'})

# 4. Wskaźnik szybki = (C+REC)/STL (index 1003)
pivot_df['indicator_1003'] = (
    pivot_df[indicator_mapping['C']] + 
    pivot_df[indicator_mapping['REC']]
) / pivot_df[indicator_mapping['STL']]
new_indicators.append({'WSKAZNIK_INDEX': 1003, 'WSKAZNIK': 'Wskaźnik szybki ((C+REC)/STL)'})

# 5. Wskaźnik zadłużenia = (STL+LTL)/PNPM (index 1004)
pivot_df['indicator_1004'] = (
    pivot_df[indicator_mapping['STL']] + 
    pivot_df[indicator_mapping['LTL']]
) / pivot_df[indicator_mapping['PNPM']]
new_indicators.append({'WSKAZNIK_INDEX': 1004, 'WSKAZNIK': 'Wskaźnik zadłużenia ((STL+LTL)/PNPM)'})

# 6. Pokrycie odsetek = OP/IP (index 1005)
pivot_df['indicator_1005'] = pivot_df[indicator_mapping['OP']] / pivot_df[indicator_mapping['IP']]
new_indicators.append({'WSKAZNIK_INDEX': 1005, 'WSKAZNIK': 'Pokrycie odsetek (OP/IP)'})

# 7. Rotacja należności = PNPM/REC (index 1006)
pivot_df['indicator_1006'] = pivot_df[indicator_mapping['PNPM']] / pivot_df[indicator_mapping['REC']]
new_indicators.append({'WSKAZNIK_INDEX': 1006, 'WSKAZNIK': 'Rotacja należności (PNPM/REC)'})

# 8. Cash flow margin = CF/PNPM (index 1007)
pivot_df['indicator_1007'] = pivot_df[indicator_mapping['CF']] / pivot_df[indicator_mapping['PNPM']]
new_indicators.append({'WSKAZNIK_INDEX': 1007, 'WSKAZNIK': 'Cash flow margin (CF/PNPM)'})

# Convert back to long format
new_indicators_data = []

for indicator_idx in range(1000, 1008):
    col_name = f'indicator_{indicator_idx}'
    temp_df = pivot_df[['rok', 'PKD_INDEX', col_name]].copy()
    temp_df.columns = ['rok', 'PKD_INDEX', 'wartosc']
    temp_df['WSKAZNIK_INDEX'] = indicator_idx
    new_indicators_data.append(temp_df)

# Combine all new indicators
new_indicators_df = pd.concat(new_indicators_data, ignore_index=True)

# Format the values back to Polish format (with comma as decimal separator)
new_indicators_df['wartosc'] = new_indicators_df['wartosc'].apply(
    lambda x: str(x).replace('.', ',') if pd.notna(x) else ''
)

# Select only the columns we need
new_indicators_df = new_indicators_df[['rok', 'wartosc', 'WSKAZNIK_INDEX', 'PKD_INDEX']]

# Combine with original data
combined_values_df = pd.concat([values_df[['rok', 'wartosc', 'WSKAZNIK_INDEX', 'PKD_INDEX']], 
                                new_indicators_df], ignore_index=True)

# Update the wskaznik dictionary
new_wskaznik_dict = pd.DataFrame(new_indicators)
combined_wskaznik_dict = pd.concat([wskaznik_dict_df, new_wskaznik_dict], ignore_index=True)

# Display summary
print(f"Original values_df shape: {values_df.shape}")
print(f"New indicators added: {len(new_indicators_df)}")
print(f"Combined values_df shape: {combined_values_df.shape}")
print(f"\nOriginal wskaznik dictionary entries: {len(wskaznik_dict_df)}")
print(f"New wskaznik dictionary entries: {len(combined_wskaznik_dict)}")
print("\n" + "="*80)
print("NEW INDICATORS ADDED:")
print("="*80)
for idx, row in new_wskaznik_dict.iterrows():
    print(f"Index {row['WSKAZNIK_INDEX']}: {row['WSKAZNIK']}")

# Show sample of new indicators
print("\n" + "="*80)
print("SAMPLE OF CALCULATED VALUES (first 10 rows of indicator 1000 - Marża netto):")
print("="*80)
sample = new_indicators_df[new_indicators_df['WSKAZNIK_INDEX'] == 1000].head(10)
print(sample.to_string(index=False))

Original values_df shape: (556911, 5)
New indicators added: 136248
Combined values_df shape: (693159, 4)

Original wskaznik dictionary entries: 33
New wskaznik dictionary entries: 41

NEW INDICATORS ADDED:
Index 1000: Marża netto (NP/PNPM)
Index 1001: Marża operacyjna (OP/PNPM)
Index 1002: Wskaźnik bieżącej płynności ((C+REC+INV)/STL)
Index 1003: Wskaźnik szybki ((C+REC)/STL)
Index 1004: Wskaźnik zadłużenia ((STL+LTL)/PNPM)
Index 1005: Pokrycie odsetek (OP/IP)
Index 1006: Rotacja należności (PNPM/REC)
Index 1007: Cash flow margin (CF/PNPM)

SAMPLE OF CALCULATED VALUES (first 10 rows of indicator 1000 - Marża netto):
 rok wartosc  WSKAZNIK_INDEX  PKD_INDEX
2005                    1000        0.0
2005                    1000        1.0
2005                    1000        2.0
2005                    1000        6.0
2005                    1000       14.0
2005                    1000       16.0
2005                    1000       23.0
2005                    1000       25.0
2005            

In [17]:
# Save the combined data back to the original files

# Save combined values (with new indicators) to kpi-value-table.csv
combined_values_df.to_csv("../results-pipeline/kpi-value-table.csv", sep=";", index=False, encoding="utf-8")
print(f"✓ Saved combined values to: ../results-pipeline/kpi-value-table.csv")
print(f"  Total rows: {len(combined_values_df)}")

# Save combined wskaznik dictionary to wskaznik_dictionary.csv
combined_wskaznik_dict.to_csv("../results-pipeline/wskaznik_dictionary.csv", sep=";", index=False, encoding="utf-8")
print(f"✓ Saved combined dictionary to: ../results-pipeline/wskaznik_dictionary.csv")
print(f"  Total indicators: {len(combined_wskaznik_dict)}")

print("\n" + "="*80)
print("FILES SUCCESSFULLY UPDATED!")
print("="*80)
print(f"New indicators (1000-1007) have been added to both files.")

✓ Saved combined values to: ../results-pipeline/kpi-value-table.csv
  Total rows: 556911
✓ Saved combined dictionary to: ../results-pipeline/wskaznik_dictionary.csv
  Total indicators: 33

FILES SUCCESSFULLY UPDATED!
New indicators (1000-1007) have been added to both files.
