# In [5]:
import io
import json
import os
import pprint
import re  # Import regular expressions
from decimal import Decimal
from urllib.parse import urlencode

import pandas as pd
import requests
from rich import print as rprint

# Endpoint used to download data cubes; it ends with '?' so the query string
# can be appended directly.
genesis_url = 'https://www-genesis.destatis.de/genesisWS/rest/2020/data/cube?'

# Credentials for the web service. They are read from the environment
# (GENESIS_USERNAME / GENESIS_PASSWORD) so that secrets do not have to live in
# the source. The literals are the previously hard-coded values, kept only as
# a fallback so existing runs keep working.
# SECURITY NOTE(review): the fallback credentials are stored in plain text --
# rotate them and drop the defaults once the environment variables are set up.
user_id = os.environ.get('GENESIS_USERNAME', 'DE17T29R57')
password = os.environ.get('GENESIS_PASSWORD', '4Bf/3Ap)3]r2,,h')

# In [7]:
# Cube to download: maintenance index for heating systems
# ("Instandhaltungsindex Heizungsanlagen").
cubecode = '61261BV002'

# General request options; empty strings are sent as empty query values.
# NOTE(review): `category` and `lang` are not referenced anywhere in the
# visible code -- confirm whether they are still needed.
areatype = category = 'all'
content = start_year = ''
lang = 'en'

# Classifying variables and the key selected for each of them (slots 1-3).
classifier1 = key1 = 'STEMW1'
classifier2, key2 = 'BAUIN2', 'BPI4'
classifier3, key3 = 'BAUIN1', 'BPNB525'

# Destatis base URL plus query string.
# The query is assembled with urlencode() so that every value -- in particular
# the password, which may contain characters such as '&', '#', '%' or '+' --
# is percent-encoded instead of being pasted into the URL verbatim.
# NOTE(review): the request language is fixed to 'de' even though a `lang`
# variable exists; left unchanged because later parsing may depend on it.
query_params = {
    'username': user_id,
    'password': password,
    'language': 'de',
    'name': cubecode,
    'area': areatype,
    'compress': 'true',
    'contents': content,
    'startyear': start_year,
}

# Add classifiers to the query: each slot contributes its variable and/or its
# key only when the corresponding setting is non-empty.
classifier_slots = (
    (classifier1, key1),
    (classifier2, key2),
    (classifier3, key3),
)
for slot, (variable_code, key_code) in enumerate(classifier_slots, start=1):
    if variable_code:
        query_params[f'classifyingvariable{slot}'] = variable_code
    if key_code:
        query_params[f'classifyingkey{slot}'] = key_code

# genesis_url already ends with '?', so the encoded query is appended as-is.
url = genesis_url + urlencode(query_params)

# Request the cube. A timeout keeps the script from hanging indefinitely on a
# stalled connection, and raise_for_status() turns HTTP error responses into an
# explicit exception instead of a confusing JSON/KeyError further down.
response = requests.get(url, timeout=60)
response.raise_for_status()

# The cube text is delivered inside the JSON body under Object -> Content;
# split it into individual lines for the parsing steps below.
data = response.json()["Object"]['Content'].split('\n')


# Pattern for tokens made of digits directly followed by "=100"
# (for example "2015=100").
info_pattern = re.compile(r'\d+=100')

# Gather every such token from all response lines, in order of appearance.
info_matches = [token for line in data for token in info_pattern.findall(line)]

# One comma-separated string holding all collected tokens.
info_matches_str = ', '.join(info_matches)

# Keep only lines that contain both 'D' and 'e' and mention either 'MONAT' or
# 'QUART'; the token string is appended to each kept line after a space.
filtered_response = []
for line in data:
    if 'D' not in line or 'e' not in line:
        continue
    if 'MONAT' not in line and 'QUART' not in line:
        continue
    filtered_response.append(line + " " + info_matches_str)

# Mapping from month codes to quarter codes. This index is delivered in
# quarters but labelled with month codes, so the month names have to be
# translated to fit our naming standard for quarterly values. The middle month
# of each quarter (02, 05, 08, 11) maps to QUART1..QUART4.
mapping_dict = {
    f"MONAT{month:02d}": f"QUART{quarter}"
    for quarter, month in enumerate((2, 5, 8, 11), start=1)
}

# Split each filtered line on ';' and pick the relevant fields by position.
# The period code is translated through mapping_dict so that quarterly values
# delivered under month codes (MONAT02, MONAT05, ...) get the QUARTn naming;
# previously the mapping was defined but never applied. Codes without an entry
# in mapping_dict are kept unchanged.
# Note: `data` is rebound here from the raw response lines to the parsed
# records.
data = [{'field_D': parts[0],
         'field_DG': parts[1],
         'classifyingkey1': parts[2],
         'classifyingkey2': parts[3],
         'classifyingkey3': parts[4],
         'period': mapping_dict.get(parts[5], parts[5]),
         'year': int(parts[6]),
         'value': float(parts[7]),
         'field_e': parts[8],
         # Same base string for every record (all "<digits>=100" tokens found)
         'base': info_matches_str
        }
        for parts in (line.split(';') for line in filtered_response)]

# Load the parsed records into a DataFrame, one row per record.
df = pd.DataFrame(data)

# Name the period field 'quarter' as soon as any period code contains a 'Q';
# otherwise the data is treated as monthly.
period_key = 'quarter' if df['period'].str.contains('Q').any() else 'month'

# Assemble the output structure: request metadata plus one entry per year,
# each holding that year's values as a list of {period, value, base} dicts.
result = {
    'cubeCode': cubecode,
    'content': content,
    'classifyingVar1': classifier1,
    'classifyingKey1': key1,
    'data': [
        {
            'year': year,
            'df': [
                {
                    period_key: row['period'],
                    'value': row['value'],
                    'base': row['base'],
                }
                for _, row in group.iterrows()
            ],
        }
        for year, group in df.groupby('year')
    ],
}

# Serialize with 4-space indentation and print through rich.
j = json.dumps(result, indent=4)
rprint(j)