Trying out converting a 3D array into a flattened array for Power BI consumption.

Start by setting up two arrays representing the view template hash tables from two models.

In [1]:
# One name per model; the position matches the model's layer in the 3D array
MODEL_NAMES = ["model a", "model b"]

# --- Model A: categories (rows) x view templates (columns) ---
ROW_HEADERS_A = ["Cat 1", "Cat 2", "Cat 3"]
COLUMN_HEADERS_A = ["template 1", "template 2", "template 3"]
# Cell values only, one inner list per entry of ROW_HEADERS_A
ARRAY_MODEL_A = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

# --- Model B: categories (rows) x view templates (columns) ---
ROW_HEADERS_B = ["Cat 1", "Cat 3", "Cat 4"]
COLUMN_HEADERS_B = ["template 2", "template 5", "template 6"]
# Cell values only, one inner list per entry of ROW_HEADERS_B
ARRAY_MODEL_B = [[10, 11, 12], [13, 14, 15], [16, 17, 18]]

Combine the header lists into lists of unique values.

In [2]:

# A set union keeps each header once even when both models share it;
# sorting turns the unordered set back into a list with a repeatable order.
MERGED_ROW_HEADERS = sorted(set(ROW_HEADERS_A).union(ROW_HEADERS_B))
print(MERGED_ROW_HEADERS)
MERGED_COLUMN_HEADERS = sorted(set(COLUMN_HEADERS_A).union(COLUMN_HEADERS_B))
print(MERGED_COLUMN_HEADERS)



['Cat 1', 'Cat 2', 'Cat 3', 'Cat 4']
['template 1', 'template 2', 'template 3', 'template 5', 'template 6']


Build an array of the final size, using -1 as the default cell value.

In [3]:
def get_padded_default_array(
    merged_row_headers,
    merged_column_headers,
    default_value=-1,
):
    """Build a 2D list sized to the merged headers, filled with a placeholder.

    Args:
        merged_row_headers: Row headers; one row is created per entry.
        merged_column_headers: Column headers; one cell is created per entry
            in every row.
        default_value: Value placed in every cell. Defaults to -1, the
            placeholder used for cells a model has no value for.

    Returns:
        A new list of row lists. Every row is a distinct list object, so
        assigning to a cell in one row leaves the other rows untouched.
    """
    # The inner comprehension runs once per row, giving each row its own list
    return [
        [default_value for _ in merged_column_headers] for _ in merged_row_headers
    ]

# Locate each model's own headers within the merged header lists
ROW_INDICES_A = list(map(MERGED_ROW_HEADERS.index, ROW_HEADERS_A))
COLUMN_INDICES_A = list(map(MERGED_COLUMN_HEADERS.index, COLUMN_HEADERS_A))

ROW_INDICES_B = list(map(MERGED_ROW_HEADERS.index, ROW_HEADERS_B))
COLUMN_INDICES_B = list(map(MERGED_COLUMN_HEADERS.index, COLUMN_HEADERS_B))

# Each model gets its own freshly built array. A slice copy such as
# PADDED_ARRAY_MODEL_A[:] is shallow and would share the inner row lists.
PADDED_ARRAY_MODEL_A = get_padded_default_array(MERGED_ROW_HEADERS, MERGED_COLUMN_HEADERS)
PADDED_ARRAY_MODEL_B = get_padded_default_array(MERGED_ROW_HEADERS, MERGED_COLUMN_HEADERS)

# Show both padded arrays, one row per line, separated by blank lines
for row in PADDED_ARRAY_MODEL_A:
    print(row)

print('\n')
for row in PADDED_ARRAY_MODEL_B:
    print(row)


[-1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1]


[-1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1]


Update the cell values based on the original arrays.

In [4]:
def update_default_array_values(row_indices, col_indices, default_array, value_array):
    """Copy the cells of ``value_array`` into their slots in ``default_array``.

    Args:
        row_indices: For each row position in ``value_array``, the row
            position to write to in ``default_array``.
        col_indices: For each column position in ``value_array``, the column
            position to write to in ``default_array``.
        default_array: 2D list that receives the values; modified in place.
        value_array: 2D list holding the values to copy.

    Returns:
        The same ``default_array`` object that was passed in, after the update.
    """
    for source_row, target_row in enumerate(row_indices):
        for source_col, target_col in enumerate(col_indices):
            cell_value = value_array[source_row][source_col]
            default_array[target_row][target_col] = cell_value
    return default_array


# Write model A's values into its padded array. The padded array itself is
# modified and returned, so PADDED_ARRAY_MODEL_A holds the values afterwards.
updated_array_model_a = update_default_array_values(
    default_array=PADDED_ARRAY_MODEL_A,
    value_array=ARRAY_MODEL_A,
    row_indices=ROW_INDICES_A,
    col_indices=COLUMN_INDICES_A,
)

# Disabled debug output: the string literal below is never executed
'''
for row in PADDED_ARRAY_MODEL_A:
    print(row)
print("\n")
'''

# Same update for model B
updated_array_model_b = update_default_array_values(
    default_array=PADDED_ARRAY_MODEL_B,
    value_array=ARRAY_MODEL_B,
    row_indices=ROW_INDICES_B,
    col_indices=COLUMN_INDICES_B,
)

# Show model A's padded array, one row per line
print("A\n")
for row in PADDED_ARRAY_MODEL_A:
    print(row)

# Show model B's padded array, one row per line
print("\nB\n")
for row in PADDED_ARRAY_MODEL_B:
    print(row)

A

[1, 2, 3, -1, -1]
[4, 5, 6, -1, -1]
[7, 8, 9, -1, -1]
[-1, -1, -1, -1, -1]

B

[-1, 10, -1, 11, 12]
[-1, -1, -1, -1, -1]
[-1, 13, -1, 14, 15]
[-1, 16, -1, 17, 18]


Combine both padded arrays into a 3D array.

In [5]:
# Stack the per-model 2D arrays into one 3D array: [model][category][view template]
array_3d = [PADDED_ARRAY_MODEL_A, PADDED_ARRAY_MODEL_B]
print(array_3d)

[[[1, 2, 3, -1, -1], [4, 5, 6, -1, -1], [7, 8, 9, -1, -1], [-1, -1, -1, -1, -1]], [[-1, 10, -1, 11, 12], [-1, -1, -1, -1, -1], [-1, 13, -1, 14, 15], [-1, 16, -1, 17, 18]]]


In [6]:
# Flatten the 3D array into one record per (model, category, view template) cell.
# The enumerate() positions are translated back into header / model names.
flattened_data = []
for model_idx, model_layer in enumerate(array_3d):
    for category_idx, category_row in enumerate(model_layer):
        for template_idx, cell_hash in enumerate(category_row):
            record = {
                "view_template": MERGED_COLUMN_HEADERS[template_idx],
                "category": MERGED_ROW_HEADERS[category_idx],
                "model_name": MODEL_NAMES[model_idx],
                "hash_value": cell_hash,
            }
            flattened_data.append(record)

In [7]:
# Print the full list of flattened records
print(flattened_data)

[{'view_template': 'template 1', 'category': 'Cat 1', 'model_name': 'model a', 'hash_value': 1}, {'view_template': 'template 2', 'category': 'Cat 1', 'model_name': 'model a', 'hash_value': 2}, {'view_template': 'template 3', 'category': 'Cat 1', 'model_name': 'model a', 'hash_value': 3}, {'view_template': 'template 5', 'category': 'Cat 1', 'model_name': 'model a', 'hash_value': -1}, {'view_template': 'template 6', 'category': 'Cat 1', 'model_name': 'model a', 'hash_value': -1}, {'view_template': 'template 1', 'category': 'Cat 2', 'model_name': 'model a', 'hash_value': 4}, {'view_template': 'template 2', 'category': 'Cat 2', 'model_name': 'model a', 'hash_value': 5}, {'view_template': 'template 3', 'category': 'Cat 2', 'model_name': 'model a', 'hash_value': 6}, {'view_template': 'template 5', 'category': 'Cat 2', 'model_name': 'model a', 'hash_value': -1}, {'view_template': 'template 6', 'category': 'Cat 2', 'model_name': 'model a', 'hash_value': -1}, {'view_template': 'template 1', 'ca