Trying out to convert a 3d array into a flattened array for power bi consumption.

Start by setting up 2 arrays representing the view template hash tables from 2 models

In [1]:
# Define row and column headers
row_headers_a = ["Cat 1", "Cat 2", "Cat 3"]
col_headers_a = ["template 1", "template 2", "template 3"]

row_headers_b = ["Cat 1", "Cat 3", "Cat 4"]
col_headers_b = ["template 2", "template 5", "template 6"]

# define row and column values only
arrays_model_a = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
]

arrays_model_b = [
    [10, 11, 12],
    [13, 14, 15],
    [16, 17, 18]
]

combine header lists into lists of unique values

In [2]:

# Convert the lists to sets to remove duplicates, then combine and convert back to a list
merged_row_headers = sorted(list(set(row_headers_a) | set(row_headers_b)))
print(merged_row_headers)
merged_col_headers = sorted(list(set(col_headers_a) | set(col_headers_b)))
print (merged_col_headers)



['Cat 1', 'Cat 2', 'Cat 3', 'Cat 4']
['template 1', 'template 2', 'template 3', 'template 5', 'template 6']


build an array of the final size and use as a default cell value -1

In [3]:
def get_padded_default_array(
    merged_row_headers,
    merged_column_headers,
):

    # Create a new padded 2D array
    padded_array_model_a = [
        [-1 for _ in merged_column_headers] for _ in merged_row_headers
    ]
    return padded_array_model_a

# Find the indices for row and column headers in the merged headers
row_indices = [merged_row_headers.index(row) for row in row_headers_a]
col_indices = [merged_col_headers.index(col) for col in col_headers_a]

padded_array_model_a = get_padded_default_array(
    merged_row_headers=merged_row_headers,
    merged_column_headers=merged_col_headers,
)

padded_array_model_b = list(padded_array_model_a)

# Print the padded array
for row in padded_array_model_a:
    print(row)

print('\n')
for row in padded_array_model_b:
    print(row)


[-1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1]


[-1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1]
[-1, -1, -1, -1, -1]


update cell values based on the original array

In [4]:
def update_default_array_values(row_indices, col_indices, default_array, value_array):
    # Fill in the values from array_model_a
    for i, row_index in enumerate(row_indices):
        for j, col_index in enumerate(col_indices):
            default_array[row_index][col_index] = value_array[i][j]
    return default_array


updated_array_model_a = update_default_array_values(
    row_indices=row_indices,
    col_indices=col_indices,
    default_array=padded_array_model_a,
    value_array=arrays_model_a,
)

# Print the padded array
for row in padded_array_model_a:
    print(row)
print("\n")

updated_array_model_b = update_default_array_values(
    row_indices=row_indices,
    col_indices=col_indices,
    default_array=padded_array_model_b,
    value_array=arrays_model_b,
)
# Print the padded array
for row in padded_array_model_a:
    print(row)

print("\n")
# Print the padded array
for row in padded_array_model_b:
    print(row)

[1, 2, 3, -1, -1]
[4, 5, 6, -1, -1]
[7, 8, 9, -1, -1]
[-1, -1, -1, -1, -1]


[10, 11, 12, -1, -1]
[13, 14, 15, -1, -1]
[16, 17, 18, -1, -1]
[-1, -1, -1, -1, -1]


[10, 11, 12, -1, -1]
[13, 14, 15, -1, -1]
[16, 17, 18, -1, -1]
[-1, -1, -1, -1, -1]


combine into a 3D array

In [5]:
# Your 3D array
array_3d = [
    [arrays_model_a, arrays_model_b]
]

In [6]:
model_dic ={
    0: "model_a",
    1: "model_b",
    2: "model_c"
}

In [7]:
category_dic= {
    0: "cat_a",
    1: "cat_b",
    2: "cat_c"
}

In [8]:
view_template_dic= {
    0: "template_a",
    1: "template_b",
    2: "template_c"
}

In [9]:
# Flatten the 3D array and create a DataFrame
flattened_data = []

In [10]:
for model_name, layer in enumerate(array_3d):
    for category, row in enumerate(layer):
        for view_template, hash_value in enumerate(row):
            flattened_data.append(
                {
                    "view_template": view_template_dic[view_template],
                    "category": category_dic[category],
                    "model_name": model_dic[model_name],
                    "hash_value": hash_value,
                }
            )

In [11]:
print(flattened_data)

[{'view_template': 'template_a', 'category': 'cat_a', 'model_name': 'model_a', 'hash_value': [1, 2, 3]}, {'view_template': 'template_b', 'category': 'cat_a', 'model_name': 'model_a', 'hash_value': [4, 5, 6]}, {'view_template': 'template_c', 'category': 'cat_a', 'model_name': 'model_a', 'hash_value': [7, 8, 9]}, {'view_template': 'template_a', 'category': 'cat_b', 'model_name': 'model_a', 'hash_value': [10, 11, 12]}, {'view_template': 'template_b', 'category': 'cat_b', 'model_name': 'model_a', 'hash_value': [13, 14, 15]}, {'view_template': 'template_c', 'category': 'cat_b', 'model_name': 'model_a', 'hash_value': [16, 17, 18]}]
