## Extra Import

In [1]:
import sys
from typing import *

# Add example_tools path
sys.path.append('../../..')

from example_tools.example_data import dict_list
from example_tools.example_data.dict_list import AstNodeMock, NodeWithInfo, Position, Custom

## Main Functions

In [2]:
from crimson.advanced_dict.converter.dict_list_converter import (
    group_by_key_path,
    group_single_items_by_key_path,
    analyze_path_types
)


---
It is common to end up with data of type `List[Dict[str, Any]]`.\
The goal of this module is to convert such data into `searchable dict-type data`.\
The keys of the new data are the values found at a given key path in the original items.

---
The function `group_single_items_by_key_path` returns data of type\
`Dict[K, Dict[str, Any]]`, where `K` is the type of the values found at the input path.

All the values found at the path must be unique.

In [3]:
# Example list of `NodeWithInfo` records, taken from the example data module.
ast_node_dict_list: List[NodeWithInfo] = dict_list.ast_node_with_position

In [4]:
# Index the records by the value found at "node.lineno", keeping only `int` values.
new_data: Dict[int, NodeWithInfo] = group_single_items_by_key_path(
    data=ast_node_dict_list,
    key_path="node.lineno",
    key_type=Type[int],
)
new_data

{5: {'node': {'lineno': 5},
  'position': {'lineno': 5,
   'end_lineno': 6,
   'col_offset': 0,
   'end_col_offset': 15}},
 25: {'node': {'lineno': 25},
  'position': {'lineno': 25,
   'end_lineno': 26,
   'col_offset': 4,
   'end_col_offset': 12}}}

---
The `key_type` argument plays an important role.\
It can be thought of as a filter.

The `"node.lineno"` path is supposed to hold only integers; therefore, if we pass `key_type=Type[str]`, we should, in theory, get no data.

In [5]:
# Same path, but keep only the records whose "node.lineno" value is a `str`.
# The keys of the result are therefore `str`, not `int`.
maybe_empty_data: Dict[str, NodeWithInfo] = group_single_items_by_key_path(
    data=ast_node_dict_list,
    key_path="node.lineno",
    key_type=Type[str],
)
maybe_empty_data

{'9': {'node': {'lineno': '9'},
  'position': {'lineno': 9,
   'end_lineno': 10,
   'col_offset': 0,
   'end_col_offset': 41}}}

---
An integer stored as a `str` passed the filter.\
This is a serious problem: that item was filtered out when we used `Type[int]`.

How can we make sure we don't miss any data?

In [6]:
# Collect every type that occurs at "node.lineno", so a suitable `key_type` can be chosen.
all_types_the_path_has = analyze_path_types(
    data=ast_node_dict_list,
    key_path="node.lineno",
)
# Display the result; without a trailing expression the cell shows nothing.
all_types_the_path_has


---
It has `{int, str}` types.\
We can use `Union[int, str]` to cover both.\
We finally get all the data.

In [7]:
# Accept both `int` and `str` values at the path so that every record is kept.
new_data = group_single_items_by_key_path(
    data=ast_node_dict_list,
    key_path="node.lineno",
    key_type=Type[Union[int, str]],
)
new_data

{5: {'node': {'lineno': 5},
  'position': {'lineno': 5,
   'end_lineno': 6,
   'col_offset': 0,
   'end_col_offset': 15}},
 '9': {'node': {'lineno': '9'},
  'position': {'lineno': 9,
   'end_lineno': 10,
   'col_offset': 0,
   'end_col_offset': 41}},
 25: {'node': {'lineno': 25},
  'position': {'lineno': 25,
   'end_lineno': 26,
   'col_offset': 4,
   'end_col_offset': 12}}}

---
Wrap the `key_type` in `typing.Type`; otherwise, the returned data won't be annotated properly.

See how `Pylance` highlights the code.

<!--StartFragment--><div style="color: #cccccc;background-color: #1f1f1f;font-family: Consolas, 'Courier New', monospace;font-weight: normal;font-size: 14px;line-height: 19px;white-space: pre;"><div><span style="color: #4ec9b0;">list</span><span style="color: #cccccc;">(</span><span style="color: #9cdcfe;">new_data</span><span style="color: #cccccc;">.</span><span style="color: #dcdcaa;">keys</span><span style="color: #cccccc;">())[</span><span style="color: #b5cea8;">0</span><span style="color: #cccccc;">].</span><span style="color: #dcdcaa;">splitlines</span></div><div><span style="color: #4ec9b0;">list</span><span style="color: #cccccc;">(</span><span style="color: #9cdcfe;">new_data</span><span style="color: #cccccc;">.</span><span style="color: #dcdcaa;">keys</span><span style="color: #cccccc;">())[</span><span style="color: #b5cea8;">0</span><span style="color: #cccccc;">].</span><span style="color: #9cdcfe;">imag</span></div></div><!--EndFragment-->


---
See the improper example as well.

In [8]:
# Counter-example: `key_type` is deliberately passed WITHOUT the `typing.Type` wrapper.
improperly_highlighted = group_single_items_by_key_path(
    data=ast_node_dict_list,
    key_path="node.lineno",
    key_type=Union[int, str],
)
improperly_highlighted

{5: {'node': {'lineno': 5},
  'position': {'lineno': 5,
   'end_lineno': 6,
   'col_offset': 0,
   'end_col_offset': 15}},
 '9': {'node': {'lineno': '9'},
  'position': {'lineno': 9,
   'end_lineno': 10,
   'col_offset': 0,
   'end_col_offset': 41}},
 25: {'node': {'lineno': 25},
  'position': {'lineno': 25,
   'end_lineno': 26,
   'col_offset': 4,
   'end_col_offset': 12}}}

<!--StartFragment--><div style="color: #cccccc;background-color: #1f1f1f;font-family: Consolas, 'Courier New', monospace;font-weight: normal;font-size: 14px;line-height: 19px;white-space: pre;"><div><span style="color: #4ec9b0;">list</span><span style="color: #cccccc;">(</span><span style="color: #9cdcfe;">improperly_highlighted</span><span style="color: #cccccc;">.</span><span style="color: #dcdcaa;">keys</span><span style="color: #cccccc;">())[</span><span style="color: #b5cea8;">0</span><span style="color: #cccccc;">].splitlines</span></div><div><span style="color: #4ec9b0;">list</span><span style="color: #cccccc;">(</span><span style="color: #9cdcfe;">improperly_highlighted</span><span style="color: #cccccc;">.</span><span style="color: #dcdcaa;">keys</span><span style="color: #cccccc;">())[</span><span style="color: #b5cea8;">0</span><span style="color: #cccccc;">].imag</span></div></div><!--EndFragment-->


---
A repeated value at the path will cause an error.

In [9]:
complex_data = dict_list.complex_data

# The exception is the demonstration here: "node.name" is not unique across
# the records, so the single-item grouping is expected to fail.
try:
    group_single_items_by_key_path(
        data=complex_data,
        key_path="node.name",
        key_type=Type[str],
    )
except Exception as error:
    print("Error: ", error)

Error:  Multiple items found for key 'function2'. Use group_by_key_path function instead.


---
If you still want to convert the data, use the `group_by_key_path` function as instructed.

In [10]:
# Group the records by "node.name"; every key maps to a list of records.
new_data = group_by_key_path(
    data=complex_data,
    key_path="node.name",
    key_type=Type[Union[Custom, str]],
)

# Re-key the grouped result by `str(key)`. A comprehension keeps the loop
# variables out of the notebook's global namespace.
hash_solved = {str(key): items for key, items in new_data.items()}

hash_solved

{'function1': [{'id': 1,
   'node': {'name': 'function1', 'value': 10, 'type': 'regular'},
   'metadata': Custom(meta1)}],
 'function2': [{'id': 2,
   'node': {'name': 'function2', 'value': '20', 'type': Custom(special)},
   'metadata': 'some meta data'},
  {'id': 5,
   'node': {'name': 'function2',
    'value': [1, '2', 3.0],
    'type': ('tuple', 'type')},
   'metadata': 42}],
 'Custom(custom_function)': [{'id': '4',
   'node': {'name': Custom(custom_function),
    'value': Custom(custom_value),
    'type': None},
   'metadata': {'key': 'value'}}]}

---
Note that the path `"node.name"` includes `List`-type values,\
and they will cause an error because a `List` is not allowed as a key of a `Dict`.

Therefore, you have to filter out such improper types.

In [11]:
# Inspect which types occur at "node.name" before choosing a `key_type`.
paths = analyze_path_types(
    data=complex_data,
    key_path="node.name",
)
paths

{example_tools.example_data.dict_list.Custom, list, str}

In [12]:
# The exception is the demonstration here: `List` is deliberately included in
# `key_type`, and the call is expected to fail on the list values.
try:
    group_by_key_path(
        data=complex_data,
        key_path="node.name",
        key_type=Type[Union[Custom, List, str]],
    )
except Exception as error:
    print("Error: ", error)

Error:  unhashable type: 'list'


## Minor Functions

In [13]:
from crimson.advanced_dict.converter.dict_list_converter import (
    get_nested_value,
    create_type_converter,
)
