# Build an all-in-one package

milsymbol.js: <https://github.com/spatialillusions/milsymbol/releases/tag/v2.2.0>

In [1]:
import json
import pathlib
import pandas as pd

## Get data from <https://github.com/spatialillusions/mil-std-2525>

TODO: also add data from <https://github.com/spatialillusions/stanag-app6>

### Get entities

In [2]:
# Collect every Entity Collection table into one nested dict, keyed by file stem.
# Expects a local clone of the mil-std-2525 repo at ../mil-std-2525.
result = {}
for tsv_path in pathlib.Path('../mil-std-2525/tsv-tables/2525d/').glob('*.tsv'):
    # "sector" files hold modifiers, not entities; they are handled separately
    if 'sector' in tsv_path.stem:
        continue
    try:
        table = pd.read_csv(tsv_path, sep='\t', keep_default_na=False, dtype='str')
        tree = {}
        # Turn the sparse Entity / Entity Type / Entity Subtype columns into a hierarchy:
        #    A1 - - 1
        #    A2 - - 2  => {EC: {A1: 1, A2: {"": 2, B2: {"": 3, C2: 4}}}}
        #    - B2 - 3
        #    - - C2 4
        # A row only fills the column of its own level, so the most recent
        # entity / entity type names are remembered between rows.
        for _, row in table.iterrows():
            if row['Entity']:
                # level A: a plain code until a child shows up
                entity_name = row['Entity']
                tree[entity_name] = row['Code']
                continue
            if row['Entity Type']:
                # level B
                type_name = row['Entity Type']
                children = tree[entity_name]
                if not isinstance(children, dict):
                    # the parent's own code moves under the "" key
                    children = tree[entity_name] = {"": children}
                # reserved placeholders are dropped
                if type_name != '{Reserved for future use}':
                    children[type_name] = row['Code']
                continue
            if row['Entity Subtype']:
                # level C
                subtype_name = row['Entity Subtype']
                children = tree[entity_name]
                grandchildren = children[type_name]
                if not isinstance(grandchildren, dict):
                    # the parent's own code moves under the "" key
                    grandchildren = children[type_name] = {"": grandchildren}
                grandchildren[subtype_name] = row['Code']
        result[tsv_path.stem] = tree
    # best effort: a file that fails is reported and skipped
    # (original note: 3 tab errors in Control Measures.tsv)
    except Exception as err:
        print(err)
        print(tsv_path)

### Get modifiers

In [3]:
# Modifier tables are the files whose name contains "sector"
mods = [
    tsv_path
    for tsv_path in pathlib.Path('../mil-std-2525/tsv-tables/2525d/').glob('*.tsv')
    if 'sector' in tsv_path.stem
]

# Attach 'modifier_1' / 'modifier_2' dicts to the Entity Collections that have them
for collection in result:
    for mod_path in mods:
        # a modifier file belongs to a collection when its name contains "<collection> sector"
        if collection + ' sector' not in mod_path.stem:
            continue
        table = pd.read_csv(mod_path, sep='\t', keep_default_na=False, dtype='str')
        # a "1" in the file name marks the first modifier; anything else is the second
        if '1' in mod_path.stem:
            n, modname = '1', 'First Modifier'
        else:
            n, modname = '2', 'Second Modifier'
        try:
            # map modifier name -> code, dropping placeholder rows
            modifiers = {
                row[modname]: row['Code']
                for _, row in table.iterrows()
                if row[modname] not in ['{Reserved for future use}', 'Version Extension Flag']
            }
            result[collection][f'modifier_{n}'] = modifiers
        # best effort: report the file and carry on
        except Exception as err:
            print(err)
            print(mod_path.stem)

In [4]:
# Flatten the nested hierarchy into dotted keys and save it as set B:
# {EC: {A1: 1, A2: {"": 2, B2: {"": 3, C2: 4}}}} =>
#   EC.A1: 1
#   EC.A2: 2
#   EC.A2.B2: 3
#   EC.A2.B2.C2: 4
# NOTE(review): the "" keys presumably flatten with a trailing separator
#   (e.g. 'EC.A2.' rather than 'EC.A2') -- check the generated file.
# Flatten BEFORE opening the output: open(..., 'w') truncates the file, so a
# failure while flattening would otherwise leave an empty set_b.json behind.
x = pd.json_normalize(result).T.to_dict()[0]
with open('../json/set_b.json', 'w') as fp:
    json.dump(x, fp, ensure_ascii=False, indent=2)

### Set A from <https://www.jcs.mil/portals/36/documents/doctrine/other_pubs/ms_2525d.pdf>

In [5]:
# set_a_raw.json comes from the 005-kba notebook; flatten its nested
# structure into dotted keys and store the result as set A
raw_path = pathlib.Path('set_a_raw.json')
with raw_path.open('r') as fp:
    tmp = json.load(fp)

flat_frame = pd.json_normalize(tmp)
x = flat_frame.transpose().to_dict()[0]

with open('../json/set_a.json', 'w') as fp:
    json.dump(x, fp, ensure_ascii=False, indent=2)

## Class for working with the data above

In [6]:
import json
import pathlib
import rapidfuzz
from py_mini_racer import MiniRacer
import cairosvg


class SIDCFuzzySearcher:
    def __init__(self, path_to_set_a: pathlib.Path, path_to_set_b: pathlib.Path, path_to_milsymbolsjs: pathlib.Path):
        self._data_a = self._load_json(path_to_set_a)
        self._data_b = self._load_json(path_to_set_b)
        self._ctx = self._load_js(path_to_milsymbolsjs)
        self._defaults_set_a = {'1': '1', '2': '0', '3': '0', '4': '1', '56': '00', '7': '0', '8': '0', '910': '00'}

    def _load_json(self, path: pathlib.Path) -> dict:
        with open(path, 'r') as fp:
            return json.load(fp)

    def _load_js(self, path: pathlib.Path) -> MiniRacer:
        # get script from <https://github.com/spatialillusions/milsymbol/releases/tag/v2.2.0>
        with open(path, 'r') as fp:
            txt = fp.read()
        ctx = MiniRacer()
        ctx.eval(txt)
        return ctx
        
    def _search_a(self, query: str, n=1) -> str:
        """ query in set A treated like few separate words
        """
        choices_a = self._data_a.keys()
        # try to find each word separately, get uniq results
        findings = set([self._fuzzy_search(q, choices_a, n) for q in query.split()])
        # update default values
        answer_a = self._defaults_set_a.copy()
        # '3.Reality' is a key to self._data_a where '3' is a idx of set_a
        #   self._data_a['3.Reality'] -> '0' 
        answer_a.update({f.split('.')[0]: self._data_a[f] for f in findings if f})
        # format an answer string
        a = f"{answer_a['1']}{answer_a['2']}{answer_a['3']}{answer_a['4']}{answer_a['56']}{answer_a['7']}{answer_a['8']}{answer_a['910']}"
        return a
    
    def _search_b(self, query: str, mod1: str = '', mod2: str = '', n=1) -> str:
        """ query in set B treated like a single sentence
            each modifier is a separate single word/sentence
        """
        choices = self._data_b.keys()
        # try to find entity
        #   'Land unit.Fires.Mortar.Armored/Mechanized/Tracked'
        selected_key = self._fuzzy_search(query, choices, n)
        #   '130801'
        answer_b = self._data_b[selected_key] if selected_key else '000000'
        #   'Land unit' -- prefix of selected_key
        selected_b = selected_key.split('.')[0] if selected_key else selected_key
        # try to find modifiers
        answer_mod1 = self._search_b_mode(mod1, selected_b, suffix='.modifier_1')
        answer_mod2 = self._search_b_mode(mod2, selected_b, suffix='.modifier_2')
        # format an answer string
        b = answer_b + answer_mod1 + answer_mod2
        return b

    def _search_b_mode(self, query: str | None, selected_b: str | None, suffix='.modifier_1', n=1) -> str:
        if query and selected_b:
            # ['Land unit.modifier_1.Attack', ..]
            choices = [k for k in self._data_b.keys() if selected_b + suffix in k]
            # 'Land unit.modifier_1.Attack'
            x = self._fuzzy_search(query, choices, n)
            # '03'
            answer_mod = self._data_b[x] if x else '00'
        else:
            answer_mod = '00'
        return answer_mod

    def _fuzzy_search(self, query: str, choices: list[str], n: int) -> str:
        # TODO score_cutoff as a parameter
        x = rapidfuzz.process.extract(query, choices, limit=n, score_cutoff=50)
        return x[0][0] if x else ''
   
    def get_sidc(self, query_a: str, query_b: str, mod1='', mod2='') -> str:
        """ query in set A treated like few separate words
            query in set B treated like a single sentence
            each modifier is a separate single word/sentence
        """
        a = self._search_a(query_a)
        b = self._search_b(query_b, mod1, mod2)
        return a + b

    def get_svg(self, sidc: str, size=35) -> str:
        svg_text = self._ctx.eval(f'new ms.Symbol({sidc}, {{"size": {size}}}).asSVG()')
        return svg_text

    def save_png(self, svg_text: str, path_to_file: pathlib.Path) -> None:
        cairosvg.svg2png(svg_text, write_to=path_to_file)

### Test

In [7]:
# milsymbol.js has to be downloaded into the working directory first
searcher = SIDCFuzzySearcher(
    path_to_set_a='../json/set_a.json',
    path_to_set_b='../json/set_b.json',
    path_to_milsymbolsjs='milsymbol.js',
)

# NOTE(review): the misspellings ("Realty", "airborn") presumably exercise the
# fuzzy matching on purpose -- confirm before "fixing" them
query_a = "Hostile Realty Land Present Platoon TaskForce"
query_b = "mortar armored"
searcher.get_sidc(query_a=query_a, query_b=query_b, mod1='sniper', mod2='airborn')

'10061024141308016101'