# Sanctions Data Organizations Parser

## Load JSON Sanctions Data

In [1]:
import json

json_file = "Datasets/full_sanctions_data.json"

with open(json_file, "r") as file:
    json_f = file.read()
    
data = json.loads(json_f)

## Screen for "Entity" Entities

In [2]:
entity_data = data['sanctionsData']["entities"]["entity"]

org_data = []

for entity in entity_data: 
    entity_type = entity["generalInfo"]["entityType"]
    if entity_type == "Entity":
        org_data.append(entity)
       
print(f"Total entities found: {len(data['sanctionsData']["entities"]["entity"])}") 
print(f"Organization entities found: {len(org_data)}")

Total entities found: 16465
Organization entities found: 8018


## Parse Organization Data to Dict

In [28]:
def parse_entity(entity):
    
    """
    Given an XML line of sanction data for an organization,
    populates a dictionary with useful information.
    
    Returns the dictionary.
    """
    ## Entity dictionary 
    entity_dict = {
        "name": "",
        "sanctions_lists": "",
        "sanctions_programs": [],
        "sanctions_types": "",
        "legal_authorities": "",
        # "aliases": [],
        # "relationships": [],
        # "identity_documents": [],
        # "addresses": [],
        # "features": [],
        # "remarks": ""
    }
   
    # print(entity["generalInfo"]["identityId"])
    
    ## Name
    name_ele = entity["names"]["name"]
   
    ### If element is a list, assign primary and aliases
    if type(name_ele) == list:
        
        for name in name_ele:
            
            #### Find the entity's primary full name 
            if name["isPrimary"] == "true":
                
                ##### Confirm script is latin and ignore other scripts if they exist 
                if type(name["translations"]["translation"]) == dict:
                    entity_name = name["translations"]["translation"]["nameParts"]["namePart"]["value"]
                    entity_dict["name"] = entity_name
                    
                elif type(name["translations"]["translation"]) == list:
                    for trans in name["translations"]["translation"]:
                        if trans["script"] == "Latin":
                            # This approach assumes there will only ever be one latin script translation
                            entity_name = trans["nameParts"]["namePart"]["value"]
            
            #### Add non-primary names as aliases  
            elif name["isPrimary"] == "false":
                
                ##### Create an alias dict value if none exist 
                if "aliases" not in entity_dict.keys():
                    entity_dict["aliases"] = []
                    
                alias_type = name["aliasType"]
                
                ##### Confirm script is latin and ignore other scripts if they exist 
                if type(name["translations"]["translation"]) == dict:
                    
                    alias = name["translations"]["translation"]["nameParts"]["namePart"]
                    
                    
                    ###### In rare cases, a translation will include two namePart entries
                    if type(alias) == dict:
                        alias = alias["value"]
                    
                    elif type(alias) == list:
                        comp_alias = ""
                        
                        for part in alias:
                            comp_alias += part["value"]
                            comp_alias += ", "
                            
                        alias = comp_alias[:-2]
                            
                            
                        
                elif type(name["translations"]["translation"]) == list:
                    for trans in name["translations"]["translation"]:
                        if trans["script"] == "Latin":
                            # This approach assumes there will only ever be one latin script translation
                            alias = trans["nameParts"]["namePart"]["value"]
                             
                entity_dict["aliases"].append([alias_type, alias])
       
    #### Parse name dict element
    elif type(name_ele) == dict:
        
        ##### Confirm script is latin and ignore other scripts if they exist 
        if type(name_ele["translations"]["translation"]) == dict:
            entity_name = name_ele["translations"]["translation"]["nameParts"]["namePart"]["value"]
            entity_dict["name"] = entity_name
            
        elif type(name_ele["translations"]["translation"]) == list:
            for trans in name_ele["translations"]["translation"]:
                if trans["script"] == "Latin":
                    # This approach assumes there will only ever be one latin script translation
                    entity_name = trans["nameParts"]["namePart"]["value"]
                            
        entity_dict["name"] = entity_name
    
    
    
    ## Sanctions list, program, type, legal authority
    entity_dict["sanctions_lists"] = entity["sanctionsLists"]["sanctionsList"]
    entity_dict["sanctions_programs"] = entity["sanctionsPrograms"]["sanctionsProgram"]
    entity_dict["sanctions_types"] = entity["sanctionsTypes"]["sanctionsType"]
    
    if entity["legalAuthorities"] != None:
        entity_dict["legal_authorities"] = entity["legalAuthorities"]["legalAuthority"]
        


    ## Relationships
    if "relationships" in entity.keys():
        
        if entity["relationships"] != None:
            
            entity_dict["relationships"] = []
            relationships = entity["relationships"]["relationship"]
            
            if type(relationships) == dict:
                
                rel_type = relationships["type"]
                rel_entity = relationships["relatedEntity"]
                entity_dict["relationships"].append([rel_type, rel_entity])
                
            elif type(relationships) == list: 
                
                for rel in relationships:
                    rel_type = rel["type"]
                    rel_entity = rel["relatedEntity"]
                    entity_dict["relationships"].append([rel_type, rel_entity])            
        
    
    
    ## Identity Documents
    if "identityDocuments" in entity.keys():
    
        if entity["identityDocuments"] != None:
            entity_dict["identity_documents"] = []
            id_docs = entity["identityDocuments"]["identityDocument"]
            
            if type(id_docs) == dict:
                
                id_type = id_docs["type"]
                id_name = id_docs["name"]
                id_docno = id_docs["documentNumber"]
                id_valid = id_docs["isValid"]
                
                if "issuingCountry" in id_docs.keys(): 
                    id_issuer = id_docs["issuingCountry"]
                else: id_issuer = ""
                
                entity_dict["identity_documents"].append([id_type, id_name, id_docno, id_valid, id_issuer])
                
            elif type(id_docs) == list: 
                
                for id in id_docs:
                    
                    id_type = id["type"]
                    id_name = id["name"]
                    id_docno = id["documentNumber"]
                    id_valid = id["isValid"]
                    
                    if "issuingCountry" in id.keys(): 
                        id_issuer = id["issuingCountry"]
                    else: id_issuer = ""
                
                    entity_dict["identity_documents"].append([id_type, id_name, id_docno, id_valid, id_issuer])
        
      
      
    ## Features
    if "features" in entity.keys():
        
        entity_dict["features"] = []
        features = entity["features"]["feature"]
        
        if type(features) == dict:
            feature_type = features["type"]
            feature_value = features["value"]
            
            # if "valueDate" in features.keys():
            #     feature_date_from = features["documentNumber"]
            # else: feature_date_from = ""
            
            entity_dict["features"].append([feature_type, feature_value])
            
        elif type(features) == list: 
            
            for feature in features:
                feature_type = feature["type"]
                feature_value = feature["value"]
                entity_dict["features"].append([feature_type, feature_value])
    
    
    
    ## Remarks
    if "remarks" in entity["generalInfo"].keys():
        entity_dict["remarks"] = entity["generalInfo"]["remarks"]
    
    return entity_dict
    

In [26]:
test_entity = org_data[0]
test_entity

{'generalInfo': {'identityId': '18377', 'entityType': 'Entity'},
 'sanctionsLists': {'sanctionsList': 'SDN List'},
 'sanctionsPrograms': {'sanctionsProgram': 'CYBER2'},
 'sanctionsTypes': {'sanctionsType': 'Block'},
 'legalAuthorities': {'legalAuthority': 'Executive Order 13694 (Cyber)'},
 'names': {'name': [{'isPrimary': 'true',
    'isLowQuality': 'false',
    'translations': {'translation': [{'isPrimary': 'true',
       'script': 'Latin',
       'formattedLastName': 'OPTIMA, OOO',
       'formattedFullName': 'OPTIMA, OOO',
       'nameParts': {'namePart': {'type': 'Entity Name',
         'value': 'Optima, OOO'}}},
      {'isPrimary': 'false',
       'script': 'Cyrillic',
       'formattedLastName': 'ООО ОПТИМА',
       'formattedFullName': 'ООО ОПТИМА',
       'nameParts': {'namePart': {'type': 'Entity Name',
         'value': 'ООО Оптима'}}}]}},
   {'isPrimary': 'false',
    'aliasType': 'A.K.A.',
    'isLowQuality': 'false',
    'translations': {'translation': {'isPrimary': 'true'

In [29]:
parse_entity(test_entity)

{'name': '',
 'sanctions_lists': 'SDN List',
 'sanctions_programs': 'CYBER2',
 'sanctions_types': 'Block',
 'legal_authorities': 'Executive Order 13694 (Cyber)',
 'aliases': [['A.K.A.',
   'OBSHCHESTVO S OGRANICHENNOI OTVETSTVENNOSTYU Optima']],
 'relationships': [['Owned or Controlled By', 'GUSEV, Denis Igorevich']],
 'identity_documents': [['Government Gazette Number',
   'OPTIMA, OOO',
   '17325717',
   'true',
   'Russia'],
  ['Registration Number', 'OPTIMA, OOO', '1137746232260', 'true', 'Russia'],
  ['Tax ID No.', 'OPTIMA, OOO', '7716740680', 'true', 'Russia']],
 'features': [['D-U-N-S Number', '50-579-8144'],
  ['Secondary sanctions risk:',
   'Ukraine-/Russia-Related Sanctions Regulations, 31 CFR 589.201']]}

In [30]:
counter = 0
fail_indexes = []

entity_dicts = []

for entity in org_data:
    counter += 1
    
    try:
        entity_dicts.append(parse_entity(entity))
        
    except:
        fail_indexes.append(counter)

print(f"Attempts: {counter}")
print(f"Failed: {len(fail_indexes)}")
print(f"Succeeded: {len(entity_dicts)}")
print(f"Failed on: {fail_indexes}")

Attempts: 8018
Failed: 0
Succeeded: 8018
Failed on: []


## Convert Entities into .md Files

In [32]:
def create_org_entity_note(entity):

    file_name = f"{entity["name"]}.md"
    
    text = f"""---
Entity Type: Organization
Sanctions Lists: {entity["sanctions_lists"]}
Sanctions Programs: {entity["sanctions_programs"]}
Sanctions Types: {entity["sanctions_types"]}
Legal Authorities: {entity["legal_authorities"]}
---
"""

    ## Remarks
    if  "remarks" in entity.keys():
        text += entity["remarks"] 
        text += "\n"

    ## Aliases
    if "aliases" in entity.keys():
        
        
        alias_text = """
## Aliases
| Type  | Name      | 
|-------|-----------|
"""


        for alias in entity["aliases"]: 
            alias_line = f"| {alias[0]} | {alias[1]} |\n"
            alias_text += alias_line
        
        text += alias_text
        
        
    ## Relationships
        if "relationships" in entity.keys():
            
            
            rel_text = """
## Relationships
| Type  | With      | 
|-------|-----------|
"""


            for rel in entity["relationships"]: 
                rel_line = f"| {rel[0]} | [[{rel[1]}]] |\n"
                rel_text += rel_line
            
            text += rel_text

    ## Identity Documents
        if "identity_documents" in entity.keys():
            
            
            id_text = """
## Identity Documents
| Type  | Name      | Document Number | Is valid |
|-------|-----------|-----------------|----------|
"""


            for id in entity["identity_documents"]: 
                id_line = f"| {id[0]} | {id[1]} | {id[2]} | {id[3]} |\n"
                id_text += id_line
            
            text += id_text
            
    ## Features
        if "features" in entity.keys():
            
            
            features_text = """
## Features
| Type  | Value      |
|-------|------------|
"""


            for feature in entity["features"]: 
                feature_line = f"| {feature[0]} | {feature[1]} |\n"
                features_text += feature_line
            
            text += features_text
            
    return(file_name, text)


In [33]:
create_org_entity_note(entity_dicts[0])

# entity_dicts[0]

('.md',
 '---\nEntity Type: Organization\nSanctions Lists: SDN List\nSanctions Programs: CYBER2\nSanctions Types: Block\nLegal Authorities: Executive Order 13694 (Cyber)\n---\n\n## Aliases\n| Type  | Name      | \n|-------|-----------|\n| A.K.A. | OBSHCHESTVO S OGRANICHENNOI OTVETSTVENNOSTYU Optima |\n\n## Relationships\n| Type  | With      | \n|-------|-----------|\n| Owned or Controlled By | [[GUSEV, Denis Igorevich]] |\n\n## Identity Documents\n| Type  | Name      | Document Number | Is valid |\n|-------|-----------|-----------------|----------|\n| Government Gazette Number | OPTIMA, OOO | 17325717 | true |\n| Registration Number | OPTIMA, OOO | 1137746232260 | true |\n| Tax ID No. | OPTIMA, OOO | 7716740680 | true |\n\n## Features\n| Type  | Value      |\n|-------|------------|\n| D-U-N-S Number | 50-579-8144 |\n| Secondary sanctions risk: | Ukraine-/Russia-Related Sanctions Regulations, 31 CFR 589.201 |\n')

In [34]:
counter = 0
fail_indexes = []

entity_texts = []

for entity in entity_dicts:
    counter += 1
    
    try:
        entity_texts.append(create_org_entity_note(entity))
        
    except:
        fail_indexes.append(counter)

print(f"Attempts: {counter}")
print(f"Failed: {len(fail_indexes)}")
print(f"Succeeded: {counter - len(fail_indexes)}")
print(f"Failed on: {fail_indexes}")

Attempts: 8018
Failed: 0
Succeeded: 8018
Failed on: []


## Save Notes to Files

In [38]:
import os

def create_md_files(file_name, file_content):
    
    destination_folder = "Vault/"

    if os.path.exists(file_name):
        return
    file_name = file_name.replace("/", ",")
    with open(f'{destination_folder}{file_name}', 'w') as f:
        f.write(file_content)    

In [39]:
for entity in entity_texts:
    
    create_md_files(entity[0], entity[1])