# Sanctions Data Organizations Parser

## Load JSON Sanctions Data

In [2]:
import json

json_file = "Datasets/IRGC_sanctions.json"

with open(json_file, "r") as file:
    json_f = file.read()
    
data = json.loads(json_f)

## Screen for "Entity" Entities

In [3]:
entity_data = data['sanctionsData']["entities"]["entity"]

org_data = []

for entity in entity_data: 
    entity_type = entity["generalInfo"]["entityType"]
    if entity_type == "Entity":
        org_data.append(entity)
       
print(f"Total entities found: {len(data['sanctionsData']["entities"]["entity"])}") 
print(f"Organization entities found: {len(org_data)}")

Total entities found: 293
Organization entities found: 84


## Parse Organization Data to Dict

In [62]:
def parse_entity(entity):
    
    """
    Given an XML line of sanction data for an organization,
    populating a dictionary with useful information.
    
    Returns the dictionary.
    """
    ## Entity dictionary 
    entity_dict = {
        "name": "",
        "sanctions_lists": "",
        "sanctions_programs": [],
        "sanctions_types": "",
        "legal_authorities": "",
        "aliases": [],
        "relationships": [],
        "identity_documents": [],
        "addresses": [],
        "features": []
    }
   
    # print(entity["generalInfo"]["identityId"])
    
    ## Name
    name_ele = entity["names"]["name"]
   
    ### If element is a list, assign primary and aliases
    if type(name_ele) == list:
        
        for name in name_ele:
            
            #### Find the entity's primary full name 
            if name["isPrimary"] == "true":
                
                ##### Confirm script is latin and ignore other scripts if they exist 
                if type(name["translations"]["translation"]) == dict:
                    entity_name = name["translations"]["translation"]["nameParts"]["namePart"]["value"]
                    entity_dict["name"] = entity_name
                    
                elif type(name["translations"]["translation"]) == list:
                    for trans in name["translations"]["translation"]:
                        if trans["script"] == "Latin":
                            # This approach assumes there will only ever be one latin script translation
                            entity_name = trans["nameParts"]["namePart"]["value"]
            
            #### Add non-primary names as aliases  
            elif name["isPrimary"] == "false":
                
                alias_type = name["aliasType"]
                
                ##### Confirm script is latin and ignore other scripts if they exist 
                if type(name["translations"]["translation"]) == dict:
                    alias = name["translations"]["translation"]["nameParts"]["namePart"]["value"]

                elif type(name["translations"]["translation"]) == list:
                    for trans in name["translations"]["translation"]:
                        if trans["script"] == "Latin":
                            # This approach assumes there will only ever be one latin script translation
                            alias = trans["nameParts"]["namePart"]["value"]
                                    
                entity_dict["aliases"].append([alias_type, alias])
       
    #### Parse name dict element
    elif type(name_ele) == dict:
        
        ##### Confirm script is latin and ignore other scripts if they exist 
        if type(name_ele["translations"]["translation"]) == dict:
            entity_name = name_ele["translations"]["translation"]["nameParts"]["namePart"]["value"]
            entity_dict["name"] = entity_name
            
        elif type(name_ele["translations"]["translation"]) == list:
            for trans in name_ele["translations"]["translation"]:
                if trans["script"] == "Latin":
                    # This approach assumes there will only ever be one latin script translation
                    entity_name = trans["nameParts"]["namePart"]["value"]
                            
        entity_dict["name"] = entity_name
    
    
    
    ## Sanctions list, program, type, legal authority
    entity_dict["sanctions_lists"] = entity["sanctionsLists"]["sanctionsList"]
    entity_dict["sanctions_programs"] = entity["sanctionsPrograms"]["sanctionsProgram"]
    entity_dict["sanctions_types"] = entity["sanctionsLists"]["sanctionsList"]
    entity_dict["legal_authorities"] = entity["legalAuthorities"]["legalAuthority"]
        

    ## Relationships
    relationships = entity["relationships"]["relationship"]
    
    if type(relationships) == dict:
        
        rel_type = relationships["type"]
        rel_entity = relationships["relatedEntity"]
        entity_dict["relationships"].append([rel_type, rel_entity])
        
    elif type(relationships) == list: 
        
        for rel in relationships:
            rel_type = rel["type"]
            rel_entity = rel["relatedEntity"]
            entity_dict["relationships"].append([rel_type, rel_entity])            
    
    ## Identity Documents
    
    
    ## Addresses
    
    
    ## Features
    
    
    return entity_dict
    

In [63]:
test_entity = org_data[77]

parse_entity(test_entity)

{'name': 'China Oil And Petroleum Company Limited',
 'sanctions_lists': 'SDN List',
 'sanctions_programs': ['IFSR', 'IRGC', 'SDGT'],
 'sanctions_types': 'SDN List',
 'legal_authorities': 'Executive Order 13224 (Terrorism)',
 'aliases': [],
 'relationships': [['Providing support to',
   'ISLAMIC REVOLUTIONARY GUARD CORPS (IRGC)-QODS FORCE']],
 'identity_documents': [],
 'addresses': [],
 'features': []}

In [64]:
counter = 0

for entity in org_data:
    counter += 1
    
    try:
        parse_entity(entity)
        
    except:
        print(f"fail on: {counter}")

fail on: 1
fail on: 2
fail on: 16
fail on: 17
fail on: 18
fail on: 19
fail on: 20
fail on: 21
fail on: 27
fail on: 28
fail on: 29
fail on: 30
fail on: 31
fail on: 32
fail on: 33
fail on: 34
fail on: 35
fail on: 36
fail on: 37
fail on: 38
fail on: 40
fail on: 42
fail on: 44
fail on: 45
fail on: 46
fail on: 47
fail on: 48
fail on: 49
fail on: 50
fail on: 51
fail on: 52
fail on: 54
fail on: 55
fail on: 56
fail on: 57
fail on: 58
fail on: 59
fail on: 60
fail on: 61
fail on: 77
fail on: 84


In [None]:
    if "idList" in entity.keys():
        
        # Note that the value of akaList may be a list of dictionaries or a single dictionary
        
        idList_val = entity["idList"]["id"]

        if type(idList_val) == list:
            
            for id in idList_val:
            
                if "idType" in id.keys():
                    id_type = id["idType"]
                else:
                    id_type = ""
                
                if "idNumber" in id.keys():
                    id_number = id["idNumber"]
                else:
                    id_number = ""
                    
                if "idCountry" in id.keys():
                    id_country = id["idCountry"]
                else:
                    id_country = ""
                
                
                id_line = ["|", id_type, "|", id_number, "|", id_country, "|\n"]
                id_header += " ".join(id_line)
                
        elif type(idList_val) == dict:
            
            id = idList_val
            
            if "idType" in id.keys():
                id_type = id["idType"]
            else:
                id_type = ""
            
            if "idNumber" in id.keys():
                id_number = id["idNumber"]
            else:
                id_number = ""
                    
            id_line = ["|", id_type, "|", id_number, "|\n"]
            id_header += " ".join(id_line)
        
        org_note_template += id_header
        org_note_template += "\n"
    
    ### AKA List Header
    if "akaList" in entity.keys():
        
        # Note that the value of akaList may be a list of dictionaries or a single dictionary
        
        akaList_val = entity["akaList"]["aka"]
        
        if type(akaList_val) == list:
            
            for aka in akaList_val:
                aka_line = ["|", aka["type"] , "|", aka["category"], "|", aka["lastName"], "|\n"]
                aka_header += " ".join(aka_line)
                
        elif type(akaList_val) == dict:
            
            aka = akaList_val    
            aka_line = ["|", aka["type"], "|", aka["category"], "|", aka["lastName"], "|\n"]
            aka_header += " ".join(aka_line)
        
        org_note_template += aka_header
        org_note_template += "\n"
    
    
    ### Address List Header
    if "addressList" in entity.keys():
        
        # Note that the value of akaList may be a list of dictionaries or a single dictionary
        
        address_val = entity["addressList"]["address"]
        
        if type(address_val) == list:
            
            for address in address_val:
                
                if "address1" in address.keys():
                    address_1 = address["address1"]
                else:
                    address_1 = ""
                    
                if "address2" in address.keys():
                    address_2 = address["address2"]
                else:
                    address_2 = ""
                
                if "address3" in address.keys():
                    address_3 = address["address3"]
                else:
                    address_3 = ""
                
                if "city" in address.keys():
                    address_city = address["city"]
                else:
                    address_city = ""
                    
                if "postalCode" in address.keys():
                    address_postalcode = address["postalCode"]
                else:
                    address_postalcode = ""
                
                if "country" in address.keys():
                    address_country = address["country"]
                else:
                    address_country = ""                    
                
                
                address_line = ["|", address_1, 
                                "|", address_2,
                                "|", address_3,
                                "|", address_city,
                                "|", address_postalcode,
                                "|", address_country, 
                                "|\n"
                                ]
                
                address_header += " ".join(address_line)
                
                
        elif type(address_val) == dict:
            
            address = address_val    
            
            if "address1" in address.keys():
                address_1 = address["address1"]
            else:
                address_1 = ""
                
            if "address2" in address.keys():
                address_2 = address["address2"]
            else:
                address_2 = ""
            
            if "address3" in address.keys():
                address_3 = address["address3"]
            else:
                address_3 = ""
            
            if "city" in address.keys():
                address_city = address["city"]
            else:
                address_city = ""
                
            if "postalCode" in address.keys():
                address_postalcode = address["postalCode"]
            else:
                address_postalcode = ""
            
            if "country" in address.keys():
                address_country = address["country"]
            else:
                address_country = ""                    
            
            
            address_line = ["|", address_1, 
                            "|", address_2,
                            "|", address_3,
                            "|", address_city,
                            "|", address_postalcode,
                            "|", address_country, 
                            "|\n"
                            ]
                
            address_header += " ".join(address_line)
            
            
        org_note_template += address_header
        org_note_template += "\n"

In [None]:
def create_entity_note(entity):
        file_name = f"{entity["lastName"]}"
    program = entity["programList"]["program"]
    
    org_note_template = f"""---
Entity Type: Entity
Sanctions Program: {program}
---
"""

    if "remarks" in entity.keys():
        org_note_template += entity["remarks"] 
        org_note_template += "\n"
        
        
    ## Header Templates
    id_header = """
## IDs
| ID Type | ID Number | ID Country |
|---------|-----------|------------|
"""
    
    aka_header = """
## AKAs
| Type | Category | Name      | 
|------|----------|-----------|
"""

    address_header = """
## Addresses
| Line 1 | Line 2 | line 3 | City | Postal Code| Country | 
|--------|--------|--------|------|------------|---------|
"""