## Sanctions Data Tools

A collection of useful code snippets for working with [sanctions data](https://sanctionslist.ofac.treas.gov/Home/CustomizeSanctionsDataset#) from the [United States Office of Foreign Assets Control](https://ofac.treasury.gov/). 

- Load XML Sanctions Data as JSON


## Load XML Sanctions Data as JSON

In [1]:
# Location of the sanctions XML file that is opened and parsed further down.
xml_filepath = "Datasets/IRGC_sanctions.xml"

In [None]:
from bs4 import BeautifulSoup

def xml_to_json(element):
    """
    Recursively convert a parsed XML node into JSON-style Python data.

    Args:
        element: Either a text node (any ``str``) or a tag-like object that
            exposes ``contents``, ``children`` and ``string`` (and ``name``
            on its child tags), e.g. a BeautifulSoup tag or soup object.

    Returns:
        - a text node is returned unchanged;
        - an element with no contents returns ``element.string``;
        - an element with child elements returns a dict keyed by child tag
          name; a name that occurs more than once maps to a list of the
          converted children in document order;
        - an element holding only text returns that text stripped of
          surrounding whitespace, or an empty dict when ``element.string``
          is missing or whitespace-only.
    """

    if isinstance(element, str):
        return element

    if not element.contents:
        return element.string

    result = {}

    for child in element.children:

        # Text nodes between tags (indentation, newlines) carry no structure.
        if isinstance(child, str):
            continue

        converted = xml_to_json(child)

        if child.name not in result:
            result[child.name] = converted

        else:
            # Second occurrence of a tag name: promote the value to a list.
            if not isinstance(result[child.name], list):
                result[child.name] = [result[child.name]]
            result[child.name].append(converted)

    # Child elements always win. ``element.string`` delegates through a lone
    # child tag, so checking it first would collapse <a><b>x</b></a> to "x"
    # whenever the XML has no whitespace between tags.
    if result:
        return result

    ### Directly capture text nodes without 'text' key
    text = element.string
    if text and text.strip():
        return text.strip()

    return result


## Read the raw XML. The encoding is pinned so decoding does not depend on the
## platform's locale default; UTF-8 is assumed here (the XML default) - adjust
## if the file declares a different encoding.
with open(xml_filepath, "r", encoding="utf-8") as file:
    xml_data = file.read()

## Convert XML to JSON, isolate entity data 
soup = BeautifulSoup(xml_data, features='xml')

entity_json = xml_to_json(soup)
entity_data = entity_json['sanctionsData']["entities"]["entity"]

## xml_to_json only builds a list when a tag name repeats, so a file holding a
## single <entity> yields one bare value; wrap it so the filter below always
## iterates over entity records.
if not isinstance(entity_data, list):
    entity_data = [entity_data]

## Keep only records whose entityType is "Individual" or "Entity"
entity_data = [entity for entity in entity_data if entity["generalInfo"]["entityType"] in ["Individual", "Entity"]]
print(f"Entities found: {len(entity_data)}")

## Testing Function

In [None]:
def function_test(input_data, test_function):
    """
    Call ``test_function`` on every item of ``input_data`` and print a summary.

    Args:
        input_data: Iterable of items to feed to ``test_function`` one by one.
        test_function: Callable taking a single item. Its return value is
            discarded; only whether it raises is recorded.

    Returns:
        None. Four lines are printed: number of attempts, number of failures,
        number of successes, and the 1-based positions of the failing items.
    """

    attempts = 0
    fail_indexes = []

    # Positions are 1-based so they match the "Attempts" count in the report.
    for attempts, entity in enumerate(input_data, start=1):

        try:
            test_function(entity)

        # Record ordinary failures only; KeyboardInterrupt/SystemExit still propagate.
        except Exception:
            fail_indexes.append(attempts)

    print(f"Attempts: {attempts}")
    print(f"Failed: {len(fail_indexes)}")
    print(f"Succeeded: {attempts - len(fail_indexes)}")
    print(f"Failed on: {fail_indexes}")


