In [2]:
import re
import difflib
import pandas as pd


# Canonical company names. map_name() resolves every input to one of these
# entries, or to "NA" when nothing is similar enough.
known_universe = [
    "AKS Midco 3 Ltd.",
    "CDMA Acquisition Corp.",
    "Safety Enhancement Group, LLC",
    "Growlings Sporting Goods Company, Inc.",
    "ZDS Brands, LLC"
]

# Free-form names that are matched against known_universe further down.
input_list = [
    "AKS Group",
    "CDM Acquisitionco, Inc.",
    "FA/SEG Holdings LLC (Safety Enhancement Group)",
    "Growlings Parent, Inc.",
    "ZDSP Acquisition Corp and Subsidiaries",
    "XYZ",
    "MAL Intermediate LLC"
]

def clean_name(name):
    """Return a normalised form of a company name for fuzzy comparison.

    The name is lower-cased, then three regex substitutions run in order:
    parentheses, periods and commas are deleted; common corporate words
    (inc, llc, corp, ...) are deleted when they appear as whole words; and
    runs of whitespace collapse to a single space. Leading and trailing
    whitespace is stripped from the result.
    """
    # Order matters: punctuation is removed first so that "Inc." becomes
    # the bare word "inc" before the whole-word filter runs.
    substitutions = (
        (r'[\(\)\.,]', ''),
        (r'\b(inc|llc|corp|ltd|company|co|holdings|group|parent|subsidiaries)\b', ''),
        (r'\s+', ' '),
    )
    cleaned = name.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# Original canonical name -> its normalised form, computed once up front.
cleaned_universe = dict(zip(known_universe, map(clean_name, known_universe)))

def map_name(arbitrary_name, threshold=0.75):
    """Map a free-form company name onto a canonical name.

    Both the input and the canonical names are compared in their
    normalised (clean_name) form using difflib.get_close_matches.

    Args:
        arbitrary_name: The name to look up.
        threshold: Cutoff passed to difflib.get_close_matches; candidates
            scoring below it are ignored.

    Returns:
        The original canonical name whose normalised form is the single
        best match, or the string "NA" when no candidate reaches the
        threshold.
    """
    query = clean_name(arbitrary_name)
    candidates = list(cleaned_universe.values())
    hits = difflib.get_close_matches(query, candidates, n=1, cutoff=threshold)

    if not hits:
        return "NA"

    winner = hits[0]
    # Translate the normalised match back to the original spelling; the
    # first canonical name with that normalised form wins.
    return next(
        (
            original
            for original, cleaned in cleaned_universe.items()
            if cleaned == winner
        ),
        "NA",
    )
# Resolve every input name to its canonical counterpart (or "NA").
results = dict(zip(input_list, map(map_name, input_list)))

# One row per input name, shown next to the match that was found.
df = pd.DataFrame(
    data=list(results.items()),
    columns=["Input Name", "Mapped Name"],
)
print(df)

                                       Input Name  \
0                                       AKS Group   
1                         CDM Acquisitionco, Inc.   
2  FA/SEG Holdings LLC (Safety Enhancement Group)   
3                          Growlings Parent, Inc.   
4          ZDSP Acquisition Corp and Subsidiaries   
5                                             XYZ   
6                            MAL Intermediate LLC   

                     Mapped Name  
0                             NA  
1         CDMA Acquisition Corp.  
2  Safety Enhancement Group, LLC  
3                             NA  
4                             NA  
5                             NA  
6                             NA  
