In [1]:
import re
import difflib
import pandas as pd

# Reference set: the canonical entity names every input is matched against.
known_universe = [
    "AKS Midco 3 Ltd.",
    "CDMA Acquisition Corp.",
    "Safety Enhancement Group, LLC",
    "Growlings Sporting Goods Company, Inc.",
    "ZDS Brands, LLC",
]

# Free-form fund names to be resolved to an entry of ``known_universe``
# (or to "NA" when nothing is close enough).
input_list = [
    "AKS Group",
    "CDM Acquisitionco, Inc.",
    "FA/SEG Holdings LLC (Safety Enhancement Group)",
    "Growlings Parent, Inc.",
    "ZDSP Acquisition Corp and Subsidiaries",
    "XYZ",
    "MAL Intermediate LLC",
]

# Normalise an entity name so that fuzzy comparison ignores case,
# punctuation and boilerplate legal suffixes.
def clean_name(name: str) -> str:
    """Return a lower-cased, simplified form of *name* for matching.

    The substitutions are applied in order:

    1. parentheses, periods and commas are deleted;
    2. the stand-alone words inc, llc, corp, ltd, company and co are
       deleted (word-boundary match, so "acquisitionco" is untouched);
    3. runs of whitespace collapse to a single space.

    Leading and trailing whitespace is stripped from the result.
    """
    substitutions = (
        (r'[\(\)\.,]', ''),
        (r'\b(inc|llc|corp|ltd|company|co)\b', ''),
        (r'\s+', ' '),
    )
    text = name.lower()
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()

# Lookup table: original known name -> its cleaned form (insertion order kept)
cleaned_universe = dict(zip(known_universe, map(clean_name, known_universe)))

# Resolve one free-form name against the known universe
def map_name(arbitrary_name, threshold=0.6):
    """Return the known entity that best matches *arbitrary_name*, or "NA".

    The input is cleaned with ``clean_name`` and compared to the cleaned
    known names using ``difflib.get_close_matches``; *threshold* is passed
    through as its ``cutoff``. When a match is found, the first original
    name (in ``cleaned_universe`` order) whose cleaned form equals the
    match is returned; otherwise the string "NA" is returned.
    """
    query = clean_name(arbitrary_name)
    candidates = list(cleaned_universe.values())

    hits = difflib.get_close_matches(query, candidates, n=1, cutoff=threshold)
    if not hits:
        return "NA"

    winner = hits[0]
    # Translate the cleaned match back to its original spelling.
    return next(
        (
            original
            for original, cleaned in cleaned_universe.items()
            if cleaned == winner
        ),
        "NA",
    )

# Resolve every input name (input name -> mapped known name or "NA")
results = dict(zip(input_list, map(map_name, input_list)))

# Tabulate the mapping, one row per input name, and show it
df = pd.DataFrame(
    {
        "Input Name": list(results.keys()),
        "Mapped Name": list(results.values()),
    }
)
print(df)


                                       Input Name  \
0                                       AKS Group   
1                         CDM Acquisitionco, Inc.   
2  FA/SEG Holdings LLC (Safety Enhancement Group)   
3                          Growlings Parent, Inc.   
4          ZDSP Acquisition Corp and Subsidiaries   
5                                             XYZ   
6                            MAL Intermediate LLC   

                              Mapped Name  
0                                      NA  
1                  CDMA Acquisition Corp.  
2           Safety Enhancement Group, LLC  
3  Growlings Sporting Goods Company, Inc.  
4                                      NA  
5                                      NA  
6                                      NA  
