In [55]:
# Step 1: loading all required libraries
import pandas as pd
import numpy as np
from rapidfuzz import fuzz, process
from typing import Tuple, List
from sklearn.metrics import precision_score, recall_score, f1_score


In [56]:
# Customer dataset (Kaggle or otherwise): collect every distinct, non-null value of "name"
customer_df = pd.read_csv("C:/Users/ajaxc/Downloads/Sanctioned_List_By_Countries.csv")
input_names = customer_df["name"].dropna().drop_duplicates().tolist()

# OFAC SDN list: relabel the second column as 'entity_name', then collect its
# distinct, non-null values as the list to screen against.
# NOTE(review): read_csv treats the first row as the header by default; if this
# sdn.csv export has no header row, its first record is consumed as column
# names and never reaches sanctions_list — confirm against the file.
ofac_df = pd.read_csv("C:/Users/ajaxc/Downloads/sdn.csv")
cols = ofac_df.columns.tolist()
cols[1] = "entity_name"
ofac_df.columns = cols
sanctions_list = ofac_df["entity_name"].dropna().drop_duplicates().tolist()


In [57]:
def fast_match(input_name, sanctions_list, threshold=85):
    """
    Find the closest entry in `sanctions_list` for a single name.

    Uses rapidfuzz.process.extractOne with the fuzz.token_sort_ratio scorer.
    The best-scoring candidate is always reported, even when it falls below
    `threshold`; the threshold only decides the match flag.

    Parameters:
        input_name: name to look up.
        sanctions_list: candidate names to compare against.
        threshold: minimum score for the best candidate to count as a match.

    Returns:
        (best_match, score, is_match) where `is_match` is `score >= threshold`.
        When extractOne returns nothing (e.g. no candidates), the result is
        (None, 0, False).
    """
    result = process.extractOne(
        input_name,
        sanctions_list,
        scorer=fuzz.token_sort_ratio
    )
    if result is None:
        return None, 0, False

    # extractOne yields (choice, score, index); the index is not needed here.
    match_name, score, _ = result
    # Previously the flag was unconditionally True and `threshold` was ignored.
    return match_name, score, score >= threshold

def fast_screen(inputs, sanctions_list, threshold=85):
    """
    Fast fuzzy screening over a list of input names.

    Each name in `inputs` is looked up with `fast_match` against the full
    `sanctions_list`, and one result row is produced per name.

    Parameters:
        inputs: iterable of names to screen.
        sanctions_list: candidate names to compare against.
        threshold: minimum score for a row to be flagged in "Is Match".

    Returns:
        pandas.DataFrame with columns "Input Name", "Best Match",
        "Match Score" and "Is Match". The columns are present even when
        `inputs` is empty, so downstream column lookups do not fail.
    """
    columns = ["Input Name", "Best Match", "Match Score", "Is Match"]
    results = []
    for name in inputs:
        best_match, score, _ = fast_match(name, sanctions_list, threshold=threshold)
        results.append({
            "Input Name": name,
            "Best Match": best_match,
            "Match Score": score,
            # Decided here rather than taken from fast_match's flag, so the
            # caller's threshold is always the one applied.
            "Is Match": score >= threshold
        })
    # Explicit columns keep the schema stable for an empty result list.
    return pd.DataFrame(results, columns=columns)


In [58]:
# For quick test runs, shrink the workload by uncommenting the slice below
# input_names = input_names[:500]

# Screen every input name against the sanctions list, then preview the first rows
df_screened = fast_screen(inputs=input_names, sanctions_list=sanctions_list, threshold=85)
df_screened.head()


Unnamed: 0,Input Name,Best Match,Match Score,Is Match
0,VALUMAR LLC,UMAC LLC,73.684211,False
1,Hadi Soleimanpour,"DAOUD, Suleiman Cabdi",57.894737,False
2,Alí Fallahijan,"AZZAM, Mansour Fadlallah",52.631579,False
3,Ahmad Reza Asghari,"SHIVA'I, Ahmad Asghari",70.0,False
4,hussein mohamed jomma,"SALAD, Mohamed Hussein",60.465116,False


In [59]:
def categorize(score, strong_threshold=85, partial_threshold=60):
    """
    Map a match score to a descriptive category label.

    Parameters:
        score: numeric match score.
        strong_threshold: minimum score labelled "Strong Match" (default 85).
        partial_threshold: minimum score labelled "Partial Match" (default 60).

    Returns:
        "Strong Match" if score >= strong_threshold,
        "Partial Match" if score >= partial_threshold,
        "Weak Match" if score > 0,
        otherwise "No Match" (this includes NaN, for which every
        comparison is false).
    """
    # The cut-offs are parameters so they can be kept in step with whatever
    # threshold the screening step used, instead of being fixed at 85/60.
    if score >= strong_threshold:
        return "Strong Match"
    elif score >= partial_threshold:
        return "Partial Match"
    elif score > 0:
        return "Weak Match"
    else:
        return "No Match"

# Label every row with the category that corresponds to its match score
df_screened["Match Category"] = df_screened["Match Score"].map(categorize)


In [None]:
# Write the screened results to CSV; index=False keeps the row index out of the file
df_screened.to_csv(
    "C:/Users/ajaxc/Downloads/df_screened.csv",
    index=False,
)