In [1]:
# Project Title: Name Screening & Sanctions Watchlist Matching

In [2]:
import pandas as pd
import numpy as np
from difflib import SequenceMatcher

# Show every column when a DataFrame is rendered (None removes the column limit).
pd.options.display.max_columns = None


In [3]:
# Fixed seed so the synthetic `country` column is identical on every
# Restart & Run All; the draw uses a local generator, so NumPy's global
# random state is left untouched.
RANDOM_SEED = 42

customer_names = [
    "John Mwangi", "Ali Hassan", "Mohamed Abdalla", "Peter Otieno",
    "Fatima Noor", "Ahmed Ali", "James Kariuki", "Samuel Kiptoo",
    "Hassan Omar", "Amina Yusuf", "Daniel Kimani", "Ibrahim Musa",
    "Joseph Mutua", "Omar Said", "Grace Wanjiku",
    "Abdul Rahman", "Paul Njoroge", "Salim Mohamed",
    "Mary Achieng", "Yusuf Ali"
]

# Synthetic customer book: sequential ids, the names above, and a randomly
# assigned country. Sizes are derived from the name list so the three
# columns cannot drift out of sync when names are added or removed.
customers = pd.DataFrame({
    "customer_id": range(1, len(customer_names) + 1),
    "full_name": customer_names,
    "country": np.random.default_rng(RANDOM_SEED).choice(
        ["Kenya", "Somalia", "Sudan", "Iran", "Uganda"],
        size=len(customer_names),
    )
})

customers.head()


Unnamed: 0,customer_id,full_name,country
0,1,John Mwangi,Kenya
1,2,Ali Hassan,Uganda
2,3,Mohamed Abdalla,Kenya
3,4,Peter Otieno,Sudan
4,5,Fatima Noor,Kenya


In [4]:
# Watchlist used for screening: one (name, sanctioning body) record per row.
sanctions = pd.DataFrame(
    [
        ("Ahmed Ali", "UN"),
        ("Mohamed Abdallah", "OFAC"),
        ("Yusuf Ali", "UN"),
        ("Abdul Rahman", "OFAC"),
        ("Hassan Omar", "EU"),
    ],
    columns=["sanctioned_name", "sanctioning_body"],
)

sanctions


Unnamed: 0,sanctioned_name,sanctioning_body
0,Ahmed Ali,UN
1,Mohamed Abdallah,OFAC
2,Yusuf Ali,UN
3,Abdul Rahman,OFAC
4,Hassan Omar,EU


In [5]:
def similarity_score(a, b):
    """Return a case-insensitive similarity ratio in [0.0, 1.0] for two names.

    Both names are normalised before comparison: leading/trailing whitespace
    is dropped, internal runs of whitespace collapse to a single space, and
    the text is case-folded. The normalised strings are then compared with
    ``difflib.SequenceMatcher(...).ratio()``.

    Parameters
    ----------
    a, b : str
        The two names to compare.

    Returns
    -------
    float
        1.0 for identical normalised names, lower for less similar ones.
        0.0 when either value is not a string (e.g. ``None`` or NaN from a
        missing cell) or is blank after normalisation.
    """
    # A missing name cannot be a match; score it 0.0 instead of raising
    # AttributeError in the middle of a screening run.
    if not isinstance(a, str) or not isinstance(b, str):
        return 0.0

    left = " ".join(a.split()).casefold()
    right = " ".join(b.split()).casefold()

    # SequenceMatcher.ratio() returns 1.0 for two empty sequences, which
    # would report two blank names as a perfect match.
    if not left or not right:
        return 0.0

    return SequenceMatcher(None, left, right).ratio()


In [6]:
# Minimum similarity (0-1) for a customer/watchlist pair to be reported.
MATCH_THRESHOLD = 0.85

# Fixed output schema. Passing it to the DataFrame constructor keeps these
# columns even when nothing matches; an empty list alone would give a
# column-less frame, and any later lookup of `match_score` would raise KeyError.
MATCH_COLUMNS = [
    "customer_id",
    "customer_name",
    "sanctioned_name",
    "sanctioning_body",
    "match_score",
]

matches = []

# Compare every customer with every watchlist entry (full cross product) and
# keep the pairs whose similarity reaches the threshold.
for cust in customers.itertuples(index=False):
    for sanc in sanctions.itertuples(index=False):
        score = similarity_score(cust.full_name, sanc.sanctioned_name)

        if score >= MATCH_THRESHOLD:
            matches.append({
                "customer_id": cust.customer_id,
                "customer_name": cust.full_name,
                "sanctioned_name": sanc.sanctioned_name,
                "sanctioning_body": sanc.sanctioning_body,
                # Stored rounded to two decimals for display/export.
                "match_score": round(score, 2)
            })

matches_df = pd.DataFrame(matches, columns=MATCH_COLUMNS)
matches_df


Unnamed: 0,customer_id,customer_name,sanctioned_name,sanctioning_body,match_score
0,3,Mohamed Abdalla,Mohamed Abdallah,OFAC,0.97
1,6,Ahmed Ali,Ahmed Ali,UN,1.0
2,9,Hassan Omar,Hassan Omar,EU,1.0
3,16,Abdul Rahman,Abdul Rahman,OFAC,1.0
4,20,Yusuf Ali,Yusuf Ali,UN,1.0


In [7]:
# Label each reported pair by its stored match_score: 0.95 or higher is
# treated as a likely match, anything lower as a possible match.
matches_df["risk_level"] = np.where(
    matches_df["match_score"].ge(0.95),
    "High Risk – Likely Match",
    "Medium Risk – Possible Match",
)

matches_df


Unnamed: 0,customer_id,customer_name,sanctioned_name,sanctioning_body,match_score,risk_level
0,3,Mohamed Abdalla,Mohamed Abdallah,OFAC,0.97,High Risk – Likely Match
1,6,Ahmed Ali,Ahmed Ali,UN,1.0,High Risk – Likely Match
2,9,Hassan Omar,Hassan Omar,EU,1.0,High Risk – Likely Match
3,16,Abdul Rahman,Abdul Rahman,OFAC,1.0,High Risk – Likely Match
4,20,Yusuf Ali,Yusuf Ali,UN,1.0,High Risk – Likely Match


In [8]:
# Write the flagged pairs to CSV in the working directory; the positional
# row index is left out of the file.
matches_df.to_csv(path_or_buf="potential_sanctions_matches.csv", index=False)
print("Export complete: potential_sanctions_matches.csv")


Export complete: potential_sanctions_matches.csv
