In [1]:
import re
import time  # Import time module

# Define regex patterns for each entity
patterns = {
    "Full Name": r"\b[A-Z][a-z]+ [A-Z][a-z]+\b",  # Name with two words, capitalized
    "Gender": r"\b(Male|Female|Non[- ]binary|Transgender|Other|male|female|non[- ]binary|transgender|M|F|NB|T|m|f|t|nb|Mr|Mrs|Ms|Miss|Mx)\.?\b",
    "Date of Birth": r"\b(?:\d{1,2}[-/.]\d{1,2}[-/.]\d{2,4}|\d{4}[-/.]\d{1,2}[-/.]\d{1,2}|(?:\d{1,2}(?:st|nd|rd|th)?[-\s])?(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[-\s]?\d{1,2}(?:,?[-\s]?\d{2,4})?)\b",
    "Nationality": r"\b(Indian|Indian)\b",  # Only "Indian"
    "City": r"(?:lives in|resides in|is based in|is located in|in|is in|stays in|is a resident of|is a citizen of|calls(?: it)? home)\s+([A-Za-z\s-]+)",
    "Email Address": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
    "Phone Number": r"\b(?:\+91[\-\s]?)?(?:\(?91\)?[\-\s]?)?[6789]\d{2}[\-\s]?\d{3}[\-\s]?\d{4}\b",
    "Aadhar Number": r"\b[2-9][0-9]{3} [0-9]{4} [0-9]{4}\b",
    "PAN Card Details": r"\b[A-Z]{5}[0-9]{4}[A-Z]{1}\b",
    "Voter ID Number": r"\b([A-Z]{3}[0-9]{7})\b",
    "Passport Number": r"\b[A-Z][1-9][0-9] ?[0-9]{4}[0-9]\b",
    "Driving License Number": r"[A-Z]{2}[-\s]?\d{2}[-\s]?\d{4}[-\s]?\d{7}",
    "Bank Account Details": r"\b\d{11,18}\b",
    "Credit Card and Debit Card Information": r"\b\d{4}[- ]\d{4}[- ]\d{4}[- ]\d{4}\b",
    "IP Address": r"\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
    "IFSC Code": r"\b[A-Z]{4}[-\s]?0[-\s]?[A-Z0-9]{6}\b",
    "Expiry Date": r"\b(?:0[1-9]|1[0-2])\/[2-9][0-9]\b",
    "CVV": r"\b[Cc][Vv][Vv]\s(?:was|is|of)\s(\d{3})\b"
}

# Sample input sentences (Indian details only)
sentences = [
    "Arjun Reddy, Male, born on 25-03-1998, lives in Hyderabad. His email is arjun.reddy@domain.in, and his phone number is +91-9876543210. His aadhar number is 1234 5678 9123, and his PAN card number is ABCDE1234F. He has a Voter ID number XYZ1234567 and a passport number A1234567. His bank account number is 1234567890123456, and his driving license number is TS-10-2019-1234567.",
    "Kavya Sharma, Female, was born on 01/11/2000 in Bengaluru. Her contact number is 9901234567, and her email is kavya.sharma@domain.com. Her aadhar number is 2345 6789 0123, PAN card number is PQRST6789X, and her IFSC code is SBIN0001234. Her IP address is 192.168.1.101.",
    "Rohit Gupta, Male, born on 15-08-1995, resides in Jaipur. His email is rohit.gupta@domain.co.in, and his phone number is 9123456789. His aadhar number is 3456 7890 1234, PAN card number is LMNOP1234Y, and his voter ID number is ABC9876543. The expiry date of his credit card is 06/26, and the CVV is 456.",
    "Priya Singh, Female, born on 10th October 1992, lives in new Delhi. Her contact number is +91-9876543211, and her email is priya.singh@example.in. She provided her Aadhar number 4567 8901 2345, PAN card ABCDE5678Z, and driving license DL-06-2012-0012345. Her IP address is 172.16.254.1.",
    "Abhishek Verma, Male, born on 20/02/1990, stays in Pune. His email is abhishek.verma@domain.in, and his phone number is 9876541230. His aadhar number is 5678 9012 3456, and his bank account number is 123456789012. His IFSC code is ICIC0002345. His credit card number is 5678-1234-9876-5432, expiring 05/27, with a CVV of 789.",
    "Anjali Mehta, Female, born on 12/12/1997, currently resides in Chennai. Her phone number is +91 9123456780, and her email address is anjali.mehta@domain.co.in. Her aadhar number is 6789 0123 4567, her voter ID is XYZ6543210, and her driving license number is TN-22-2015-9876543. Her bank account details are 9876543210123456."
]

# Function to extract entities based on regex patterns
def extract_entities(text, patterns):
    extracted = {}
    for entity, pattern in patterns.items():
        matches = re.findall(pattern, text)
        extracted[entity] = matches
    return extracted

# Start the timer
start_time = time.time()

# Process each sentence
for i, sentence in enumerate(sentences, 1):
    print(f"Extracting entities from sentence {i}:")
    extracted_entities = extract_entities(sentence, patterns)

    # Display the results for the current sentence
    for entity, matches in extracted_entities.items():
        print(f"  {entity}: {matches if matches else 'No match found'}")
    print()  # Blank line between sentences

# Stop the timer
end_time = time.time()

# Display the total time taken
time_taken = end_time - start_time
print(f"Time taken to detect entities: {time_taken:.6f} seconds")


Extracting entities from sentence 1:
  Full Name: ['Arjun Reddy']
  Gender: ['Male']
  Date of Birth: ['25-03-1998']
  Nationality: No match found
  City: ['Hyderabad']
  Email Address: ['arjun.reddy@domain.in']
  Phone Number: ['91-9876543210']
  Aadhar Number: No match found
  PAN Card Details: ['ABCDE1234F']
  Voter ID Number: ['XYZ1234567']
  Passport Number: ['A1234567']
  Driving License Number: ['TS-10-2019-1234567']
  Bank Account Details: ['1234567890123456']
  Credit Card and Debit Card Information: No match found
  IP Address: No match found
  IFSC Code: No match found
  Expiry Date: No match found
  CVV: No match found

Extracting entities from sentence 2:
  Full Name: ['Kavya Sharma']
  Gender: ['Female']
  Date of Birth: ['01/11/2000']
  Nationality: No match found
  City: ['Bengaluru']
  Email Address: ['kavya.sharma@domain.com']
  Phone Number: ['9901234567']
  Aadhar Number: ['2345 6789 0123']
  PAN Card Details: ['PQRST6789X']
  Voter ID Number: No match found
  Passp