In [3]:
!jupyter nbextension enable --py widgetsnbextension

usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]
               [--paths] [--json] [--debug]
               [subcommand]

Jupyter: Interactive Computing

positional arguments:
  subcommand     the subcommand to launch

options:
  -h, --help     show this help message and exit
  --version      show the versions of core jupyter packages and exit
  --config-dir   show Jupyter config dir
  --data-dir     show Jupyter data dir
  --runtime-dir  show Jupyter runtime dir
  --paths        show all Jupyter paths. Add --json for machine-readable
                 format.
  --json         output paths as machine-readable json
  --debug        output debug information about paths

Available subcommands: kernel kernelspec migrate run troubleshoot

Jupyter command `jupyter-nbextension` not found.


Import Libraries for data profiling

In [4]:
import numpy as np
import pandas as pd
from ydata_profiling import ProfileReport

Profile of Acme Contacts

In [10]:
acme_df = pd.read_csv ('data/acme__contacts.csv')
profile = ProfileReport(acme_df, title="Acme Contacts Profiling Report")
profile.to_notebook_iframe()


Summarize dataset: 100%|██████████| 25/25 [00:00<00:00, 49.62it/s, Completed]                                   
Generate report structure: 100%|██████████| 1/1 [00:03<00:00,  3.14s/it]
Render HTML: 100%|██████████| 1/1 [00:00<00:00,  3.64it/s]


Profile of CRM Contacts

In [11]:
crm_df = pd.read_csv ('data/crm__contacts.csv')
profile = ProfileReport(crm_df, title="CRM Contacts Profiling Report")
profile.to_notebook_iframe()

Summarize dataset: 100%|██████████| 19/19 [00:00<00:00, 45.99it/s, Completed]                      
Generate report structure: 100%|██████████| 1/1 [00:02<00:00,  2.71s/it]
Render HTML: 100%|██████████| 1/1 [00:00<00:00,  4.04it/s]


Profile of Rapid Data Contacts

In [12]:
rd_df = pd.read_csv ('data/rapid_data__contacts.csv')
profile = ProfileReport(rd_df, title="Rapid Data Contacts Profiling Report")
profile.to_notebook_iframe()

Summarize dataset: 100%|██████████| 21/21 [00:00<00:00, 51.26it/s, Completed]                      
Generate report structure: 100%|██████████| 1/1 [00:03<00:00,  3.32s/it]
Render HTML: 100%|██████████| 1/1 [00:00<00:00,  4.42it/s]


In [22]:
import re

def get_name_pattern(full_name):
    pattern = [
        '{prefix}.', '{prefix}', '{fi}.', '{first}', '{mi}.', '{middle}', '{last}', '{suffix}.', '{honorific}'
    ]

    # Tokenize the input full name
    tokens = re.findall(r'\b\w+\b', full_name)

    # Iterate through tokens to determine the pattern
    for token in tokens:
        if token.lower() in ['dr', 'prof', 'sir', 'mr', 'mrs', 'ms', 'miss']:
            pattern[0] += '.' if '.' in token else ''
        elif token.lower() in ['jr', 'sr', 'i', 'ii', 'iii', 'iv']:
            pattern[5] += '.' if '.' in token else ''
        elif token in ['jd', 'md', 'phd']:
            pattern[6] += f' {token}'
        elif len(token) == 1 and token.endswith('.'):
            pattern[1] += '.'
        elif '.' in token:
            # Handle middle initials or prefixes with periods
            pattern[3] += ' '
        else:
            # Determine if it's the first, middle, or last name
            if '{first}' not in pattern:
                pattern[2] = '{first}'
            elif '{last}' not in pattern:
                pattern[4] = '{last}'
            else:
                pattern[3] += '{middle} '

    # Clean up the pattern
    pattern[3] = pattern[3].strip()

    # Construct the final pattern string
    result = ' '.join(pattern).strip()

    return result

# Example usage
name1 = 'Dr. John Smith II, PhD'
name2 = 'Miss Jane Doe'
name3 = 'Smith, John'
name4 = 'J. Doe'
name5 = 'J.R.R. Tolkien'
name6 = 'Prof. George R. R. Martin'
name7 = 'Sammy Davis Jr.'

print(get_name_pattern(name1))
print(get_name_pattern(name2))
print(get_name_pattern(name3))
print(get_name_pattern(name4))
print(get_name_pattern(name5))
print(get_name_pattern(name6))
print(get_name_pattern(name7))

{prefix}. {fi}. {first} {middle}{middle} {middle} {middle} {last} {suffix}. {honorific}
{prefix}. {fi}. {first} {middle}{middle} {middle} {last} {suffix}. {honorific}
{prefix}. {fi}. {first} {middle}{middle} {middle} {last} {suffix}. {honorific}
{prefix}. {fi}. {first} {middle}{middle} {middle} {last} {suffix}. {honorific}
{prefix}. {fi}. {first} {middle}{middle} {middle} {middle} {middle} {last} {suffix}. {honorific}
{prefix}. {fi}. {first} {middle}{middle} {middle} {middle} {middle} {last} {suffix}. {honorific}
{prefix}. {fi}. {first} {middle}{middle} {middle} {last} {suffix}. {honorific}
