In [1]:
import json
import logging
import re
import warnings
from pathlib import Path
from pprint import pprint
from typing import Annotated, Any, Generator, Literal, Type, TypeVar

# Standard imports
import numpy as np
import numpy.typing as npt
import pandas as pd
import polars as pl

# Visualization
# import matplotlib.pyplot as plt

# NumPy settings
np.set_printoptions(precision=4)

# Pandas settings
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

# Polars settings
pl.Config.set_fmt_str_lengths(1_000)
pl.Config.set_tbl_cols(n=1_000)
pl.Config.set_tbl_rows(n=200)

warnings.filterwarnings("ignore")

# Black code formatter (Optional)
%load_ext lab_black

# auto reload imports
%load_ext autoreload
%autoreload 2

In [None]:
from rich.console import Console
from rich.theme import Theme

custom_theme = Theme({
    "white": "#FFFFFF",  # Bright white
    "info": "#00FF00",  # Bright green
    "warning": "#FFD700",  # Bright gold
    "error": "#FF1493",  # Deep pink
    "success": "#00FFFF",  # Cyan
    "highlight": "#FF4500",  # Orange-red
})
console = Console(theme=custom_theme)


def create_path(path: str | Path) -> None:
    """
    Create parent directories for the given path if they don't exist.

    Parameters
    ----------
    path : str | Path
        The file path for which to create parent directories.

    """
    Path(path).parent.mkdir(parents=True, exist_ok=True)


def go_up_from_current_directory(*, go_up: int = 1) -> None:
    """This is used to up a number of directories.

    Params:
    -------
    go_up: int, default=1
        This indicates the number of times to go back up from the current directory.

    Returns:
    --------
    None
    """
    import os
    import sys

    CONST: str = "../"
    NUM: str = CONST * go_up

    # Goto the previous directory
    prev_directory = os.path.join(os.path.dirname(__name__), NUM)
    # Get the 'absolute path' of the previous directory
    abs_path_prev_directory = os.path.abspath(prev_directory)

    # Add the path to the System paths
    sys.path.insert(0, abs_path_prev_directory)
    print(abs_path_prev_directory)

In [3]:
go_up_from_current_directory(go_up=1)

/Users/neidu/Desktop/Projects/Personal/batch-process


In [169]:
import spacy

nlp = spacy.load("en_core_web_md")


def smart_capitalize(text: str) -> str:
    """
    Capitalizes proper nouns and named entities in the input text.

    Parameters
    ----------
    text : str
        The input text to process.

    Returns
    -------
    str
    """
    doc = nlp(text)
    tokens: list[str] = []
    for token in doc:
        if token.pos_ == "PROPN" or token.pos_ == "NOUN" or token.ent_type_ != "":
            tokens.append(token.text.capitalize())
        else:
            print(token.text)
            tokens.append(token.text.lower())
    return " ".join(tokens)

In [170]:
smart_capitalize("A-CASH WDR BY OLABODE SHITTU AKINTUYI/17984202")

-
BY


'A - Cash Wdr by Olabode Shittu Akintuyi/17984202'

In [168]:
d = nlp("A-CASH WDR BY OLABODE SHITTU AKINTUYI/17984202")

for token in d:
    print(token.text, token.pos_, token.ent_type_)

A NOUN 
- PUNCT 
CASH NOUN 
WDR NOUN 
BY ADP 
OLABODE PROPN ORG
SHITTU PROPN ORG
AKINTUYI/17984202 PROPN 


In [None]:
token.

''