In [1]:
def flag_trust_co_columns(df, owner_name, address, trust_filter, co_filter):
    """Add trustee / care-of flag columns to ``df`` in place and return it.

    Parameters
    ----------
    df : DataFrame holding the owner-name column and the address columns.
    owner_name : label of the owner-name column.
    address : list of address column labels; they are scanned row by row.
    trust_filter : list of regex fragments for trust-related terms.
    co_filter : list of regex fragments for care-of terms.

    Columns written
    ---------------
    address_with_trustee_co : list of the address column labels whose value
        matches a ``co_filter`` term or "trustee" (case-insensitive), or the
        integer 0 when no address column matches.
    address_with_trustee_co_len : number of matching address columns.
    name_with_trustee_co : 1 if the owner name matches a ``co_filter`` term
        or "trustee", else 0.
    name_with_trust_co_terms : 1 if the owner name matches any
        ``trust_filter`` or ``co_filter`` term, else 0.
    n_flag_trustee_co : sum of the three numeric flags above.
    """
    # The filter terms are joined unescaped, so each one acts as a regex.
    trustee_co_pattern = "|".join(co_filter + ["trustee"])
    trust_co_pattern = "|".join(trust_filter + co_filter)

    def _matching_fields(record):
        # Labels of the address fields in this row that contain a term.
        hits = record.str.contains(trustee_co_pattern, na=False, case=False)
        return record[hits].index.tolist()

    def _zero_if_empty(fields):
        # Rows without any hit carry the sentinel 0 instead of an empty list.
        return fields if len(fields) != 0 else 0

    df["address_with_trustee_co"] = df[address].apply(_matching_fields, axis=1)
    df["address_with_trustee_co"] = df["address_with_trustee_co"].apply(
        _zero_if_empty
    )

    # Count the flagged address fields. When no row has a hit the column is
    # purely numeric (all zeros) and has no ``.str`` accessor, so the count
    # is simply 0 everywhere.
    flagged = df.address_with_trustee_co
    if flagged.dtype == "O":
        # ``.str.len()`` yields NaN for the 0 sentinels; treat those as 0.
        df["address_with_trustee_co_len"] = flagged.str.len().fillna(0)
    else:
        df["address_with_trustee_co_len"] = 0

    owners = df[owner_name]

    # Owner name mentions a care-of term or "trustee".
    name_has_trustee_co = owners.str.contains(
        trustee_co_pattern, na=False, case=False
    )
    df["name_with_trustee_co"] = np.where(name_has_trustee_co, 1, 0)

    # Owner name mentions any of the broader trust / care-of terms.
    name_has_trust_co_terms = owners.str.contains(
        trust_co_pattern, na=False, case=False
    )
    df["name_with_trust_co_terms"] = np.where(name_has_trust_co_terms, 1, 0)

    # Total number of trustee / care-of flags raised for each row.
    df["n_flag_trustee_co"] = (
        df["address_with_trustee_co_len"]
        + df["name_with_trustee_co"]
        + df["name_with_trust_co_terms"]
    )

    return df

In [3]:
def clean_trust_co_columns(df, co_filter, owner_name, address, trust_filter=None):
    """Build ``*_adj`` columns with trust / care-of terms cleaned out.

    Expects ``df`` to already carry the flag columns
    ``address_with_trustee_co``, ``address_with_trustee_co_len``,
    ``name_with_trustee_co`` and ``n_flag_trustee_co``.

    Parameters
    ----------
    df : DataFrame to clean. The ``*_adj`` columns are added to it in place
        before it is split, so the caller's frame gains those columns.
    co_filter : list of regex fragments for care-of terms.
    owner_name : label of the owner-name column.
    address : list of address column labels.
    trust_filter : list of regex fragments for trust-related terms. When
        omitted, a module-level ``trust_filter`` is used, which is what
        earlier versions of this function implicitly relied on.

    Returns
    -------
    A new DataFrame with a fresh 0..n-1 index. Rows are grouped by cleaning
    rule rather than kept in their original order:

    1. more than one flagged address field, or no flags at all: unchanged;
    2. exactly one flagged address field and an unflagged name: the adjusted
       owner name is that address field with the filter terms stripped, and
       the address field's ``_adj`` column is set to "Remove";
    3. exactly one flagged address field and a flagged name: the adjusted
       owner name is the owner name with the filter terms stripped, and the
       address field's ``_adj`` column is set to "Remove";
    4. no flagged address field but some flag on the name: the adjusted
       owner name is the owner name with the filter terms stripped.

    Raises
    ------
    NameError
        If ``trust_filter`` is not passed and no module-level
        ``trust_filter`` exists.
    """
    if trust_filter is None:
        # Backward compatibility: this function used to read ``trust_filter``
        # from the enclosing module instead of taking it as an argument.
        try:
            trust_filter = globals()["trust_filter"]
        except KeyError:
            raise NameError("name 'trust_filter' is not defined") from None

    # Build the alternation once instead of once per row.
    strip_pattern = "|".join(trust_filter + co_filter)
    strip_regex = re.compile(strip_pattern, flags=re.IGNORECASE)

    owner_adj = owner_name + "_adj"

    # The adjusted columns start empty but must be object dtype: they later
    # receive strings, and writing a string into a float64 NaN column is an
    # incompatible-dtype assignment that newer pandas versions reject.
    for col in address:
        df[col + "_adj"] = pd.Series(np.nan, index=df.index, dtype=object)
    df[owner_adj] = pd.Series(np.nan, index=df.index, dtype=object)

    # Row groups, one per cleaning rule.
    mask_1 = (df.address_with_trustee_co_len > 1) | (df.n_flag_trustee_co == 0)
    mask_2 = (df.name_with_trustee_co == 0) & (df.address_with_trustee_co_len == 1)
    mask_3 = (df.name_with_trustee_co == 1) & (df.address_with_trustee_co_len == 1)
    mask_4 = (df.address_with_trustee_co == 0) & (df.n_flag_trustee_co > 0)

    # Rule 1: several flagged address fields, or no flags at all -> untouched.
    df_1 = df[mask_1].reset_index(drop=True)

    # Rule 2: the single flagged address field supplies the adjusted owner
    # name, and that address field is marked for removal.
    df_2 = df[mask_2].reset_index(drop=True)
    for index, row in df_2.iterrows():
        address_col = row["address_with_trustee_co"][0]
        df_2.at[index, owner_adj] = strip_regex.sub("", row[address_col])
        df_2.at[index, address_col + "_adj"] = "Remove"

    # Rule 3: the name itself is flagged, so clean the name and mark the
    # flagged address field for removal.
    df_3 = df[mask_3].reset_index(drop=True)
    df_3[owner_adj] = df_3[owner_name].str.replace(
        strip_pattern, "", regex=True, case=False
    )
    for index, row in df_3.iterrows():
        adj_cols = [col + "_adj" for col in row["address_with_trustee_co"]]
        df_3.loc[index, adj_cols] = "Remove"

    # Rule 4: nothing flagged in the address but trust / c/o terms in the
    # name -> clean the name only.
    df_4 = df[mask_4].reset_index(drop=True)
    df_4[owner_adj] = df_4[owner_name].str.replace(
        strip_pattern, "", regex=True, case=False
    )

    # Stitch the four groups back together under a fresh index.
    return pd.concat([df_1, df_2, df_3, df_4], ignore_index=True)