In [1]:
import pandas as pd
import numpy as np

In [2]:
# Benchmark input: four team names, each using a different separator
# (space, pipe, dot, hyphen), repeated 100,000 times for 400,000 rows.
series2 = pd.Series(
    ["New England Patriots", "Denver|Broncos", "Carolina.Panthers", "Philadelphia-Eagles"]
    * 100000
)

In [3]:
# Per-row delimiter and per-row chunk position (1-based: `split` subtracts one
# before indexing), repeated to line up row-for-row with series2.
chars = pd.Series([" ", "|", ".", "-"] * 100000)
idxs = pd.Series([2, 1, 2, 1] * 100000)

In [4]:
def split(df, char, idx):
    """Split the string ``df`` on ``char`` and return the chunk at 1-based position ``idx``."""
    # Callers count chunks from one; Python lists count from zero.
    position = idx - 1
    pieces = df.split(char)
    return pieces[position]

In [6]:
# Time the np.vectorize wrapper, which calls split() once per row across the three series.
%timeit np.vectorize(split)(series2, chars, idxs)

177 ms ± 3.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### `pd.Series.apply()` method replicated below

In [5]:
def is_series(a) -> bool:
    """Return True when ``a`` is a ``pandas.Series`` (or a subclass of it), otherwise False."""
    series_type = pd.Series
    return isinstance(a, series_type)

def scalar_to_repeating_series(possible_scalar, compared_to):
    """Conform input into a series that is the same size as the comparison obj, possibly by repeating the input value.

    This helper may be used where both scalar values and series should be supported by the caller.

    Args:
        possible_scalar: The input to be conformed (either a scalar or series)
        compared_to: The comparison object to determine the size of the output series (either a series or dataframe)

    Returns:
        Series of values that is the same size as the comparison obj, repeating possible_scalar if necessary.
        A series input of the right size is returned as-is (same object, not a copy).

    Raises:
        ValueError: If compared_to is neither a series nor a dataframe, or if possible_scalar
            is a series whose size differs from the number of rows in compared_to.
    """
    if isinstance(compared_to, pd.DataFrame):
        num_rows, _ = compared_to.shape
    elif isinstance(compared_to, pd.Series):
        num_rows = compared_to.size
    else:
        raise ValueError(f"Unsupported comparison obj {type(compared_to)}.")

    if isinstance(possible_scalar, pd.Series):
        if possible_scalar.size == num_rows:
            return possible_scalar
        # Both literals need the f prefix: adjacent string literals are concatenated,
        # but each one is formatted on its own, so a plain second half would print
        # "{num_rows}" verbatim.
        raise ValueError(
            f"Series {possible_scalar} does not match the size of the comparison obj. "
            f"Expected num rows: {num_rows}."
        )

    # Wrap the scalar in a one-element series, repeat it num_rows times, then
    # replace the resulting all-zero index with a fresh 0..num_rows-1 range.
    return pd.Series([possible_scalar]).repeat(num_rows).reset_index(drop=True)

def split_(df, i):
    """Split row ``i``'s text on that row's delimiter and pick the chunk at that row's index.

    Reads the "text", "delimiters" and "index" columns of ``df`` at label ``i``.
    If the index is not below the number of chunks, the first chunk is returned instead.
    """
    row_text = df["text"][i]
    row_delimiter = df["delimiters"][i]
    position = df["index"][i]

    pieces = row_text.split(row_delimiter)
    # Fall back to the first chunk when the requested position is past the end.
    return pieces[position] if position < len(pieces) else pieces[0]

def execute(text=series2, char=chars, idx=idxs):
    """Split text on a delimiter and return the chunk at a 1-based position.

    Args:
        text: A string, or a series of strings, to split.
        char: The delimiter; a scalar, or a series the same size as ``text``.
        idx: The 1-based position of the chunk to return; a scalar, or a series
            the same size as ``text``.

    Returns:
        When ``text`` is a series and ``char`` or ``idx`` is a series: a series with one
        chunk per row (``split_`` falls back to the first chunk when the position is past
        the end). When ``text`` is a series and both ``char`` and ``idx`` are scalars: the
        matching column of the expanded split. Otherwise: a single chunk of the string.

    Raises:
        ValueError: Propagated from ``scalar_to_repeating_series`` when a series argument
            does not match the size of ``text``.
    """
    # Convert the caller's 1-based position to 0-based, as `split` does. Use `idx - 1`
    # rather than `idx -= 1` so a series passed in (or the default) is never mutated.
    idx = idx - 1
    if is_series(text):
        if is_series(char) or is_series(idx):
            char_series = scalar_to_repeating_series(char, text)
            idx_series = scalar_to_repeating_series(idx, text)

            # drop=True: only a fresh 0..n-1 index is needed for the row lookups below,
            # not the old index kept as an extra column.
            split_df = pd.DataFrame(
                {"text": text, "delimiters": char_series, "index": idx_series}
            ).reset_index(drop=True)
            return pd.Series(split_df.index.values).apply(lambda i: split_(split_df, i))

        return text.str.split(char, expand=True)[idx]

    return text.split(char)[idx]

In [7]:
# Time the DataFrame/apply-based execute() on the same three series for comparison.
%timeit execute(text=series2, char=chars, idx=idxs)

13.7 s ± 250 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
