In [None]:
import numpy as np
import pandas as pd

In [None]:
# Synthetic demo data: 1000 rows with a uniform random salary in [0, 1) and a
# worker category drawn from six labels. The values come from a seeded
# Generator so the frame is identical on every Restart & Run All; without a
# seed each run produced different rows.
rng = np.random.default_rng(42)  # arbitrary fixed seed, only for reproducibility
df = pd.DataFrame({
    'salary': rng.random(1000),
    'category': rng.choice(['flexworker','freelancers', 'contracts', 'permanent workers', 'consultants', 'seizoen'], size=1000)
})

df.head()

Unnamed: 0,salary,category
0,0.270001,seizoen
1,0.05506,seizoen
2,0.659557,seizoen
3,0.027723,contracts
4,0.128917,permanent workers


In [None]:
# Baseline approach: derive `new_category` from `category` with nested
# np.where calls. Each level tests one category value and, when the test
# fails, falls through to the next level; only rows that match none of the
# four tests end up with 'contract'.
df_example = (
    df.assign(
        new_category=lambda d: np.where(
            d.category=='freelancers',
            'flexwork',
            np.where(
                d.category=='flexworker',
                'flexwork',
                np.where(
                    d.category=='consultants',
                    'flexwork',
                    np.where(
                        d.category=='seizoen',
                        'flexwork',
                        'contract'  # fallback: no category test matched
                    )
                )
            )
        )
    )
)
# Display the first 25 rows of the result.
df_example.head(25)

Unnamed: 0,salary,category,new_category
0,0.270001,seizoen,flexwork
1,0.05506,seizoen,flexwork
2,0.659557,seizoen,flexwork
3,0.027723,contracts,contract
4,0.128917,permanent workers,contract
5,0.689392,consultants,flexwork
6,0.376281,flexworker,flexwork
7,0.682801,freelancers,flexwork
8,0.896719,consultants,flexwork
9,0.36609,seizoen,flexwork


As you can see, the nested "where" usually has three parts:


*   The "True" category: the value that is assigned when at least one of the conditions evaluates to true.
*   The conditions
*   The "False" category: the value that is assigned when all of the conditions are false.

*Note*: I've seen some uses where the `np.where` is nested with different conditions and true/false answers, but in that case you could just `assign` them sequentially.



In [None]:
def np_where_binary(*args):
    """Label rows with one of two values, replacing a chain of nested ``np.where``.

    Designed for ``DataFrame.pipe``, so all arguments are positional::

        np_where_binary(df, name, true_category, false_category, *conditions)

    Parameters (positional)
    -----------------------
    df : pandas.DataFrame
        Input frame. It is copied, so the caller's frame is not modified.
    name : str
        Column to write. If it does not exist yet it is first filled with the
        placeholder ``"unknown"``.
    true_category : scalar or callable
        Value for rows where at least one condition holds. A callable is
        invoked with the working frame and its result is used as the value
        (it should be a scalar or row-aligned with the frame).
    false_category : scalar or callable
        Value for rows where no condition holds, resolved like
        ``true_category``. Passing ``lambda d: d[name]`` keeps the value the
        column already has.
    *conditions : callable
        One or more functions that take the frame and return a boolean mask.

    Returns
    -------
    pandas.DataFrame
        A new frame with column ``name`` set.

    Raises
    ------
    AssertionError
        If no condition is supplied (four or fewer positional arguments).

    Notes
    -----
    Conditions are applied one after another. Each pass sees the frame
    produced by the previous pass, and rows already equal to the true value
    stay true, so the conditions combine as a logical OR.
    """
    if len(args) <= 4:
        raise AssertionError("No conditions are given to np_where_binary")

    df, name, true_category, false_category, *conditions = args

    def _resolve(value, frame):
        # Callables are evaluated against the frame; anything else is used as-is.
        return value(frame) if callable(value) else value

    def _updated_column(d, condition):
        # Resolve the true value once and use it for the "already true" check
        # and for both np.where calls. Previously a callable true_category was
        # compared and inserted as a raw function object.
        true_values = _resolve(true_category, d)
        already_true = d[name] == true_values
        matched = condition(d)
        false_values = _resolve(false_category, d)
        return np.where(
            already_true,
            true_values,
            np.where(matched, true_values, false_values),
        )

    df_data = df.copy()

    if name not in df_data.columns:
        # Seed the column so the "already true" comparison has something to read.
        df_data = df_data.assign(**{name: "unknown"})

    for condition in conditions:
        # assign() calls the lambda immediately, so closing over the loop
        # variable is safe here.
        df_data = df_data.assign(**{name: lambda d: _updated_column(d, condition)})

    return df_data

# Same two-way split as the nested np.where version, expressed through the
# helper: one condition per category value that should become "flexwork",
# generated in order from the tuple of labels below.
df_binary = df.pipe(
    np_where_binary,
    "new_category",
    "flexwork",
    "contract",
    *(
        # `label=label` freezes the current label inside each condition.
        (lambda d, label=label: d.category == label)
        for label in ("freelancers", "flexworker", "consultants", "seizoen")
    )
)
df_binary.head(25)

Unnamed: 0,salary,category,new_category
0,0.270001,seizoen,flexwork
1,0.05506,seizoen,flexwork
2,0.659557,seizoen,flexwork
3,0.027723,contracts,contract
4,0.128917,permanent workers,contract
5,0.689392,consultants,flexwork
6,0.376281,flexworker,flexwork
7,0.682801,freelancers,flexwork
8,0.896719,consultants,flexwork
9,0.36609,seizoen,flexwork


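So what about cases with more than two categories? Chain a second call and pass a function as the "False" category, so rows that match none of the new conditions keep the value they already have.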

In [None]:
# Three-way labelling built from two binary passes over the same column.
df_multiple = (
    df
    # Pass 1: "flexwork" for the four listed categories, "contract" otherwise.
    .pipe(
        np_where_binary,
        "new_category",
        "flexwork",
        "contract",
        *(
            (lambda d, label=label: d.category == label)
            for label in ("freelancers", "flexworker", "consultants", "seizoen")
        )
    )
    # Pass 2: overwrite with "expensive" for freelancers and consultants.
    .pipe(
        np_where_binary,
        "new_category",
        "expensive",
        # A function as the false category is evaluated on the frame, so
        # returning the column itself keeps the value that is already there.
        lambda d: d.new_category,
        *(
            (lambda d, label=label: d.category == label)
            for label in ("freelancers", "consultants")
        )
    )
)
df_multiple.head(25)

Unnamed: 0,salary,category,new_category
0,0.270001,seizoen,flexwork
1,0.05506,seizoen,flexwork
2,0.659557,seizoen,flexwork
3,0.027723,contracts,contract
4,0.128917,permanent workers,contract
5,0.689392,consultants,expensive
6,0.376281,flexworker,flexwork
7,0.682801,freelancers,expensive
8,0.896719,consultants,expensive
9,0.36609,seizoen,flexwork
