<a href="https://colab.research.google.com/github/gr3ybr0w/cookbook/blob/master/pandas/preprocess/create_dummies.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
def create_dummy_df(df, cat_cols, drop_first=True, dummy_na=False):
    '''
    Replace categorical columns of a dataframe with dummy (one-hot) columns.

    Each column named in cat_cols is removed and its dummy columns, as produced
    by pd.get_dummies, are appended on the right-hand side of the frame.
    Names in cat_cols that are not columns of df are skipped.

    When working with a linear model, set drop_first to True so that one level
    of each categorical is represented by all of its dummy columns being 0;
    this is not needed when working with decision trees.

    INPUT:
    df - pandas dataframe with categorical variables you want to dummy
    cat_cols - list of strings that are associated with names of the categorical columns
    drop_first - Bool holding whether to drop the first level of each categorical
                 column (defaults to True)
    dummy_na - Bool holding whether you want to dummy NA vals of categorical columns or not
               (defaults to False)

    OUTPUT:
    df - a new dataframe that has the following characteristics:
            1. contains all columns that were not specified as categorical
            2. removes all the original columns in cat_cols
            3. dummy columns for each of the categorical columns in cat_cols
            4. if dummy_na is True - it also contains dummy columns for the NaN values
            5. Use a prefix of the column name with an underscore (_) for separating
         If no name in cat_cols matches a column, the input df is returned as is.
    '''
    for col in cat_cols:
        try:
            # Build the dummy columns first; a missing column raises KeyError here.
            dummies = pd.get_dummies(
                df[col],
                prefix=col,
                prefix_sep='_',
                drop_first=drop_first,
                dummy_na=dummy_na,
            )
        except KeyError:
            # Best-effort: a requested column that is absent is skipped.
            # Any other error is a real problem and is allowed to propagate.
            continue

        # Drop the original column and append its dummies. Both drop and concat
        # return new frames, so the caller's dataframe is not modified.
        df = pd.concat([df.drop(col, axis=1), dummies], axis=1)

    return df