# Pivoting a DataFrame Based on Label Values In a Column

The goal of this application is to transform a table (DataFrame) with a normalized-like structure into a non-normalized structure.

In [7]:
import pandas as pd

### Test DataFrame Creation

In [8]:
def create_dataframe():
    """Return the small sample frame used to exercise the transformation.

    Each row is ``[label, value, value]``. No column names are supplied, so
    pandas assigns the default integer column labels ``0``, ``1`` and ``2``.
    """
    sample_rows = [
        ['A', 1, 2],
        ['B', 3, 3],
        ['C', 1, 4],
    ]
    return pd.DataFrame(sample_rows)

In [9]:
# Show the sample frame: labels sit in column 0, values in columns 1 and 2.
create_dataframe()

Unnamed: 0,0,1,2
0,A,1,2
1,B,3,3
2,C,1,4


### Transformation Function

In [10]:
def transform(dataframe, label_column_name):
    """Reshape a wide frame into one row per (label, value column) pair.

    Column ``0`` of ``dataframe`` is treated as the label column and renamed
    to ``label_column_name``; every other column is treated as a value
    column. The result lists, for each label, the value found in each value
    column. Rows that share a label are summed per value column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input frame whose column ``0`` holds the labels. Any number of rows
        and value columns is accepted. The input is not modified.
    label_column_name : str
        Name given to the label column in the result.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``label_column_name`` and ``0`` (the integer values),
        ordered by label and then by original value column, with a fresh
        integer index.
    """
    labelled = dataframe.rename({0: label_column_name}, axis=1)

    # After pivot + transpose the rows are (value column, label) pairs and the
    # columns are the original row labels; each row has a single populated
    # cell per source row carrying that label, the rest are NaN -> 0.
    per_pair = (
        labelled
        .pivot(columns=label_column_name)
        .transpose()
        .fillna(0)
        .astype(int)
        .sort_index(level=1)
    )

    # Sum across *all* original-row columns while the frame is still purely
    # numeric. Doing this before the label is moved into a column avoids both
    # hard-coding the number of input rows and summing over a string column.
    totals = per_pair.sum(axis=1)

    # Drop the (unnamed) value-column level by position, then expose the label
    # as a regular column; the values column keeps the historical name ``0``.
    return (
        totals
        .reset_index(level=0, drop=True)
        .reset_index(name=0)
    )

In [11]:
# Apply the transformation to the sample frame, naming the label column 'Label'.
transform(create_dataframe(), 'Label') 

Unnamed: 0,Label,0
0,A,1
1,A,2
2,B,3
3,B,3
4,C,1
5,C,4


In [12]:
# Note: the measured time includes building the sample frame on every loop,
# not just the transformation itself.
%timeit transform(create_dataframe(), 'Label')

8.92 ms ± 236 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
