### Clinical trials data

In [30]:
import pandas as pd

In [31]:
# Toy clinical-trial records, stored column-wise: one list per field.
trials = {
    'id': [1, 2, 3, 4],
    'treatment': ['A', 'A', 'B', 'B'],
    'gender': ['F', 'M', 'F', 'M'],
    'response': [5, 3, 8, 9],
}

In [32]:
# Build a DataFrame from the column-wise dict, rebinding the name `trials`.
trials = pd.DataFrame(trials)

### Reshaping by pivoting

In [33]:
# Reshape to one row per treatment and one column per gender, filled with
# the response values.
trials.pivot(index='treatment', columns='gender', values='response')

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,5,3
B,8,9


### Pivoting multiple columns

In [34]:
# With no `values` given, every remaining column (id, response) is pivoted,
# which yields two-level column labels.
trials.pivot(index='treatment', columns='gender')

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


# Stacking & unstacking DataFrames

### Creating a multi-level index

In [35]:
# Show the flat frame (default integer row index) before re-indexing.
trials

Unnamed: 0,gender,id,response,treatment
0,F,1,5,A
1,M,2,3,A
2,F,3,8,B
3,M,4,9,B


In [36]:
# Move treatment and gender out of the columns and into a two-level row index.
trials = trials.set_index(['treatment', 'gender'])

In [37]:
# Display the re-indexed frame: rows are keyed by (treatment, gender).
trials

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


### Unstacking a multi-index

In [38]:
# Starting point for unstacking: `trials` indexed by (treatment, gender).
trials

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


In [39]:
# Move the gender index level into the columns, selecting the level by name.
# The result is not assigned, so `trials` itself is left as it was.
trials.unstack(level='gender')

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


In [40]:
# Display `trials` again: the unstacked result was not assigned, so the
# frame is unchanged.
trials

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


In [41]:
# Same reshape as unstacking 'gender', but selecting the level by position
# (level 0 is treatment, level 1 is gender).
trials.unstack(level=1)

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


### Stacking DataFrames

In [42]:
# Keep the gender-unstacked (wide) frame so it can be stacked back again.
trials_by_gender = trials.unstack(level='gender')

In [43]:
# Display the wide frame: gender is now the inner column level.
trials_by_gender

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


In [44]:
# Move the gender column level back into the row index, undoing the unstack.
trials_by_gender.stack(level='gender')

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


In [45]:
# Keep the re-stacked frame for the level-swapping and sorting steps.
stacked = trials_by_gender.stack(level='gender')

In [46]:
# Display the stacked frame: rows are keyed by (treatment, gender).
stacked

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


### Swapping levels

In [47]:
# Exchange the two row-index levels so that gender becomes the outer level.
swapped = stacked.swaplevel(0, 1)

In [48]:
# Display the result: index levels are now (gender, treatment); the row
# order itself has not changed.
swapped

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
gender,treatment,Unnamed: 2_level_1,Unnamed: 3_level_1
F,A,1,5
M,A,2,3
F,B,3,8
M,B,4,9


### Sorting rows

In [49]:
# Sort rows by the index so entries are grouped by gender, then treatment.
sorted_trials = swapped.sort_index()

In [50]:
# Display the sorted frame.
sorted_trials

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
gender,treatment,Unnamed: 2_level_1,Unnamed: 3_level_1
F,A,1,5
F,B,3,8
M,A,2,3
M,B,4,9


# Melting DataFrames

### Clinical trials data

In [52]:
# Rebuild the original flat records (the earlier `trials` frame was
# re-indexed), again stored column-wise.
trials = {
    'id': [1, 2, 3, 4],
    'treatment': ['A', 'A', 'B', 'B'],
    'gender': ['F', 'M', 'F', 'M'],
    'response': [5, 3, 8, 9],
}

In [53]:
# Rebuild the flat DataFrame from the dict, rebinding the name `trials`.
trials = pd.DataFrame(trials)

In [54]:
# Display the flat (long-format) frame with its default integer index.
trials

Unnamed: 0,gender,id,response,treatment
0,F,1,5,A
1,M,2,3,A
2,F,3,8,B
3,M,4,9,B


### Clinical trials after pivoting

In [55]:
# Long-to-wide: treatments as rows, genders as columns, responses as cells.
trials.pivot(index='treatment', columns='gender', values='response')

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,5,3
B,8,9


### Clinical trials data

In [56]:
# Wide-format results: one row per treatment, with a separate column of
# values for each gender (F, M).
new_trials = {
    'treatment': ['A', 'B'],
    'F': [5, 8],
    'M': [3, 9],
}

In [57]:
# Convert the wide-format dict to a DataFrame, rebinding `new_trials`.
new_trials = pd.DataFrame(new_trials)

In [59]:
# Display the wide frame: columns F, M and treatment.
new_trials

Unnamed: 0,F,M,treatment
0,5,3,A
1,8,9,B


### Melting DataFrame

In [60]:
# With no id_vars, every column -- including treatment -- is melted into
# (variable, value) pairs.
pd.melt(new_trials)

Unnamed: 0,variable,value
0,F,5
1,F,8
2,M,3
3,M,9
4,treatment,A
5,treatment,B


### Specifying id_vars

In [61]:
# Keep treatment as an identifier column; the remaining columns (F, M) are
# melted into (variable, value) pairs.
pd.melt(new_trials, id_vars=['treatment'])

Unnamed: 0,treatment,variable,value
0,A,F,5
1,B,F,8
2,A,M,3
3,B,M,9


### Specifying value_vars

In [62]:
# Name the columns to unpivot explicitly; treatment stays as the identifier.
pd.melt(
    new_trials,
    id_vars=['treatment'],
    value_vars=['F', 'M'],
)

Unnamed: 0,treatment,variable,value
0,A,F,5
1,B,F,8
2,A,M,3
3,B,M,9


### Specifying var_name and value_name

In [63]:
# Melt F and M, naming the label column 'gender' and the value column
# 'response' instead of the default 'variable' / 'value'.
pd.melt(
    new_trials,
    id_vars=['treatment'],
    var_name='gender',
    value_name='response',
)

Unnamed: 0,treatment,gender,response
0,A,F,5
1,B,F,8
2,A,M,3
3,B,M,9


# Pivot tables

### More clinical trials data

In [64]:
# Eight trial records, column-wise. Several (treatment, gender) pairs now
# occur more than once.
more_trials = {
    'id': [1, 2, 3, 4, 5, 6, 7, 8],
    'treatment': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B'],
    'gender': ['F', 'M', 'M', 'F', 'F', 'M', 'F', 'F'],
    'response': [5, 3, 8, 9, 1, 8, 4, 6],
}

In [65]:
# Convert the dict of records to a DataFrame, rebinding `more_trials`.
more_trials = pd.DataFrame(more_trials)

In [66]:
# Display all eight records.
more_trials

Unnamed: 0,gender,id,response,treatment
0,F,1,5,A
1,M,2,3,A
2,M,3,8,A
3,F,4,9,A
4,F,5,1,B
5,M,6,8,B
6,F,7,4,B
7,F,8,6,B


### Rearranging by pivoting

In [67]:
# Several (treatment, gender) pairs repeat in this data (e.g. two A/F rows),
# so this call raises "ValueError: Index contains duplicate entries".
more_trials.pivot(index='treatment', columns='gender', values='response')

ValueError: Index contains duplicate entries, cannot reshape

### Pivot table

In [68]:
# pivot_table() aggregates repeated (treatment, gender) pairs instead of
# failing; with no aggfunc given, each cell is the mean response.
more_trials.pivot_table(
    index='treatment',
    columns='gender',
    values='response',
)

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,7.0,5.5
B,3.666667,8.0


### Other aggregations

In [69]:
# Count the observations in each (treatment, gender) cell rather than
# averaging them.
more_trials.pivot_table(
    index='treatment',
    columns='gender',
    values='response',
    aggfunc='count',
)

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,2,2
B,3,1
