# Pivoting DataFrames

In [1]:
import pandas as pd

In [8]:
# Load the clinical-trials records from CSV into a DataFrame.
trials = pd.read_csv('trials_01.csv')

### Clinical trials data

In [10]:
trials

Unnamed: 0,id,treatment,gender,response
0,1,A,F,5
1,2,A,M,3
2,3,B,F,8
3,4,B,M,9


### Reshaping by pivoting

In [14]:
# Long -> wide: treatments become the rows, genders become the columns, and
# each cell holds the matching response.
trials.pivot(index='treatment', columns='gender', values='response')

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,5,3
B,8,9


In [15]:
# With no `values` argument, every remaining column (id, response) is pivoted,
# so the result has two-level columns: (column name, gender).
trials.pivot(index='treatment', columns='gender')

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


In [18]:
# Load visitor and signup counts, one row per (weekday, city) pair.
users = pd.read_csv('visitors.csv')
users

Unnamed: 0,weekday,city,visitors,signups
0,Sun,Austin,139,7
1,Sun,Dallas,237,12
2,Mon,Austin,326,3
3,Mon,Dallas,456,5


In [19]:
## Pivoting a single variable
# Goal: one row per weekday, one column per city, cells holding visitor counts.
visitors_pivot = users.pivot(index='weekday', columns='city', values='visitors')

# Plain-text rendering of the resulting wide table.
print(visitors_pivot)


city     Austin  Dallas
weekday                
Mon         326     456
Sun         139     237


In [31]:
# No `values` given: both visitors and signups are pivoted, producing
# two-level columns of (measure, city).
users.pivot(index='weekday',columns='city')

Unnamed: 0_level_0,visitors,visitors,signups,signups
city,Austin,Dallas,Austin,Dallas
weekday,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Mon,326,456,3,5
Sun,139,237,7,12


In [33]:
# Restrict the pivot to the signups column only.
users.pivot(index='weekday',columns='city', values='signups')

city,Austin,Dallas
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,3,5
Sun,7,12


In [34]:
# Wide table of signups: weekday rows by city columns.
signups_pivot = users.pivot(index='weekday', columns='city', values='signups')
print(signups_pivot)

# Leaving out `values` pivots every remaining column (visitors and signups),
# which yields two-level columns.
pivot = users.pivot(index='weekday', columns='city')
print(pivot)


city     Austin  Dallas
weekday                
Mon           3       5
Sun           7      12
        visitors        signups       
city      Austin Dallas  Austin Dallas
weekday                               
Mon          326    456       3      5
Sun          139    237       7     12


# Stacking and unstacking DataFrames

### Creating a multi-level index
- Input: a list of column labels whose values form the tuples of the multi-level index, e.g. `['treatment', 'gender']`

`pivot` won't work because of the multi-level index.

In [35]:
trials

Unnamed: 0,id,treatment,gender,response
0,1,A,F,5
1,2,A,M,3
2,3,B,F,8
3,4,B,M,9


In [36]:
# Build a (treatment, gender) MultiIndex on the trials data.
# The frame is derived straight from the CSV instead of from the current
# `trials` so that this cell is idempotent: `trials = trials.set_index(...)`
# raises KeyError when run a second time, because the key columns have
# already been moved into the index.
trials = pd.read_csv('trials_01.csv').set_index(['treatment', 'gender'])
trials

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


### Unstacking a multi-index (1)
Similar to pivot method

- Move some of the index levels to the columns, making the DataFrame shorter and wider (more columns, fewer rows).

Example: move the second level of the index (`gender`) to the columns using `.unstack`.

- now we have hierarchical cols

In [38]:
trials

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


In [39]:
# Move the 'gender' index level into the columns, selecting the level by name.
trials.unstack(level='gender')

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


### Unstacking a multi-index (2)

Same result, different way: select the level by position (`level=1`) instead of by name.

In [42]:
trials

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


In [43]:
# Same unstack, selecting the level by position (1 = the second level, gender).
trials.unstack(level=1)

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


### Stacking DataFrames


In [44]:
# Keep the unstacked (wide) frame in a variable so it can be stacked again.
trials_by_gender = trials.unstack(level='gender')
trials_by_gender

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


In [45]:
# Move the 'gender' column level back into the row index (the reverse of unstack).
trials_by_gender.stack(level='gender')

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


In [46]:
### Stacking DF


In [48]:
# Stack 'gender' back into the row index and keep the result for the
# level-swapping example.
stacked = trials_by_gender.stack(level='gender')
stacked

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


### Swapping levels

In [51]:
# Exchange the two index levels: (treatment, gender) -> (gender, treatment).
# Only the level order changes; the rows stay in their original order.
swapped = stacked.swaplevel(0,1)
swapped

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
gender,treatment,Unnamed: 2_level_1,Unnamed: 3_level_1
F,A,1,5
M,A,2,3
F,B,3,8
M,B,4,9


### Sorting rows

In [53]:
# Sort rows by the swapped index so that each gender's rows sit together.
sorted_trials = swapped.sort_index()
sorted_trials

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
gender,treatment,Unnamed: 2_level_1,Unnamed: 3_level_1
F,A,1,5
F,B,3,8
M,A,2,3
M,B,4,9


In [60]:
# Stacking & unstacking I
# Load the visitor data and index it by (city, weekday), sorted, in one chain.
# NOTE(review): the loaded frame carries an 'Unnamed: 0' column, which looks
# like a row index that was written into the CSV -- consider
# read_csv(..., index_col=0) if that column is not wanted.
users = (
    pd.read_csv('users.csv')
    .set_index(['city', 'weekday'])
    .sort_index()
)
users

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,visitors,signups
city,weekday,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Austin,Mon,2,326,3
Austin,Sun,0,139,7
Dallas,Mon,3,456,5
Dallas,Sun,1,237,12


In [64]:
# Inspect which columns remain after 'city' and 'weekday' moved into the index.
users.columns

Index(['Unnamed: 0', 'visitors', 'signups'], dtype='object')

In [65]:
# Pivot the 'weekday' index level up into the columns: one row per city.
byweekday = users.unstack('weekday')
print(byweekday)

# Reverse it: stacking 'weekday' moves it back into the row index.
print(byweekday.stack('weekday'))

        Unnamed: 0     visitors      signups    
weekday        Mon Sun      Mon  Sun     Mon Sun
city                                            
Austin           2   0      326  139       3   7
Dallas           3   1      456  237       5  12
                Unnamed: 0  visitors  signups
city   weekday                               
Austin Mon               2       326        3
       Sun               0       139        7
Dallas Mon               3       456        5
       Sun               1       237       12


In [66]:
# Stacking & unstacking II
# Pivot the 'city' index level up into the columns: one row per weekday.
bycity = users.unstack('city')
print(bycity)

# Reverse it: stacking 'city' returns it to the row index as the innermost
# level, so the index order becomes (weekday, city).
print(bycity.stack('city'))


        Unnamed: 0        visitors        signups       
city        Austin Dallas   Austin Dallas  Austin Dallas
weekday                                                 
Mon              2      3      326    456       3      5
Sun              0      1      139    237       7     12
                Unnamed: 0  visitors  signups
weekday city                                 
Mon     Austin           2       326        3
        Dallas           3       456        5
Sun     Austin           0       139        7
        Dallas           1       237       12


In [67]:
## Restoring the index order
# Goal: rebuild a frame that matches `users`, starting from `bycity`.

# Move 'city' from the columns back into the row index -> (weekday, city).
newusers = bycity.stack('city')

# Reverse the two index levels -> (city, weekday); row order is left untouched.
newusers = newusers.swaplevel(i=0, j=1)
print(newusers)  # still in weekday-major order, i.e. the index is not sorted

# Sorting the index gives city-major order.
newusers = newusers.sort_index()
print(newusers)

# Confirm that the round trip reproduced the original frame.
print(newusers.equals(users))

                Unnamed: 0  visitors  signups
city   weekday                               
Austin Mon               2       326        3
Dallas Mon               3       456        5
Austin Sun               0       139        7
Dallas Sun               1       237       12
                Unnamed: 0  visitors  signups
city   weekday                               
Austin Mon               2       326        3
       Sun               0       139        7
Dallas Mon               3       456        5
       Sun               1       237       12
True


In [68]:
# Melting DataFrames

### Clinical trials data


In [72]:
# Reload the long-format trials data from CSV (earlier cells re-indexed `trials`).
trials = pd.read_csv('trials_01.csv')
trials

Unnamed: 0,id,treatment,gender,response
0,1,A,F,5
1,2,A,M,3
2,3,B,F,8
3,4,B,M,9


### Clinical trials after pivoting

In [73]:
# Long -> wide: one row per treatment, one column per gender, cells = response.
trials.pivot(index='treatment', columns='gender', values='response')

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,5,3
B,8,9


In [74]:
# Load the trials data in wide form (one column per gender: F, M).
new_trials = pd.read_csv('trials_02.csv')
new_trials

Unnamed: 0,treatment,F,M
0,A,5,3
1,B,8,9


In [75]:
### Melting DataFrame

In [76]:
# With no id_vars, every column is melted -- including 'treatment', whose
# labels end up in the value column alongside the responses.
pd.melt(new_trials)

Unnamed: 0,variable,value
0,treatment,A
1,treatment,B
2,F,5
3,F,8
4,M,3
5,M,9


### Specifying id_vars

In [77]:
# Keep 'treatment' as an identifier column; only F and M are melted into
# (variable, value) pairs.
pd.melt(new_trials, id_vars=['treatment'])

Unnamed: 0,treatment,variable,value
0,A,F,5
1,B,F,8
2,A,M,3
3,B,M,9
