# Part 7

# Reshaping data frames

In [2]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [7]:
# Build a 2x4 frame holding the integers 0..7.
# Wrapping the labels in pd.Index lets us give each axis a name:
# the rows are labelled 'city' and the columns 'letter'.
dframe1 = DataFrame(
    data=np.arange(8).reshape(2, 4),
    index=pd.Index(['LA', 'SF'], name='city'),
    columns=pd.Index(['A', 'B', 'C', 'D'], name='letter'),
)
type(dframe1)

pandas.core.frame.DataFrame

In [10]:
# stack() moves the column labels ('letter') into the innermost level of the
# row index, i.e. it reshapes wide -> long. The result is a Series keyed by
# (city, letter), not a DataFrame.
dframe_st = dframe1.stack(level=-1)
type(dframe_st)

pandas.core.series.Series

In [11]:
# unstack() is the inverse of stack(): it moves the innermost index level
# (here 'letter') back into the columns, reshaping long -> wide.
dframe_st.unstack(level=-1)

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SF,4,5,6,7


In [12]:
# Name the index level to unstack: 'city' becomes the columns and
# 'letter' stays as the row index.
dframe_st.unstack(level='city')

city,LA,SF
letter,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0,4
B,1,5
C,2,6
D,3,7


In [17]:
# First sample series: values 0..2 labelled Q, X, Y.
ser1 = Series(data=[0, 1, 2], index=list('QXY'))
ser1

Q    0
X    1
Y    2
dtype: int64

In [18]:
# Second sample series: values 4..6 labelled X, Y, Z
# (shares the labels X and Y with ser1, but not Q or Z).
ser2 = Series(data=[4, 5, 6], index=list('XYZ'))
ser2

X    4
Y    5
Z    6
dtype: int64

In [19]:
# Concatenate the two series end to end; `keys` adds an outer index level
# ('Alpha' / 'Beta') recording which input each value came from.
# Despite the variable name, the result is still a Series, not a DataFrame.
dframe = pd.concat(objs=[ser1, ser2], keys=['Alpha', 'Beta'])
dframe

Alpha  Q    0
       X    1
       Y    2
Beta   X    4
       Y    5
       Z    6
dtype: int64

In [None]:
# Despite its name, `dframe` is not a DataFrame yet - pd.concat with keys= joined the two series into a single Series with a two-level index

In [20]:
# Unstacking the inner index level (Q/X/Y/Z) into columns turns the
# two-level Series into a DataFrame. Labels that exist in only one of the
# inputs show up as NaN, which is why the values are displayed as floats.
df_new1 = dframe.unstack(level=-1)
df_new1

Unnamed: 0,Q,X,Y,Z
Alpha,0.0,1.0,2.0,
Beta,,4.0,5.0,6.0


In [21]:
# Stacking back to long format: as the output shows, the NaN cells are
# dropped by default (the legacy dropna=True behaviour of stack()).
df_new1.stack(level=-1)

Alpha  Q    0.0
       X    1.0
       Y    2.0
Beta   X    4.0
       Y    5.0
       Z    6.0
dtype: float64

In [24]:
# Build the wide frame directly from the two input series instead of writing
# `dframe = dframe.unstack()`. That self-referential form is not idempotent:
# running the cell a second time would call DataFrame.unstack() on the
# already-wide frame and collapse it back into a Series.
dframe = pd.concat([ser1, ser2], keys=['Alpha', 'Beta']).unstack()
dframe

Unnamed: 0,Q,X,Y,Z
Alpha,0.0,1.0,2.0,
Beta,,4.0,5.0,6.0


In [25]:
# Same result as stacking df_new1: the NaN cells are not kept when the
# frame is stacked with the default settings.
dframe.stack(level=-1)

Alpha  Q    0.0
       X    1.0
       Y    2.0
Beta   X    4.0
       Y    5.0
       Z    6.0
dtype: float64

In [22]:
# dropna=False keeps the NaN cells, so every (row, column) pair appears
# in the stacked result.
# NOTE(review): newer pandas (>= 2.1) deprecates `dropna` in favour of
# `future_stack=True` - confirm against the pandas version in use.
df_new1.stack(level=-1, dropna=False)

Alpha  Q    0.0
       X    1.0
       Y    2.0
       Z    NaN
Beta   Q    NaN
       X    4.0
       Y    5.0
       Z    6.0
dtype: float64

# Pivoting

In [26]:
# Load the sample sales data; the path is relative to the notebook's working directory.
# Note: this rebinds `dframe`, which the earlier cells used for the stacking examples.
dframe = pd.read_csv("data/dummydata2.csv")

In [27]:
# Long layout: each row holds one (date, city) observation and its total_sales value.
dframe

Unnamed: 0,date,city,total_sales
0,2018-04-03,London,31.2
1,2018-04-04,London,29.8
2,2018-04-05,London,30.3
3,2018-04-03,Tokyo,26.7
4,2018-04-04,Tokyo,28.4
5,2018-04-05,Tokyo,27.7
6,2018-04-03,Delhi,33.2
7,2018-04-04,Delhi,26.3
8,2018-04-05,Delhi,23.8
9,2018-04-03,Chicago,29.5


In [28]:
# Reshape long -> wide: one row per date, one column per city, cells filled
# with total_sales.
# The arguments are passed by keyword because pandas >= 2.0 no longer accepts
# index/columns/values positionally (the positional form raises TypeError);
# the keyword form works on older versions as well.
dframe_piv = dframe.pivot(index='date', columns='city', values='total_sales')
dframe_piv

city,Chicago,Delhi,London,Tokyo
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-04-03,29.5,33.2,31.2,26.7
2018-04-04,32.2,26.3,29.8,28.4
2018-04-05,30.7,23.8,30.3,27.7


In [None]:
# Same pivot with the axes swapped: cities become the rows and dates the columns.
# Keyword arguments are used because pandas >= 2.0 rejects the positional form.
# R users - you can do this by typing xtabs(total_sales ~ city + date)
dframe.pivot(index='city', columns='date', values='total_sales')

## End of part 7