In [1]:
import pandas as pd
import numpy as np

### Declaring a Series

In [3]:
# The constructor is `pd.Series` with a capital "S"; a lowercase `pd.series` raises an error.
s = pd.Series([12, -4, 7, 9])
s

0    12
1    -4
2     7
3     9
dtype: int64

In [10]:
# Pass `index=` to label the values; the labels are strings, so each one needs matching quotes.
s = pd.Series([2,1,3,4], index=['a','b','c','d'])
s

a    2
b    1
c    3
d    4
dtype: int64

In [14]:
s.values #To see values

array([2, 1, 3, 4], dtype=int64)

In [15]:
s.index #To see index

Index(['a', 'b', 'c', 'd'], dtype='object')

### Selecting the Internal Elements

In [23]:
# Re-create `s` so the selection examples below start from known data.
s = pd.Series([2,1,3,4], index=['a','b','c','d'])

In [24]:
# Select by position explicitly with .iloc: the index of `s` holds string labels,
# and a plain s[2] (integer treated as a position) is deprecated in recent pandas.
s.iloc[2]

3

In [25]:
s['d']

4

In [26]:
s[0:2]

a    2
b    1
dtype: int64

In [28]:
s[['a','d']] # Must use double square brackets

a    2
d    4
dtype: int64

### Assigning Values to the Elements (To input new values to the elements)

In [29]:
# Assign by position with .iloc; s[2] = 8 relies on the deprecated positional fallback
# for integer keys on a string-labelled index.
s.iloc[2] = 8
s

a    2
b    1
c    8
d    4
dtype: int64

In [30]:
s['c'] = 3
s

a    2
b    1
c    3
d    4
dtype: int64

### Defining Series from NumPy Arrays and Other Series

In [31]:
arr = np.array([3,2,4,5,6])
s2 = pd.Series(arr)  # Build a Series from an existing NumPy array
s2

0    3
1    2
2    4
3    5
4    6
dtype: int32

In [32]:
s3 = pd.Series(s)
s3

a    2
b    1
c    3
d    4
dtype: int64

In [33]:
s2

0    3
1    2
2    4
3    5
4    6
dtype: int32

In [34]:
s

a    2
b    1
c    3
d    4
dtype: int64

In [36]:
# Changing an element of the source array also shows up in s2 (see the output below):
# the Series was built on top of `arr` rather than on a copy of it.
# NOTE(review): pandas versions with Copy-on-Write enabled copy the array in the
# constructor, so s2 would stay unchanged there — confirm against the installed version.
arr[1]=4
s2

0    3
1    4
2    4
3    5
4    6
dtype: int32

In [41]:
s[s>3]  # Boolean filter: returns only the elements greater than 3; `s` itself is not modified

d    4
dtype: int64

### Evaluating Values

In [42]:
serd = pd.Series([1,0,2,1,2,3], index=['white','white','blue','green','green','yellow'])
serd

white     1
white     0
blue      2
green     1
green     2
yellow    3
dtype: int64

In [43]:
serd.unique() #To know all the values contained within the Series excluding duplicates

array([1, 0, 2, 3], dtype=int64)

In [44]:
serd.value_counts() #Calculates occurrences within a Series

1    2
2    2
0    1
3    1
dtype: int64

In [46]:
serd.isin([0,3]) # Evaluates the membership, that is, given a list of values, this function lets you know if these values are contained within the data structure.

white     False
white      True
blue      False
green     False
green     False
yellow     True
dtype: bool

In [45]:
serd[serd.isin([0,3])]  # Use the Boolean mask from isin() to keep only the matching elements

white     0
yellow    3
dtype: int64

In [47]:
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
s4 = pd.Series([5, -3, np.nan, 14])  # the missing value makes the Series float64
s4

0     5.0
1    -3.0
2     NaN
3    14.0
dtype: float64

In [48]:
s4.isnull()  # Boolean mask: True where the value is missing (NaN)

0    False
1    False
2     True
3    False
dtype: bool

s4.notnull()  # Boolean mask: True where a value is present, False where it is NaN

In [50]:
s4[s4.notnull()] #To show values in s4 except NaN values

0     5.0
1    -3.0
3    14.0
dtype: float64

In [51]:
s4[s4.isnull()] #To show only NaN value

2   NaN
dtype: float64

### Series as Dictionary

In [58]:
mydict = {'red': 2000, 'blue': 1000, 'yellow': 500, 'orange': 1000}

In [59]:
myseries = pd.Series(mydict)  # Build a Series from a dict: keys become the index, values the data
myseries

red       2000
blue      1000
yellow     500
orange    1000
dtype: int64

In [61]:
# Pass an explicit index to choose and order the labels: labels found in the dict keep
# their value, and a label missing from the dict ('green') gets NaN.
colors = ['red','yellow','orange','blue','green']
myseries = pd.Series(mydict, index=colors)
myseries

red       2000.0
yellow     500.0
orange    1000.0
blue      1000.0
green        NaN
dtype: float64

In [62]:
mydict2 = {'red':400,'yellow':1000,'black':700}
myseries2 = pd.Series(mydict2)
# Arithmetic aligns on labels: only labels with a value in both Series are summed,
# every other label in the result is NaN.
myseries + myseries2

black        NaN
blue         NaN
green        NaN
orange       NaN
red       2400.0
yellow    1500.0
dtype: float64

### Defining the Data Frame

In [5]:
# Build a DataFrame from a dict of equal-length lists: each key becomes a column.
data = {'color' : ['blue','green','yellow','red','white'],
        'object' : ['ball','pen','pencil','paper','mug'],
        'price' : [1.2,1.0,0.6,0.9,1.7]}
frame = pd.DataFrame(data)
frame

Unnamed: 0,color,object,price
0,blue,ball,1.2
1,green,pen,1.0
2,yellow,pencil,0.6
3,red,paper,0.9
4,white,mug,1.7


In [6]:
frame2 = pd.DataFrame(data, columns=['object','price'])  # Keep only the listed columns, in this order
frame2

Unnamed: 0,object,price
0,ball,1.2
1,pen,1.0
2,pencil,0.6
3,paper,0.9
4,mug,1.7


In [7]:
frame2 = pd.DataFrame(data, index=['one','two','three','four','five'])  # Supply custom row labels instead of the default integer index
frame2

Unnamed: 0,color,object,price
one,blue,ball,1.2
two,green,pen,1.0
three,yellow,pencil,0.6
four,red,paper,0.9
five,white,mug,1.7


In [8]:
frame3 = pd.DataFrame(np.arange(16).reshape((4,4)),
                      index=['red','blue','yellow','white'],
                      columns=['ball','pen','pencil','paper']) #np.arange(16).reshape((4,4)) that generates a 4x4 matrix of increasing numbers from 0 to 15.
frame3

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


### Assigning Values

In [9]:
# Label the two axes: the row index is called 'id', the column index 'item'.
frame.index.name = 'id'
frame.columns.name = 'item'
frame

item,color,object,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,blue,ball,1.2
1,green,pen,1.0
2,yellow,pencil,0.6
3,red,paper,0.9
4,white,mug,1.7


In [10]:
ser = pd.Series(np.arange(5))  # Series 0..4 that will be added to the DataFrame as a new column
ser

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [11]:
frame['new'] = ser  # Add the Series as column 'new'; values are matched to rows by index label
frame

item,color,object,price,new
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,blue,ball,1.2,0
1,green,pen,1.0,1
2,yellow,pencil,0.6,2
3,red,paper,0.9,3
4,white,mug,1.7,4


### Membership of a Value

In [16]:
frame.isin([1.0,'pen'])  # Boolean frame: True where a cell equals one of the listed values

item,color,object,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,False,False,False
1,False,True,True
2,False,False,False
3,False,False,False
4,False,False,False


In [17]:
frame[frame.isin([1.0,'pen'])]

item,color,object,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,,,
1,,pen,1.0
2,,,
3,,,
4,,,


In [14]:
frame[frame.isin([1.0,'pen'])]  # Mask with the Boolean frame: matching cells keep their value, all others become NaN

item,color,object,price,new
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,,,,
1,,pen,1.0,1.0
2,,,,
3,,,,
4,,,,


### Deleting a Column

In [15]:
del frame['new']
frame

item,color,object,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,blue,ball,1.2
1,green,pen,1.0
2,yellow,pencil,0.6
3,red,paper,0.9
4,white,mug,1.7


### DataFrame from Nested dict

#### This data structure, when it is passed directly as an argument to the DataFrame() constructor, will be
#### interpreted by pandas so as to consider external keys as column names and internal keys as labels for the indexes.

In [18]:
nestdict = {'red':{2012: 22, 2013: 33},
            'white':{2011: 13, 2012: 22, 2013: 16},
            'blue': {2011: 17, 2012: 27, 2013: 18}}
frame2 = pd.DataFrame(nestdict)
frame2

Unnamed: 0,red,white,blue
2012,22.0,22,27
2013,33.0,16,18
2011,,13,17


### Transposition of a DataFrame

In [19]:
frame2.T

Unnamed: 0,2012,2013,2011
red,22.0,33.0,
white,22.0,16.0,13.0
blue,27.0,18.0,17.0


### Methods on Index

In [22]:
ser = pd.Series([5,0,3,8,4], index=['red','blue','yellow','white','green'])
ser

red       5
blue      0
yellow    3
white     8
green     4
dtype: int64

In [23]:
ser.idxmin()

'blue'

In [24]:
ser.idxmax()

'white'

### Index with Duplicate Labels

In [25]:
serd = pd.Series(range(6), index=['white','white','blue','green','green','yellow'])
serd

white     0
white     1
blue      2
green     3
green     4
yellow    5
dtype: int64

In [26]:
serd.index.is_unique

False

In [27]:
frame.index.is_unique

True

### Reindexing

In [28]:
ser3 = pd.Series([1,5,6,3],index=[0,3,5,6]) #The index column is not a perfect sequence of numbers
ser3

0    1
3    5
5    6
6    3
dtype: int64

In [30]:
ser3.reindex(range(6),method='ffill')  # Forward fill: each new label takes the value of the closest existing label before it

0    1
1    1
2    1
3    5
4    5
5    6
dtype: int64

In [31]:
ser3.reindex(range(6),method='bfill')  # Backward fill: each new label takes the value of the closest existing label after it

0    1
1    5
2    5
3    5
4    6
5    6
dtype: int64

In [32]:
# Reindex rows and columns in one call, forward-filling labels that do not exist yet.
# NOTE(review): 'colors' is not an existing column (the frame has 'color'), so both it and
# 'new' are created and, as the output shows, filled with the 'color' values — the fill
# appears to run along the column axis as well. Confirm this is the intended demonstration.
frame.reindex(range(5), method='ffill',columns=['colors','price','new','object'])

item,colors,price,new,object
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,blue,1.2,blue,ball
1,green,1.0,green,pen
2,yellow,0.6,yellow,pencil
3,red,0.9,red,paper
4,white,1.7,white,mug


### Dropping

In [33]:
frame = pd.DataFrame(np.arange(16).reshape((4,4)),
                    index=['red','blue','yellow','white'],
                    columns=['ball','pen','pencil','paper'])
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [34]:
frame.drop(['blue','yellow']) #To delete rows, just pass the indexes of the rows.

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
white,12,13,14,15


In [21]:
frame.drop(columns=['pen','pencil'])  # Drop columns by name (same as passing the labels with axis=1)

Unnamed: 0,ball,paper
red,0,3
blue,4,7
yellow,8,11
white,12,15


### Operations between DataFrame and Series

In [35]:
frame = pd.DataFrame(np.arange(16).reshape((4,4)),
                    index=['red','blue','yellow','white'],
                    columns=['ball','pen','pencil','paper'])
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [36]:
ser = pd.Series(np.arange(4), index=['ball','pen','pencil','paper'])
ser

ball      0
pen       1
pencil    2
paper     3
dtype: int32

In [37]:
frame - ser

Unnamed: 0,ball,pen,pencil,paper
red,0,0,0,0
blue,4,4,4,4
yellow,8,8,8,8
white,12,12,12,12


The two newly defined data structures have been created specifically so that the indexes of Series match
with the names of the columns of the DataFrame. This way, you can apply a direct operation.

#### If an index is not present in one of the two data structures, the result will contain a new column for that index in which all the elements are NaN.

In [38]:
ser['Glass'] = 8
ser

ball      0
pen       1
pencil    2
paper     3
Glass     8
dtype: int64

In [39]:
frame - ser

Unnamed: 0,Glass,ball,paper,pen,pencil
red,,0,0,0,0
blue,,4,4,4,4
yellow,,8,8,8,8
white,,12,12,12,12


## Function Application and Mapping

### Functions by Element

In [4]:
frame = pd.DataFrame(np.arange(16).reshape((4,4)),
                    index=['red','blue','yellow','white'],
                    columns=['ball','pen','pencil','paper'])
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [7]:
np.sqrt(frame) #calculate the square root of each value within the data frame, using the NumPy np.sqrt().

Unnamed: 0,ball,pen,pencil,paper
red,0.0,1.0,1.414214,1.732051
blue,2.0,2.236068,2.44949,2.645751
yellow,2.828427,3.0,3.162278,3.316625
white,3.464102,3.605551,3.741657,3.872983


### Functions by Row or Column

In [5]:
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [17]:
r1=frame.max()

In [18]:
r2=frame.min()

In [19]:
r = r1-r2
r

ball      12
pen       12
pencil    12
paper     12
dtype: int32

In [5]:
# Range covered by the elements of an array: largest value minus smallest value.
f = lambda x: x.max() - x.min()

In [6]:
frame.apply(f)

ball      12
pen       12
pencil    12
paper     12
dtype: int32

In [26]:
def f(x):
    """Return the range covered by the elements of ``x`` (largest minus smallest).

    Equivalent to the lambda form ``lambda x: x.max() - x.min()``.
    """
    lowest = x.min()
    highest = x.max()
    return highest - lowest

In [11]:
frame.apply(f)

ball      12
pen       12
pencil    12
paper     12
dtype: int32

In [23]:
frame.apply(f, axis=1)  # Apply the function to each row instead of each column

red       3
blue      3
yellow    3
white     3
dtype: int32

In [24]:
def f(x):
    """Return the smallest and largest element of ``x`` as a Series labelled 'min' and 'max'."""
    labels = ['min', 'max']
    extremes = [x.min(), x.max()]
    return pd.Series(extremes, index=labels)

In [25]:
frame.apply(f)

Unnamed: 0,ball,pen,pencil,paper
min,0,1,2,3
max,12,13,14,15


In [36]:
def f(x):
    """Return the range of ``x`` (largest minus smallest) wrapped in a one-element Series."""
    spread = x.max() - x.min()
    return pd.Series([spread])

In [38]:
frame.apply(f)

Unnamed: 0,ball,pen,pencil,paper
0,12,12,12,12


In [33]:
def f(x):
    """Wrap the range of ``x`` (max - min) in a single-element Series.

    Same function as the previous definition of ``f``; it is applied row-wise next.
    """
    top, bottom = x.max(), x.min()
    return pd.Series([top - bottom])

In [35]:
frame.apply(f, axis=1)

Unnamed: 0,0
red,3
blue,3
yellow,3
white,3


### Statistics Functions

In [40]:
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [41]:
frame.sum() #Based on columns

ball      24
pen       28
pencil    32
paper     36
dtype: int64

In [39]:
frame.sum(axis=1) #Based on rows

red        6
blue      22
yellow    38
white     54
dtype: int64

In [42]:
frame.mean()

ball      6.0
pen       7.0
pencil    8.0
paper     9.0
dtype: float64

In [43]:
frame.describe() #Summary Statistic by columns

Unnamed: 0,ball,pen,pencil,paper
count,4.0,4.0,4.0,4.0
mean,6.0,7.0,8.0,9.0
std,5.163978,5.163978,5.163978,5.163978
min,0.0,1.0,2.0,3.0
25%,3.0,4.0,5.0,6.0
50%,6.0,7.0,8.0,9.0
75%,9.0,10.0,11.0,12.0
max,12.0,13.0,14.0,15.0


In [46]:
# Summary statistics by row: with axis=1 every row is handed to describe() as a Series.
frame.apply(pd.Series.describe, axis=1)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
red,4.0,1.5,1.290994,0.0,0.75,1.5,2.25,3.0
blue,4.0,5.5,1.290994,4.0,4.75,5.5,6.25,7.0
yellow,4.0,9.5,1.290994,8.0,8.75,9.5,10.25,11.0
white,4.0,13.5,1.290994,12.0,12.75,13.5,14.25,15.0


In [6]:
ser = pd.Series([5,0,3,8,4], index=['red','blue','yellow','white','green'])
ser

red       5
blue      0
yellow    3
white     8
green     4
dtype: int64

In [7]:
ser.sort_index()  # Sort by index label (alphabetical order for these string labels)

blue      0
green     4
red       5
white     8
yellow    3
dtype: int64

In [51]:
ser.sort_index(ascending=False)

yellow    3
white     8
red       5
green     4
blue      0
dtype: int64

In [34]:
frame = pd.DataFrame(np.arange(16).reshape((4,4)),
                    index=['red','blue','yellow','white'],
                    columns=['ball','pen','pencil','paper'])
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [54]:
frame.sort_index()

Unnamed: 0,ball,pen,pencil,paper
blue,4,5,6,7
red,0,1,2,3
white,12,13,14,15
yellow,8,9,10,11


In [64]:
frame.sort_index(ascending=False, axis=1)

Unnamed: 0,pencil,pen,paper,ball
red,2,1,3,0
blue,6,5,7,4
yellow,10,9,11,8
white,14,13,15,12


In [65]:
frame.sort_index(axis=1)

Unnamed: 0,ball,paper,pen,pencil
red,0,3,1,2
blue,4,7,5,6
yellow,8,11,9,10
white,12,15,13,14


In [81]:
ser

red       5
blue      0
yellow    3
white     8
green     4
dtype: int64

In [71]:
ser.sort_values() #To order Series values

blue      0
yellow    3
green     4
red       5
white     8
dtype: int64

In [74]:
frame.sort_values(by='pen') #To order dataframe values

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [75]:
frame.sort_values(by=['pen','ball'])

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [76]:
ser.rank()  # Rank the values from lowest (rank 1) to highest

red       4.0
blue      1.0
yellow    2.0
white     5.0
green     3.0
dtype: float64

In [77]:
ser.rank(method='first')

red       4.0
blue      1.0
yellow    2.0
white     5.0
green     3.0
dtype: float64

With the method option set to ‘first’, values that are tied receive their ranks in the order in which they
already appear in the data structure. This Series contains no ties, so the result is the same as the default ranking above.

In [82]:
ser.rank(ascending=False)

red       2.0
blue      5.0
yellow    4.0
white     1.0
green     3.0
dtype: float64

### Filtering Out NaN Values

In [15]:
# Create a Series that contains a missing value.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
ser = pd.Series([0, 1, 2, np.nan, 9], index=['red','blue','yellow','white','green'])
ser

red       0.0
blue      1.0
yellow    2.0
white     NaN
green     9.0
dtype: float64

In [16]:
ser['white'] = None  # None is stored as NaN in this float Series (see the output)
ser

red       0.0
blue      1.0
yellow    2.0
white     NaN
green     9.0
dtype: float64

In [14]:
ser.dropna() #Filtering Out with dropna() function

red       0.0
blue      1.0
yellow    2.0
green     9.0
dtype: float64

In [17]:
ser.notnull() #To see is there NaN values or not

red        True
blue       True
yellow     True
white     False
green      True
dtype: bool

In [18]:
ser[ser.notnull()] #To filtering out NaN values  by notnull() function

red       0.0
blue      1.0
yellow    2.0
green     9.0
dtype: float64

In [19]:
frame3 = pd.DataFrame([[6,np.nan,6],[np.nan,np.nan,np.nan],[2,np.nan,5]],
                        index = ['blue','green','red'],
                        columns = ['ball','mug','pen'])
frame3

Unnamed: 0,ball,mug,pen
blue,6.0,,6.0
green,,,
red,2.0,,5.0


In [20]:
frame3.dropna()

Unnamed: 0,ball,mug,pen


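Called without arguments, dropna() removes every row that contains at least one NaN; here each row has a NaN in the 'mug' column, so the result above is empty. To remove only the rows in which all the elements are NaN, add the **how** option and assign it the value ‘all’.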

In [22]:
frame3.dropna(how='all')  # Drop only the rows in which every element is NaN

Unnamed: 0,ball,mug,pen
blue,6.0,,6.0
red,2.0,,5.0


In [23]:
frame3.fillna(0)

Unnamed: 0,ball,mug,pen
blue,6.0,0.0,6.0
green,0.0,0.0,0.0
red,2.0,0.0,5.0


In [24]:
frame3.fillna({'ball':1,'mug':0,'pen':99})

Unnamed: 0,ball,mug,pen
blue,6.0,0.0,6.0
green,1.0,0.0,99.0
red,2.0,0.0,5.0


### Hierarchical Indexing and Leveling

**Hierarchical indexing** is a very important feature of pandas, as it allows you to have multiple levels of
indexes on a single axis. In effect, it gives you a way to work with data in multiple dimensions while continuing to
work in a two-dimensional structure.

In [25]:
# Series with a two-level (hierarchical) index: the first list holds the outer labels,
# the second list the inner labels.
# The values come from a seeded generator so that re-running the notebook reproduces them
# (np.random.rand without a seed gives different numbers on every run).
mser = pd.Series(np.random.default_rng(42).random(8),
                index=[['white','white','white','blue','blue','red','red','red'],
                ['up','down','right','up','down','up','down','left']])
mser

white  up       0.222365
       down     0.990001
       right    0.862875
blue   up       0.185356
       down     0.178676
red    up       0.615137
       down     0.370488
       left     0.820407
dtype: float64

In [26]:
mser.index

MultiIndex([('white',    'up'),
            ('white',  'down'),
            ('white', 'right'),
            ( 'blue',    'up'),
            ( 'blue',  'down'),
            (  'red',    'up'),
            (  'red',  'down'),
            (  'red',  'left')],
           )

In [27]:
mser['white']

up       0.222365
down     0.990001
right    0.862875
dtype: float64

In [30]:
mser[:,'up']  # Select, for every first-level label, the value whose second-level label is 'up'

white    0.222365
blue     0.185356
red      0.615137
dtype: float64

In [31]:
mser['white','up']

0.22236538782781445

Hierarchical indexing plays a critical role in reshaping data and in group-based operations such
as creating a pivot table. For example, the data can be rearranged into a data frame using a special
function called **unstack()**. This function converts a Series with a hierarchical index into a simple DataFrame,
where the second set of indexes is converted into a new set of columns.

In [33]:
mser

white  up       0.222365
       down     0.990001
       right    0.862875
blue   up       0.185356
       down     0.178676
red    up       0.615137
       down     0.370488
       left     0.820407
dtype: float64

In [32]:
mser.unstack()

Unnamed: 0,down,left,right,up
blue,0.178676,,,0.185356
red,0.370488,0.820407,,0.615137
white,0.990001,,0.862875,0.222365


In [35]:
frame

Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
blue,4,5,6,7
yellow,8,9,10,11
white,12,13,14,15


In [36]:
frame.stack()

red     ball       0
        pen        1
        pencil     2
        paper      3
blue    ball       4
        pen        5
        pencil     6
        paper      7
yellow  ball       8
        pen        9
        pencil    10
        paper     11
white   ball      12
        pen       13
        pencil    14
        paper     15
dtype: int32

In [37]:
mser

white  up       0.222365
       down     0.990001
       right    0.862875
blue   up       0.185356
       down     0.178676
red    up       0.615137
       down     0.370488
       left     0.820407
dtype: float64

As regards the DataFrame, it is possible to define a hierarchical index both for the rows and for the
columns. At the time of the declaration of the DataFrame, you have to define an array of arrays for both the
**index** option and the **columns** option.

In [38]:
# DataFrame with a hierarchical index on both axes: each of `index` and `columns`
# receives a list of lists (outer labels first, inner labels second).
# The values come from a seeded generator so that re-running the notebook reproduces them
# (np.random.randn without a seed gives different numbers on every run).
mframe = pd.DataFrame(np.random.default_rng(42).standard_normal((4, 4)),
                        index=[['white','white','red','red'], ['up','down','up','down']],
                        columns=[['pen','pen','paper','paper'],[1,2,1,2]])
mframe

Unnamed: 0_level_0,Unnamed: 1_level_0,pen,pen,paper,paper
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,1,2
white,up,0.335369,-0.175777,1.428425,-0.584128
white,down,-1.040147,0.589232,-0.252987,1.332656
red,up,0.182178,0.963103,-1.689297,-0.360784
red,down,-1.065634,-0.560541,0.178974,-0.153848


### Reordering and Sorting Levels

The **swaplevel()** function accepts as argument the names assigned to the two levels that you want to
interchange, and returns a new object with the two levels interchanged between them, while leaving the data
unmodified.

In [39]:
mframe

Unnamed: 0_level_0,Unnamed: 1_level_0,pen,pen,paper,paper
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,1,2
white,up,0.335369,-0.175777,1.428425,-0.584128
white,down,-1.040147,0.589232,-0.252987,1.332656
red,up,0.182178,0.963103,-1.689297,-0.360784
red,down,-1.065634,-0.560541,0.178974,-0.153848


In [51]:
mframe.columns.names=['Object','Id']
mframe.index.names=['Color','Status']

In [41]:
mframe

Unnamed: 0_level_0,Object,pen,pen,paper,paper
Unnamed: 0_level_1,Id,1,2,1,2
Color,Status,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
white,up,0.335369,-0.175777,1.428425,-0.584128
white,down,-1.040147,0.589232,-0.252987,1.332656
red,up,0.182178,0.963103,-1.689297,-0.360784
red,down,-1.065634,-0.560541,0.178974,-0.153848


In [42]:
mframe.swaplevel('Status','Color')

Unnamed: 0_level_0,Object,pen,pen,paper,paper
Unnamed: 0_level_1,Id,1,2,1,2
Status,Color,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
up,white,0.335369,-0.175777,1.428425,-0.584128
down,white,-1.040147,0.589232,-0.252987,1.332656
up,red,0.182178,0.963103,-1.689297,-0.360784
down,red,-1.065634,-0.560541,0.178974,-0.153848


In [52]:
mframe

Unnamed: 0_level_0,Object,pen,pen,paper,paper
Unnamed: 0_level_1,Id,1,2,1,2
Color,Status,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
white,up,0.335369,-0.175777,1.428425,-0.584128
white,down,-1.040147,0.589232,-0.252987,1.332656
red,up,0.182178,0.963103,-1.689297,-0.360784
red,down,-1.065634,-0.560541,0.178974,-0.153848


In [53]:
mframe.sort_index(level='Color')

Unnamed: 0_level_0,Object,pen,pen,paper,paper
Unnamed: 0_level_1,Id,1,2,1,2
Color,Status,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
red,down,-1.065634,-0.560541,0.178974,-0.153848
red,up,0.182178,0.963103,-1.689297,-0.360784
white,down,-1.040147,0.589232,-0.252987,1.332656
white,up,0.335369,-0.175777,1.428425,-0.584128


In [58]:
mframe.sort_index(level='Color').swaplevel('Status','Color')  # Sort by the 'Color' level first, then swap the two index levels

Unnamed: 0_level_0,Object,pen,pen,paper,paper
Unnamed: 0_level_1,Id,1,2,1,2
Status,Color,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
down,red,-1.065634,-0.560541,0.178974,-0.153848
up,red,0.182178,0.963103,-1.689297,-0.360784
down,white,-1.040147,0.589232,-0.252987,1.332656
up,white,0.335369,-0.175777,1.428425,-0.584128


### Summary Statistic by Level

In [60]:
# Sum the rows within each 'Color' group. DataFrame.sum(level=...) was deprecated and then
# removed from pandas; groupby(level=...) is the replacement. sort=False keeps the groups
# in their order of appearance, which is what sum(level=...) produced.
mframe.groupby(level='Color', sort=False).sum()

  mframe.sum(level='Color')


Object,pen,pen,paper,paper
Id,1,2,1,2
Color,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
white,-0.704778,0.413455,1.175438,0.748528
red,-0.883456,0.402563,-1.510324,-0.514631


In [61]:
mframe.groupby(level='Color').sum()

Object,pen,pen,paper,paper
Id,1,2,1,2
Color,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
red,-0.883456,0.402563,-1.510324,-0.514631
white,-0.704778,0.413455,1.175438,0.748528


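If you want to compute a statistic for a given level of the columns, for example, the Id, you group along the
second axis by setting the axis option to 1. Recent pandas versions deprecate axis=1 in groupby(); there, transpose the frame, group by the level, and transpose the result back.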

In [62]:
# Sum the columns that share the same 'Id' label. groupby(..., axis=1) is deprecated in
# recent pandas, so transpose, group on the (now row) level, and transpose the result back.
mframe.T.groupby(level='Id').sum().T

Unnamed: 0_level_0,Id,1,2
Color,Status,Unnamed: 2_level_1,Unnamed: 3_level_1
white,up,1.763794,-0.759905
white,down,-1.293135,1.921888
red,up,-1.50712,0.60232
red,down,-0.88666,-0.714388


In [63]:
# Sum the columns that share the same 'Object' label. groupby(..., axis=1) is deprecated in
# recent pandas, so transpose, group on the (now row) level, and transpose the result back.
mframe.T.groupby(level='Object').sum().T

Unnamed: 0_level_0,Object,paper,pen
Color,Status,Unnamed: 2_level_1,Unnamed: 3_level_1
white,up,0.844297,0.159592
white,down,1.079669,-0.450915
red,up,-2.050081,1.145281
red,down,0.025126,-1.626174
