## MultiIndex

In [2]:
from pathlib import Path

import numpy as np
import pandas as pd

In [3]:
# Baseline Series: the integers 0..11 on the default integer index.
s = pd.Series(data=np.arange(0, 12))

In [5]:
# Show the index labels as a NumPy array. `to_numpy()` is the accessor pandas
# recommends over `.values` when an ndarray is wanted.
s.index.to_numpy()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11], dtype=int64)

In [7]:
# Two parallel label lists of length 12:
#   - outer level: each of 'a', 'b', 'c' repeated four times
#   - inner level: the sequence 'a'..'d' cycled three times
array = [
    [outer for outer in 'abc' for _ in range(4)],
    ['a', 'b', 'c', 'd'] * 3,
]

In [9]:
# Pair the two label lists element-wise into (outer, inner) tuples.
t = [(outer, inner) for outer, inner in zip(array[0], array[1])]
t

[('a', 'a'),
 ('a', 'b'),
 ('a', 'c'),
 ('a', 'd'),
 ('b', 'a'),
 ('b', 'b'),
 ('b', 'c'),
 ('b', 'd'),
 ('c', 'a'),
 ('c', 'b'),
 ('c', 'c'),
 ('c', 'd')]

In [11]:
# Build a two-level index from the label pairs and name each level.
mi = pd.MultiIndex.from_tuples(
    tuples=t,
    names=['primero', 'segundo'],
)
mi

MultiIndex([('a', 'a'),
            ('a', 'b'),
            ('a', 'c'),
            ('a', 'd'),
            ('b', 'a'),
            ('b', 'b'),
            ('b', 'c'),
            ('b', 'd'),
            ('c', 'a'),
            ('c', 'b'),
            ('c', 'c'),
            ('c', 'd')],
           names=['primero', 'segundo'])

In [14]:
# Rebuild the Series with the same twelve values, now indexed by the
# two-level `mi` instead of the default integer index.
s = pd.Series(data=np.arange(0, 12), index=mi)
s

primero  segundo
a        a           0
         b           1
         c           2
         d           3
b        a           4
         b           5
         c           6
         d           7
c        a           8
         b           9
         c          10
         d          11
dtype: int32

In [16]:
# A full key (one label per level) selects a single value.
s.loc['c', 'd']

11

In [18]:
# Cross-section on the outer-level label 'a'; the result is indexed by the
# remaining inner level.
s.xs('a')

segundo
a    0
b    1
c    2
d    3
dtype: int32

In [19]:
# Take every outer-level label and keep only the inner-level label 'a'.
s.loc[(slice(None), 'a')]

primero
a    0
b    4
c    8
dtype: int32

In [20]:
# Directory holding the input files, relative to the notebook's working directory.
# `Path` is imported in the notebook's top import cell together with the other dependencies.
data_path = Path('data')

In [21]:
# Read the comma-separated dataset from the data directory and preview the
# first five rows.
dataset = pd.read_csv(data_path.joinpath("dataset.csv"), sep=",")
dataset.head(5)

Unnamed: 0,Number,City,Gender,Age,Income,Illness
0,1,Dallas,Male,41,40367.0,No
1,2,Dallas,Male,54,45084.0,No
2,3,Dallas,Male,42,52483.0,No
3,4,Dallas,Male,40,40941.0,No
4,5,Dallas,Male,46,50289.0,No


In [25]:
# Pair a top-level group label with each column name, listed in the same
# order as the dataset's columns.
tuples = list(zip(
    [
        "Columnas_no_importantes",
        "Columnas_importantes",
        "Columnas_no_importantes",
        "Columnas_importantes",
        "Columnas_importantes",
        "Columnas_no_importantes",
    ],
    ["Number", "City", "Gender", "Age", "Income", "Illness"],
))

# Two-level column index: group label on level 0, column name on level 1.
multiindex = pd.MultiIndex.from_tuples(tuples=tuples)
multiindex

MultiIndex([('Columnas_no_importantes',  'Number'),
            (   'Columnas_importantes',    'City'),
            ('Columnas_no_importantes',  'Gender'),
            (   'Columnas_importantes',     'Age'),
            (   'Columnas_importantes',  'Income'),
            ('Columnas_no_importantes', 'Illness')],
           )

In [24]:
# Replace the flat column labels with the two-level `multiindex`.
# This assigns onto the existing `dataset` object, so the frame is modified in place.
dataset.columns = multiindex
# Preview the first rows to check the new two-level header.
dataset.head()

Unnamed: 0_level_0,Columnas_no_importantes,Columnas_importantes,Columnas_no_importantes,Columnas_importantes,Columnas_importantes,Columnas_no_importantes
Unnamed: 0_level_1,Number,City,Gender,Age,Income,Illness
0,1,Dallas,Male,41,40367.0,No
1,2,Dallas,Male,54,45084.0,No
2,3,Dallas,Male,42,52483.0,No
3,4,Dallas,Male,40,40941.0,No
4,5,Dallas,Male,46,50289.0,No


In [26]:
# Select every column under the top-level label 'Columnas_importantes';
# the result keeps only the second-level names as its columns.
dataset.loc[:, 'Columnas_importantes']

Unnamed: 0,City,Age,Income
0,Dallas,41,40367.0
1,Dallas,54,45084.0
2,Dallas,42,52483.0
3,Dallas,40,40941.0
4,Dallas,46,50289.0
...,...,...,...
149995,Austin,48,93669.0
149996,Austin,25,96748.0
149997,Austin,26,111885.0
149998,Austin,25,111878.0


In [27]:
# Display the current two-level column index of `dataset`.
dataset.columns

MultiIndex([('Columnas_no_importantes',  'Number'),
            (   'Columnas_importantes',    'City'),
            ('Columnas_no_importantes',  'Gender'),
            (   'Columnas_importantes',     'Age'),
            (   'Columnas_importantes',  'Income'),
            ('Columnas_no_importantes', 'Illness')],
           )

In [30]:
# Top-level (level 0) label of every column, in column order.
dataset.columns.get_level_values(level=0)

Index(['Columnas_no_importantes', 'Columnas_importantes',
       'Columnas_no_importantes', 'Columnas_importantes',
       'Columnas_importantes', 'Columnas_no_importantes'],
      dtype='object')

In [32]:
# A full (level 0, level 1) key selects one column, returned as a Series.
dataset[('Columnas_importantes', 'City')]

0         Dallas
1         Dallas
2         Dallas
3         Dallas
4         Dallas
           ...  
149995    Austin
149996    Austin
149997    Austin
149998    Austin
149999    Austin
Name: (Columnas_importantes, City), Length: 150000, dtype: object

In [33]:
# A list of full (level 0, level 1) keys selects several columns and keeps
# both header levels in the result.
dataset.loc[:, [('Columnas_importantes', name) for name in ('City', 'Age')]]

Unnamed: 0_level_0,Columnas_importantes,Columnas_importantes
Unnamed: 0_level_1,City,Age
0,Dallas,41
1,Dallas,54
2,Dallas,42
3,Dallas,40
4,Dallas,46
...,...,...
149995,Austin,48
149996,Austin,25
149997,Austin,26
149998,Austin,25


In [34]:
# Transpose the first five rows so the two column levels become the row index.
dataset.head().transpose()

Unnamed: 0,Unnamed: 1,0,1,2,3,4
Columnas_no_importantes,Number,1,2,3,4,5
Columnas_importantes,City,Dallas,Dallas,Dallas,Dallas,Dallas
Columnas_no_importantes,Gender,Male,Male,Male,Male,Male
Columnas_importantes,Age,41,54,42,40,46
Columnas_importantes,Income,40367.0,45084.0,52483.0,40941.0,50289.0
Columnas_no_importantes,Illness,No,No,No,No,No


In [44]:
# Return a copy with the columns sorted by their top-level label;
# `dataset` itself is left unchanged by this cell.
dataset.sort_index(axis='columns', level=0)

Unnamed: 0_level_0,Columnas_importantes,Columnas_importantes,Columnas_importantes,Columnas_no_importantes,Columnas_no_importantes,Columnas_no_importantes
Unnamed: 0_level_1,Age,City,Income,Gender,Illness,Number
0,41,Dallas,40367.0,Male,No,1
1,54,Dallas,45084.0,Male,No,2
2,42,Dallas,52483.0,Male,No,3
3,40,Dallas,40941.0,Male,No,4
4,46,Dallas,50289.0,Male,No,5
...,...,...,...,...,...,...
149995,48,Austin,93669.0,Male,No,149996
149996,25,Austin,96748.0,Male,No,149997
149997,26,Austin,111885.0,Male,No,149998
149998,25,Austin,111878.0,Male,No,149999


In [47]:
# Sort the columns by their top-level label and keep the result as `dataset`.
# Rebinding the returned copy (instead of passing inplace=True) avoids mutating
# the frame object that earlier cells displayed.
dataset = dataset.sort_index(level=0, axis=1)
dataset.head()

Unnamed: 0_level_0,Columnas_importantes,Columnas_importantes,Columnas_importantes,Columnas_no_importantes,Columnas_no_importantes,Columnas_no_importantes
Unnamed: 0_level_1,Age,City,Income,Gender,Illness,Number
0,41,Dallas,40367.0,Male,No,1
1,54,Dallas,45084.0,Male,No,2
2,42,Dallas,52483.0,Male,No,3
3,40,Dallas,40941.0,Male,No,4
4,46,Dallas,50289.0,Male,No,5


In [48]:
# Sort the columns by their second-level label (the column names) and keep
# the result as `dataset`. Rebinding the returned copy (instead of passing
# inplace=True) avoids mutating the existing frame object.
dataset = dataset.sort_index(level=1, axis=1)
dataset.head()

Unnamed: 0_level_0,Columnas_importantes,Columnas_importantes,Columnas_no_importantes,Columnas_no_importantes,Columnas_importantes,Columnas_no_importantes
Unnamed: 0_level_1,Age,City,Gender,Illness,Income,Number
0,41,Dallas,Male,No,40367.0,1
1,54,Dallas,Male,No,45084.0,2
2,42,Dallas,Male,No,52483.0,3
3,40,Dallas,Male,No,40941.0,4
4,46,Dallas,Male,No,50289.0,5
