# Creazione MultiIndex 

## MultiIndex da tuple
### Esempio tratto dalla guida ufficiale 

In [62]:
import pandas as pd
import numpy as np

# Column MultiIndex with two levels, one label per column (8 columns):
#
#   column             1      2      3      4      5      6      7      8
#   level 0 (first)   'bar'  'bar'  'baz'  'baz'  'foo'  'foo'  'qux'  'qux'
#   level 1 (second)  'one'  'two'  'one'  'two'  'one'  'two'  'one'  'two'
arrays = [
    ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
    ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
]

# zip(*arrays) pairs the i-th label of every level: one tuple per column.
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])

# np.random.randn(3, 8) -> 3 rows x 8 columns of random values, one column
# for each entry of the MultiIndex.
df = pd.DataFrame(np.random.randn(3, 8), index=['A', 'B', 'C'], columns=index)

In [63]:
# Expected value of `index`, with level names and level numbers on top:
#
#   name   -->  first   second
#   level  -->    0       1
#   MultiIndex([('bar', 'one'),
#               ('bar', 'two'),
#               ('baz', 'one'),
#               ('baz', 'two'),
#               ('foo', 'one'),
#               ('foo', 'two'),
#               ('qux', 'one'),
#               ('qux', 'two')],
#              names=['first', 'second'])
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [42]:
# Print levels, names and codes of `index`, each under its own heading.
print('Multindex : propieta` fondamentali')
for heading, attribute in (('LEVELS :', 'levels'),
                           ('Names :', 'names'),
                           ('Codes :', 'codes')):
    print(heading)
    print(getattr(index, attribute))

Multindex : propieta` fondamentali
LEVELS :
[['bar', 'baz', 'foo', 'qux'], ['one', 'two']]
Names :
['first', 'second']
Codes :
[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]


## Spiegazione :

#### `list(zip(*arrays))` :
    
- l'operatore `*` (star) spacchetta la lista esterna `arrays`, passando ciascuna delle sue liste interne come argomento separato a `zip()`;
- la funzione `zip()` restituisce un oggetto zip, cioè un iteratore di tuple, dove la i-esima tupla contiene l'i-esimo elemento di ciascuno degli argomenti. L'iteratore si ferma quando l'iterabile più corto è esaurito. Con un solo argomento iterabile restituisce un iteratore di 1-tuple;
- quindi l'oggetto zip viene passato come parametro alla funzione `list()`, che crea una lista di tuple.



In [5]:
# Transpose the per-level label lists into one tuple per position.
list(zip(*arrays))

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

Viene quindi creato un pandas ***MultiIndex*** :

In [14]:
# Build a MultiIndex from the tuples, giving a name to each of the two levels.
pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

Con `np.random.randn(3, 8)` viene creato un array bidimensionale di 3 righe da 8 elementi ciascuna, ovvero una tabella con 3 righe e 8 colonne :

In [37]:
# Show the level names of the current `index`.
index.names

FrozenList(['first', 'second'])

In [16]:
# Draw a 3 x 8 array of samples from the standard normal distribution.
np.random.randn(3, 8)

array([[ 1.42085025, -1.07552572,  0.05661205,  1.00344212,  0.60825994,
         0.28666134, -0.10548615, -0.13467475],
       [ 2.58756885,  1.22924712, -0.43138202, -0.09772374, -1.11874861,
        -1.27147449, -0.74400236,  0.31706869],
       [-2.210696  , -2.24802756, -0.43701484,  0.01504402,  1.24199428,
         0.53015106,  0.89897939,  0.48728111]])

Creo il dataframe :
- index : l'indice delle righe 
- columns : viene passato il ***MultiIndex*** di 8 elementi

In [19]:
# Rows are labelled A, B, C; the 8 columns are labelled by the MultiIndex.
pd.DataFrame(np.random.randn(3, 8), index=['A', 'B', 'C'], columns=index)

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,-0.290167,-0.540605,2.375592,0.695244,0.475548,1.020698,-1.796938,-0.649318
B,0.507421,-1.660335,0.820755,0.434711,0.547005,1.23901,-0.543008,1.225654
C,0.69577,-0.128786,0.155346,-1.195229,0.481559,1.4512,1.013311,0.07714


In [21]:
# Display the DataFrame assigned to `df` earlier in the notebook.
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.783058,-0.642049,-0.313852,0.585943,0.742195,-0.322761,1.182186,0.478932
B,0.245274,-0.723848,0.146783,0.603417,0.684211,0.506597,0.239184,-1.220607
C,-0.225878,-0.594039,-1.911912,-0.061567,-0.674162,0.458428,0.975964,0.113631


Posso anche aggiungere un ***livello*** :
- arrays = [ ['AA' ...], ['bar', ...],['one', ...]]
- Bisogna aggiungere anche un ***terzo nome***
  - index = pd.MultiIndex.from_tuples(tuples, ***names=['first', 'second','third']***)

In [56]:
import pandas as pd
import numpy as np

# Column MultiIndex with three levels, one label per column (8 columns):
#
#   column             1      2      3      4      5      6      7      8
#   level 0 (first)   'AA'   'AA'   'AA'   'BBB'  'BBB'  'BBB'  'BBB'  'BBB'
#   level 1 (second)  'bar'  'bar'  'baz'  'baz'  'foo'  'foo'  'qux'  'qux'
#   level 2 (third)   'one'  'two'  'one'  'two'  'one'  'two'  'one'  'two'
arrays = [
    ['AA', 'AA', 'AA', 'BBB', 'BBB', 'BBB', 'BBB', 'BBB'],
    ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
    ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
]
tuples = list(zip(*arrays))

# Provide one name for every level.
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second', 'third'])

# np.random.randn(3, 8) -> 3 rows x 8 columns. Only this final expression is
# rendered as the cell output.
pd.DataFrame(np.random.randn(3, 8), index=['A', 'B', 'C'], columns=index)

first,AA,AA,AA,BBB,BBB,BBB,BBB,BBB
second,bar,bar,baz,baz,foo,foo,qux,qux
third,one,two,one,two,one,two,one,two
A,0.843993,0.371232,-1.555755,0.877928,-1.070578,0.95691,1.222008,1.007629
B,0.507904,-0.963002,-0.185847,0.499712,0.069413,-1.003651,-0.609588,-0.433699
C,-0.026223,0.570568,0.434825,-0.541166,0.74447,-0.545312,1.236274,-0.752988


In [61]:
# Expected value of `index`, with level names and level numbers on top:
#
#   name   -->  first   second   third
#   level  -->    0       1        2
#   MultiIndex([( 'AA', 'bar', 'one'),
#               ( 'AA', 'bar', 'two'),
#               ( 'AA', 'baz', 'one'),
#               ('BBB', 'baz', 'two'),
#               ('BBB', 'foo', 'one'),
#               ('BBB', 'foo', 'two'),
#               ('BBB', 'qux', 'one'),
#               ('BBB', 'qux', 'two')],
#              names=['first', 'second', 'third'])
index

MultiIndex([( 'AA', 'bar', 'one'),
            ( 'AA', 'bar', 'two'),
            ( 'AA', 'baz', 'one'),
            ('BBB', 'baz', 'two'),
            ('BBB', 'foo', 'one'),
            ('BBB', 'foo', 'two'),
            ('BBB', 'qux', 'one'),
            ('BBB', 'qux', 'two')],
           names=['first', 'second', 'third'])

# Creazione MultiIndex VUOTO 

## Costruttore : pandas.MultiIndex()

### `class pandas.MultiIndex(levels=None, codes=None, sortorder=None, names=None, dtype=None, copy=False, name=None, verify_integrity=True, _set_identity=True)`

- Parameters

***levels***  : sequence of arrays
            The unique labels for each level.
            
***codes***: sequence of arrays
            Integers for each level designating which label at each location.
            New in version 0.24.0.
            
sortorder : optional int
            Level of sortedness (must be lexicographically sorted by that level).
            
***names*** : optional sequence of objects
            Names for each of the index levels. (name is accepted for compat).

copy : bool, default False
            Copy the meta-data.



In [48]:
# `codes` replaces the former `labels` argument (renamed in pandas 0.24.0).
# Empty levels and empty codes give an empty MultiIndex with two named levels.
index = pd.MultiIndex(levels=[[], []],
                      codes=[[], []],
                      names=['Nome1', 'Nome2'])

In [49]:
# Display the current `index`.
index

MultiIndex([], names=['Nome1', 'Nome2'])

In [81]:
# `codes` replaces the former `labels` argument (renamed in pandas 0.24.0).
# Each code is a position into the `levels` list with the same number:
#   level 0 codes [0, 0] -> 'A', 'A'
#   level 1 codes [1, 2] -> 'b', 'c'
index = pd.MultiIndex(levels=[['A', 'B'], ['a', 'b', 'c']],
                      codes=[[0, 0], [1, 2]],
                      names=['Nome1', 'Nome2'])

In [82]:
# Expected value of `index`, with level names and level numbers on top:
#
#   name   -->  Nome1  Nome2
#   level  -->    0      1
#   MultiIndex([('A', 'b'),
#               ('A', 'c')],
#              names=['Nome1', 'Nome2'])
index

MultiIndex([('A', 'b'),
            ('A', 'c')],
           names=['Nome1', 'Nome2'])

In [68]:
# Print levels, names and codes of `index`, each under its own heading.
print('Multindex : propieta` fondamentali')
for heading, attribute in (('LEVELS :', 'levels'),
                           ('Names :', 'names'),
                           ('Codes :', 'codes')):
    print(heading)
    print(getattr(index, attribute))

Multindex : propieta` fondamentali
LEVELS :
[['A', 'B'], ['a', 'b', 'c']]
Names :
['Nome1', 'Nome2']
Codes :
[[0, 0], [1, 2]]


In [83]:
# Core MultiIndex attributes for this example:
#   levels : [['A', 'B'], ['a', 'b', 'c']]
#   codes  : [[0, 0], [1, 2]]
#
# Codes are read position by position (column-wise); each code selects a
# label from the level with the same number:
#   position 0: codes (0, 1) -> ('A', 'b')
#   position 1: codes (0, 2) -> ('A', 'c')
print(index.codes)

[[0, 0], [1, 2]]
