# Tests of pandas tics

In [48]:
import pandas as pd
import numpy as np
from pandas import DataFrame
from   typing      import Tuple, List


## Define a test ntuple

In [71]:
# hierarchical indices and columns
index = pd.MultiIndex.from_product([[2013, 2014], [1, 2]],
                                   names=['year', 'visit'])
columns = pd.MultiIndex.from_product([['Bob', 'Guido', 'Sue'], ['HR', 'Temp']],
                                     names=['subject', 'type'])

# mock some data
data = np.round(np.random.randn(4, 6), 1)
data[:, ::2] *= 10
data += 37

# create the DataFrame
health_data = pd.DataFrame(data, index=index, columns=columns)
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,31.0,38.1,43.0,35.4,38.0,38.4
2013,2,39.0,35.1,39.0,37.1,55.0,36.7
2014,1,32.0,36.7,41.0,37.0,35.0,36.6
2014,2,42.0,39.5,35.0,37.0,1.0,39.2


***
def get_index_slice_from_multi_index(df     : DataFrame,
                                          i      : int,
                                          unique : bool = True)->np.array:

***

Suppose that you want a list of the years in the ``health_data`` DF. This may be needed if you need to operate on data of each year. How do you get such a list? Let's start by writting explicitly the multi-index

In [72]:
health_data.index

MultiIndex([(2013, 1),
            (2013, 2),
            (2014, 1),
            (2014, 2)],
           names=['year', 'visit'])

Now lets get a list

In [73]:
list(zip(*health_data.index.values))

[(2013, 2013, 2014, 2014), (1, 2, 1, 2)]

We can select now the first element

In [74]:
list(zip(*health_data.index.values))[0]

(2013, 2013, 2014, 2014)

And if we want just the years that are different to the other years:

In [75]:
np.unique(list(zip(*health_data.index.values))[0])

array([2013, 2014])

With this, we can write a function:

#### Function

In [76]:
def get_index_slice_from_multi_index(df     : DataFrame,
                                          i      : int,
                                          unique : bool = True)->np.array:
    """
    Given a DataFrame df with multiindex, return an array
    containing a view (with just the unique elements if specified) of
    index i
    """
    vi = df.index.values
    if unique:
        return np.unique(list(zip(*vi))[i])
    else:
        return list(zip(*vi))[i]


Now we can get the years easily:

In [77]:
get_index_slice_from_multi_index(health_data, i = 0)

array([2013, 2014])

#### Test for function

In [78]:
def test_get_index_slice_from_multi_index_unique(df):
    lst = get_index_slice_from_multi_index(df, i = 0)
    return np.all(np.equal(lst, np.array([2013, 2014])))

In [79]:
test_get_index_slice_from_multi_index_unique(health_data)

True

We can now try the other dimension, keeping all indexes

In [80]:
get_index_slice_from_multi_index(health_data, i = 1, unique=False)

(1, 2, 1, 2)

And add a second test

In [81]:
def test_get_index_slice_from_multi_index_not_unique(df):
    lst = get_index_slice_from_multi_index(df, i = 1, unique=False)
    return np.all(np.equal(lst, (1, 2, 1, 2)))

In [82]:
test_get_index_slice_from_multi_index_not_unique(health_data)

True