In [None]:
%matplotlib inline


# Blender with multiple inputs

Example using ``Blender`` with data collected from several sources.

<div class="alert alert-info"><h4>Note</h4><p>Each data source has a different ``BlenderTemplate``.</p></div>

<div class="alert alert-info"><h4>Note</h4><p>``pandas`` can read an .xlsx file with several worksheets as a
          dictionary (``pd.read_excel(path, sheet_name=None)``) where the key
          is the worksheet name and the value is the corresponding DataFrame.
          Therefore, .xlsx files loaded this way can be passed to ``Blender``
          as in the example below.</p></div>


In [None]:
# Import
import pandas as pd

# DataBlend library
from datablend.core.blend.blender import Blender
from datablend.core.widgets.format import ReplaceWidget

# ------------------------
# Constants
# ------------------------
# Templates
# Template for the EXAM data: each entry names a source column
# ('from_name'), the name it is given ('to_name') and the column used as
# its 'timestamp'.
template_exam = [
    # Body temperature, read from the 'Temp' column.
    dict(from_name='Temp',
         to_name='body_temperature',
         timestamp='date_exam',
         unit='celsius'),

    # Gender, read from the 'Sex' column; 'to_replace' pairs the numeric
    # codes with their labels.
    dict(from_name='Sex',
         to_name='gender',
         to_replace={1: 'Male', 2: 'Female'},
         timestamp='date_exam'),
]

# .. note: In order to use the ReplaceWidget, the template needs to
#          include the 'to_replace' column. In this example, we are
#          using the same blender (and therefore the same
#          ReplaceWidget) for both worksheets (exam and lab), so we
#          include an empty 'to_replace' in the template of the latter.

# Template for the LAB data. No values are replaced here; the explicit
# to_replace=None on the last entry is there so that the template still
# carries a 'to_replace' key.
template_lab = [
    # HCT
    dict(from_name='hct',
         to_name='hct',
         timestamp='date',
         unit='%'),

    # WBC
    dict(from_name='wbc',
         to_name='wbc',
         timestamp='date',
         unit='10^9U/L',
         to_replace=None),
]

# Data
# Each table is written as compact row tuples and expanded into one
# record (dict) per row, keyed by the column names given to zip().
data_exam = [
    dict(zip(('pid', 'date_exam', 'Temp', 'Sex'), row))
    for row in (
        ('32dx-001', '10/07/2020', 37.2, 1),
        ('32dx-002', '08/07/2020', 37.5, 2),
        ('32dx-003', '10/07/2020', 36.7, 2),
    )
]

data_lab = [
    dict(zip(('pid', 'date', 'hct', 'wbc'), row))
    for row in (
        ('32dx-001', '11/07/2020', 1.0, 1.5),
        ('32dx-001', '12/07/2020', 2.0, 2.5),
        ('32dx-001', '13/07/2020', 3.0, 3.5),
        ('32dx-001', '14/07/2020', 4.0, 4.5),
        ('32dx-001', '15/07/2020', 5.0, 5.5),
        ('32dx-002', '09/07/2020', 1.0, 3.5),
        ('32dx-002', '10/07/2020', 3.0, 3.2),
        ('32dx-002', '11/07/2020', 3.0, 4.3),
        ('32dx-003', '20/07/2020', 2.0, 1.5),
        ('32dx-003', '21/07/2020', 4.0, 2.5),
    )
]


# Templates as DataFrames, keyed by worksheet name.
templates = dict(
    EXAM=pd.DataFrame(template_exam),
    LAB=pd.DataFrame(template_lab),
)

# Data as DataFrames, using the same worksheet names as keys.
data = dict(
    EXAM=pd.DataFrame(data_exam),
    LAB=pd.DataFrame(data_lab),
)

# Create the blender with a single ReplaceWidget (errors='raise') and
# fit it to the templates; the name is bound to whatever fit() returns.
blender = Blender(widgets=[ReplaceWidget(errors='raise')]).fit(info=templates)

# Transform the data dictionary with the fitted blender.
transformed = blender.transform(data)

# Stack the transformed data using 'pid' as index.
stacked = blender.stack(transformed, index='pid')

# Show
# Print the three stages in order; each stage is a mapping whose entries
# are printed as a name line followed by the value itself.
for header, frames in (
    ("\nOriginal:", data),
    ("\nTransformed:", transformed),
    ("\nStacked:", stacked),
):
    print(header)
    for name, frame in frames.items():
        print("\n%s:" % name)
        print(frame)