#### Load in Data

In [None]:
# Import packages

import pandas as pd
import numpy as np
import altair as alt #you may need to install altair if you don't have it on your machine already

In [None]:
# Tools

import functools
def conjunction(*conditions):
    """Combine several boolean masks into one that is true only where all are.

    The masks are folded pairwise, left to right, with ``np.logical_and``.
    A single mask is returned unchanged. Calling with no masks raises
    ``TypeError``, exactly as reducing an empty sequence would.
    """
    masks = iter(conditions)
    try:
        combined = next(masks)
    except StopIteration:
        raise TypeError("reduce() of empty iterable with no initial value") from None
    for mask in masks:
        combined = np.logical_and(combined, mask)
    return combined

In [None]:
%%html
    <style>
        @import url('https://fonts.googleapis.com/css?family=Encode+Sans:400');
        @import url('https://fonts.googleapis.com/css?family=Open+Sans:400');
        @import url('https://fonts.googleapis.com/css?family=Open+Sans:700');
    </style>

In [None]:
def catalyst(*args, **kwargs):
    """Build the 'catalyst' Altair theme.

    Returns a dictionary holding the default chart ``width`` and ``height``
    plus a Vega-Lite ``config`` section: title, x/y axes, categorical colour
    range, legend, tooltip, view, and default settings for ``text`` and
    ``bar`` marks. Positional and keyword arguments are accepted and ignored.

    The ``text`` and ``bar`` mark settings live at the top level of
    ``config``. They were previously nested inside ``view`` (where they had
    no effect), so they now act as chart defaults; individual charts can
    still override them on the mark itself.
    """
    # Typography
    font = "Times New Roman"
    labelFont = "Times New Roman"
    sourceFont = "Times New Roman"
    # Axes
    axisColor = "#000000"
    gridColor = "#DEDDDD"
    # Colors
    main_palette = [
        '#2980B9', #(light blue)
        '#003366', #(dark blue)
        '#FFAE19', #(gold)
        '#D68910', #(dark gold)
        '#E67E22', #(orange)
        '#D35400', #(dark orange)
        '#FF4500', #(red)
        '#FF6162', #(dark pink/red)
        '#438B28', #(green)
        '#45B39D', #(teal)
        '#99ADC1', #(light gray)
        '#212F3C', #(dark gray)
    ]
    # A mark's default colour must be a single colour, not the whole palette;
    # the palette itself is supplied through the categorical range below.
    markColor = main_palette[0]
    return {
        "width": 800,
        "height": 400,
        "config": {
            "title": {
                "fontSize": 18,
                "font": font,
                "anchor": "center",
                "color": "#000000",  # Vega-Lite's title colour key is "color"
            },
            "axisX": {
                "domain": True,
                "domainColor": axisColor,
                #"domainWidth": 1,
                "grid": False,
                "labelFont": labelFont,
                "labelFontSize": 14,
                #"labelAngle": 0,
                #"tickColor": axisColor,
                #"tickSize": 5,
                "titleFont": font,
                "titleFontSize": 14,
                "titleFontWeight": "normal",
                #"titlePadding": 10,
                "title": "X Axis Title (units)",
                #"titleAngle":0
            },
            "axisY": {
                "domain": True,
                "grid": False,
                "gridColor": gridColor,
                "gridWidth": 1,
                "labelFont": labelFont,
                "labelFontSize": 14,
                "labelAngle": 0,
                "ticks": False,
                "titleFont": font,
                "titleFontSize": 14,
                "titleFontWeight": "normal",
                "titlePadding": 10,
                "title": "Y Axis Title (units)",
                #"titleY": -10,
                #"titleX": 18,
                #"titleAngle": 0 #-90
            },
            "range": {
                "category": main_palette
            },
            "legend": {
                "labelFont": labelFont,
                "labelFontSize": 14,
                "symbolType": "square",
                "symbolSize": 100,
                "titleFont": font,
                "titleFontSize": 14,
                "titleFontWeight": "normal",
                "title": "Legend",
                "orient": "right",
                "offset": 0,
            },
            "tooltip": {
                "font": "Open Sans"
            },
            "view": {
                "stroke": "transparent",
            },
            ### MARKS CONFIGURATIONS ###
            "text": {
                "font": sourceFont,
                "color": markColor,
                "fontSize": 14,
                "align": "right",
                "fontWeight": 400,
                "size": 14,
            },
            "bar": {
                "size": 40,
                "binSpacing": 1,
                "continuousBandSize": 30,
                "discreteBandSize": 30,
                "fill": markColor,
                "stroke": None,  # no outline around bars
            },
        }
    }

In [None]:
# Register the catalyst() theme function under the name 'catalyst', then make
# it the active Altair theme for the charts built in the cells below.
alt.themes.register('catalyst', catalyst)
# The trailing semicolon keeps the call's return value out of the cell output.
alt.themes.enable('catalyst');

In [None]:
# Load MTF Data here
#Total number of households surveyed in the overall survey: ____ (change as necessary)

# The CSV is parsed in chunks and the pieces are then stitched back together.
# Note that the complete table still has to fit in memory once concatenated.
chunksize = 10_000  # rows parsed per chunk; tiny chunks only add per-chunk overhead

mtf_list = [
    chunk
    for chunk in pd.read_csv("placeholder.csv", encoding='latin-1', chunksize=chunksize)
]

# Single concatenation of all chunks (already a DataFrame, no re-wrapping needed)
data = pd.concat(mtf_list, axis=0)

#### Charts

In [None]:
# Bar Chart Example: Primary Wall Material

# Number of households in the full survey sample (change as necessary).
# Used for the "share of households" figure reported under the chart.
TOTAL_HOUSEHOLDS = 3668

# Data source
alt.data_transformers.disable_max_rows() #this code disables the max row limit for Altair

# Keep only rows that have both an answer and an electricity tier.
# Missing values are real NaN, so they are removed with dropna(); comparing a
# column against the string 'NaN' never filters them out.
source = data.dropna(subset=['b10', 'elc_aggr_tier']) # Change your dataframe and variables here
df = source[['hh_id','elc_aggr_tier', 'locality', 'b10']] #change variable name and add any filter variables

# Draw the chart
chart = alt.Chart(df).transform_joinaggregate(  #the next 4 lines transform the y-axis to show percentages
    total = 'count(*)'
).transform_calculate(
    pct = '1 / datum.total'
).encode(  #the encode() section tells the chart what to show for the x and y axes
    x = alt.X('b10', # Change variable
              sort=alt.EncodingSortField(field="b10", # Change variable
                                         op="count",
                                         order='descending'),
              axis = alt.Axis(title = " ")),
    y = alt.Y('sum(pct):Q', axis = alt.Axis(title = '', format = '%')),
    tooltip = 'b10' # Change variable
).properties( #the properties() adds additional stylistic changes
    height = 400,
    width = 500
)

# Locality Filter (the None option leaves the data unfiltered)
locality_options = [None, 0, 1]
locality_labels = ['National', 'Urban', 'Rural']

locality_dropdown = alt.binding_select(
    options = locality_options,
    labels = locality_labels,
    name = "Locality Breakdown  ")
locality_select = alt.selection_single(
    fields = ['locality'],
    bind = locality_dropdown)

# Tier Filter
tiers = [None, 'Tier 0', 'Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
tier_labels = ['All Tiers', 'Tier 0', 'Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
tier_dropdown = alt.binding_select(options = tiers,
                                   labels = tier_labels,
                                   name = "Tiers  ")
tier_select = alt.selection_single(fields = ['elc_aggr_tier'],
                                   bind = tier_dropdown)

# Percentage of surveyed households whose responses are shown in the chart
pct_responses = round(len(pd.unique(df['hh_id'])) / TOTAL_HOUSEHOLDS * 100, 2)

# Final Chart with bells and whistles
final = (chart.mark_bar(color = '#304ca2') + chart.mark_text( # This blue is from the AIP site.
    align = 'center',
    baseline = 'bottom'
).encode(
    text = alt.Text('sum(pct):Q',
                    format = '.2%'))
).add_selection(
    locality_select
).transform_filter(
    locality_select
).add_selection(
    tier_select
).transform_filter(
    tier_select
).properties(
    title={
      "text": ["The walls of the dwelling are mainly made of what material?"], #change title as necessary
      "color": "black",
      "subtitleColor": "black"
    }
)

# Wrap the chart so a footer note can be placed underneath it
final = alt.concat(final,
    title=alt.TitleParams(
        ['', '',
         'This chart includes responses from {}% of households in the Nigeria MTF survey sample (total size = {:,}).'.format(
             pct_responses, TOTAL_HOUSEHOLDS),
         'Use the dropdown filters to interact with the data.'],
        baseline='bottom',
        orient='bottom',
        anchor='start',
        font = 'Times New Roman',
        fontWeight='normal',
        fontStyle = "italic",
        fontSize=15)
)

final

In [None]:
# Histogram Example: How much do you pay for rent each month?

# Number of households in the full survey sample (change as necessary).
# Used for the "share of households" figure reported under the chart.
TOTAL_HOUSEHOLDS = 3668

# Data Source
alt.data_transformers.disable_max_rows()

# Keep only rows that have both an expenditure and an electricity tier.
# dropna() tests for missing values explicitly. Using the numeric column itself
# as a mask would instead discard zero expenditures and keep NaN.
source = data.dropna(subset=['l_expenditure', 'elc_aggr_tier'])  #Change dataframe and column names here
df = source[['hh_id','elc_aggr_tier','l_expenditure', 'locality']] #change variable name and add any filter variables

# Locality Filter (the None option leaves the data unfiltered)
locality_options = [None, 0, 1]
locality_labels = ['National', 'Urban', 'Rural']

locality_dropdown = alt.binding_select(
    options = locality_options,
    labels = locality_labels,
    name = "Locality Breakdown  ")
locality_select = alt.selection_single(
    fields = ['locality'],
    bind = locality_dropdown)

# Tier Filter
tiers = [None, 'Tier 0', 'Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
tier_labels = ['All Tiers', 'Tier 0', 'Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
tier_dropdown = alt.binding_select(options = tiers,
                                   labels = tier_labels,
                                   name = "Tiers  ")
tier_select = alt.selection_single(fields = ['elc_aggr_tier'],
                                   bind = tier_dropdown)

# Percentage of surveyed households whose responses are shown in the chart
pct_responses = round(len(pd.unique(df['hh_id'])) / TOTAL_HOUSEHOLDS * 100, 2)

# Draw the Chart
# `total` is computed before the dropdown filters run, so each bar is a share
# of all rows in df rather than of the currently filtered subset.
final = alt.Chart(df).transform_joinaggregate(
    total = 'count(*)'
).transform_calculate(
    pct = '1 / datum.total'
).mark_bar().encode(
    alt.X("l_expenditure:Q", # change variable name here
          bin = alt.Bin(extent=[1, 160000], step=10000), # Change the step to adjust bin size
          scale = alt.Scale(domain=(0,160000), clamp = True), # Change the axes limits
          axis = alt.Axis(title = "Nigerian Naira")), #change title as necessary
    alt.Y('sum(pct):Q', axis = alt.Axis(format='%'), title = ""),
    tooltip = [alt.Tooltip('l_expenditure:Q', title = "Response (Binned)"), # change variable name here
               alt.Tooltip('sum(pct):Q', format = '.2%', title = "Percentage")]
).add_selection(
    locality_select
).transform_filter(
    locality_select
).add_selection(
    tier_select
).transform_filter(
    tier_select
).properties(
    height = 400,
    width = 500,
    title={
      "text": ["How much do you pay for rent each month?"], #change title as necessary
      "color": "black",
      "subtitleColor": "black"
    }
)

# Wrap the chart so a footer note can be placed underneath it
final = alt.concat(final,
    title=alt.TitleParams(
        ['', '',
         'This chart includes responses from {}% of households in the Nigeria MTF survey sample (total size = {:,}).'.format(
             pct_responses, TOTAL_HOUSEHOLDS),
         'Use the dropdown filters to interact with the data.'],
        baseline='bottom',
        orient='bottom',
        anchor='start',
        font = 'Times New Roman',
        fontWeight='normal',
        fontStyle = "italic",
        fontSize=15)
)

final

In [None]:
# Heatmap Example: Floor vs Wall Material

# Number of households in the full survey sample (change as necessary).
# Used for the "share of households" figure reported under the chart.
TOTAL_HOUSEHOLDS = 3668

# Data
alt.data_transformers.disable_max_rows()

# Keep only rows where every plotted/filter column is present. Missing values
# are real NaN, so dropna() is used; comparing against the string 'NaN' never
# removes them.
source = data.dropna(subset=['b10', 'b11', 'b12', 'elc_aggr_tier']) #change dataframe and variables
df = source[['hh_id','elc_aggr_tier','b10', 'b11', 'b12', 'locality']] #change variable

# Configure common options: one row per (wall, floor) pair with its row count
base = alt.Chart(df).transform_aggregate(
    chart_count = 'count()',
    groupby = ['b10', 'b12']
).encode(
    alt.X('b12:N', #change variable name
          scale = alt.Scale(paddingInner=0),
          title = "Floor Material"), #change title
    alt.Y('b10:N', #change variable name
          scale = alt.Scale(paddingInner=0),
          title = "Wall Material"), #change title
)

# Configure heatmap
heatmap = base.mark_rect().encode(
    color = alt.Color('chart_count:Q',
        scale = alt.Scale(scheme='blues'),
        legend = None
    )
)

# Configure text: each cell's count as a share of all rows in df.
# Alignment is stated explicitly so labels stay centred in their cells.
text = base.mark_text(
    baseline = 'middle',
    align = 'center'
).transform_calculate(
    pct = alt.datum.chart_count / df.shape[0]
).encode(
    text = alt.Text('pct:Q', format = '.2%'),
    color = alt.condition(
        alt.datum.chart_count < 300, # Change the color of the text here
        alt.value('black'),
        alt.value('white')
    )
)

# Locality Filter (the None option leaves the data unfiltered)
locality_options = [None, 0, 1]
locality_labels = ['National', 'Urban', 'Rural']

locality_dropdown = alt.binding_select(
    options = locality_options,
    labels = locality_labels,
    name = "Locality Breakdown  ")
locality_select = alt.selection_single(
    fields = ['locality'],
    bind = locality_dropdown)

# Tier Filter
tiers = [None, 'Tier 0', 'Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
tier_labels = ['All Tiers', 'Tier 0', 'Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
tier_dropdown = alt.binding_select(options = tiers, labels = tier_labels, name = "Tiers  ")
tier_select = alt.selection_single(fields = ['elc_aggr_tier'],
                                   bind = tier_dropdown)

# NOTE: b11 stays in df so that a filter on it can be added in the same way as
# the tier filter: build a binding_select and selection_single on fields=['b11'],
# then chain .add_selection(...).transform_filter(...) onto the chart below.

# Percentage of surveyed households whose responses are shown in the chart
pct_responses = round(len(pd.unique(df['hh_id'])) / TOTAL_HOUSEHOLDS * 100, 2)

# Draw the chart
final = (heatmap + text).properties(
    height = 300,
    width = 800
).add_selection(
    locality_select
).transform_filter(
    locality_select
).add_selection(
    tier_select
).transform_filter(
    tier_select
).properties(
    title={
      "text": ['Floor vs. Wall Dwelling Material'],
      "color": "black",
      "subtitleColor": "black"
    }
)

# Wrap the chart so a footer note can be placed underneath it
final = alt.concat(final,
    title=alt.TitleParams(
        ['', '',
         'This chart includes responses from {}% of households in the Nigeria MTF survey sample (total size = {:,}).'.format(
             pct_responses, TOTAL_HOUSEHOLDS),
         'Use the dropdown filters to interact with the data.'],
        baseline='bottom',
        orient='bottom',
        anchor='start',
        font = 'Times New Roman',
        fontWeight='normal',
        fontStyle = "italic",
        fontSize=15)
)

final

In [None]:
# Step 1: create the scatterplot
# How much would you sell your land for?

# Number of households in the full survey sample (change as necessary).
TOTAL_HOUSEHOLDS = 3668

# Keep rows where the household id, land size, sale price and unit are all present
df = data.dropna(subset=['hh_id', 'n_n_1b', 'n_n_1c', 'n_n_1b_unit']) # Change your dataframe and variables here

# Filters (the None option of each dropdown leaves the data unfiltered)
locality_options = [None, 'Urban', 'Rural']
locality_labels = ['National', 'Urban', 'Rural']
locality_dropdown = alt.binding_select(
    options = locality_options,
    labels = locality_labels,
    name = "Locality Breakdown  ")
locality_select = alt.selection_single(
    fields = ['locality_ur'],
    bind = locality_dropdown)

tiers = [None, 'Tier 0', 'Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
tier_labels = ['All Tiers', 'Tier 0', 'Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
tier_dropdown = alt.binding_select(options = tiers, labels = tier_labels, name = "Tiers  ")
tier_select = alt.selection_single(fields = ['elc_aggr_tier'],
                                   bind = tier_dropdown)

# One unit filter is enough: it is attached to the combined
# scatterplot + histogram chart in the final step.
# `fields` takes plain column names (no ':N' type suffix).
units = [None, 'Acre', 'sq metres', 'Other']
unit_labels = ['All units', 'Acre', 'Square meter', 'Other']
unit_dropdown = alt.binding_select(options = units, labels = unit_labels, name = "Units  ")
unit_select = alt.selection_single(fields = ['n_n_1b_unit'],
                                   bind = unit_dropdown)

# Click-and-drag box selection on the scatterplot; Step 2 uses it to filter the histogram
brush = alt.selection_interval()

# Scatterplot
points = alt.Chart(df).mark_point().encode(
    x = alt.X('n_n_1b:Q',
              axis = alt.Axis(title = 'What is the total size of the land?'),
              scale = alt.Scale(domain=(1,100), clamp = True)),
    y = alt.Y('n_n_1c:Q',
              axis = alt.Axis(title = 'How much would you sell your land for?'),
              scale = alt.Scale(domain=(1,70000000), clamp = True)),
    color = alt.Color('n_n_1b_unit:N', legend = alt.Legend(title = "Units"))
)

# Percentage of surveyed households whose responses are shown in the chart
pct_responses = round(len(pd.unique(df['hh_id'])) / TOTAL_HOUSEHOLDS * 100, 2) #change dataframe variable name

scatterplot = points.properties(
    height = 300,
    width = 500
).add_selection(brush)

scatterplot

In [None]:
# Step 2: add the histogram
df = data.dropna(subset=['n_n_1b']) # Change your dataframe and variables here

# Click-and-drag box selection: drawn on the scatterplot, and used below to
# filter the histogram. Both charts in this cell share this one object.
brush = alt.selection_interval()

points = alt.Chart(df).mark_point().encode(
    x = alt.X('n_n_1b:Q',
              axis = alt.Axis(title = 'What is the total size of the land?'),
              scale = alt.Scale(domain=(1,100), clamp = True)),
    y = alt.Y('n_n_1c:Q',
              axis = alt.Axis(title = 'How much would you sell your land for?'),
              scale = alt.Scale(domain=(1,70000000), clamp = True)),
    color = alt.Color('n_n_1b_unit:N', legend = alt.Legend(title = "Units"))
)

scatterplot = points.properties(
    height = 300,
    width = 500
).add_selection(brush)

# `total` is computed before the brush filter is applied, so each bar is a
# share of all rows in df rather than of the brushed subset.
chart = alt.Chart(df).mark_bar(
).transform_joinaggregate(
    total = 'count(*)'
).transform_calculate(
    pct = '1 / datum.total'
).encode(
    alt.X("n_n_1b",
          bin = alt.Bin(extent=[0, 100], step=5),
          scale = alt.Scale(domain=(0,100), clamp = True),
          axis = alt.Axis(title = "Units of Land")),
    alt.Y('sum(pct):Q', axis = alt.Axis(format='%'), title = "Percent"),
    tooltip = alt.Tooltip('sum(pct):Q', format = '.2%')
)

# Draw the chart
histogram = chart.properties(
    height = 300,
    width = 500,
    title = ''
).transform_filter(brush)


scatterplot & histogram

In [None]:
# Finally, add filters

# Stack the scatterplot on top of the histogram, then attach each dropdown
# selection and filter on it, one after another.
combined = scatterplot & histogram
for selector in (locality_select, tier_select, unit_select):
    combined = combined.add_selection(selector).transform_filter(selector)

# Heading shown above the stacked charts
heading = {
    "text": ["Example (YOUR TITLE HERE) "],
    "color": "black",
    "subtitleColor": "black",
}
combined = combined.properties(title=heading)

# Footer note shown underneath the charts (two blank lines add spacing)
footer_lines = [
    '',
    '',
    'This chart includes responses from {}% of households in the Nigeria MTF survey sample (total size = 3,668).'.format(
        pct_responses),
    'Use the dropdown filters to interact with the data.',
]
footer = alt.TitleParams(
    footer_lines,
    baseline='bottom',
    orient='bottom',
    anchor='start',
    font='Times New Roman',
    fontWeight='normal',
    fontStyle="italic",
    fontSize=15,
)

final = alt.concat(combined, title=footer)

final

#### Creating Filters

In [None]:
#########################################################
################### Creating Filters ####################
#########################################################

# Three main parts to creating filters:
# 1) Include the filter related data in column selection
# 2) Create the base code for the filter
# 3) Encode the filter in the chart encoding

# ---------------------------------------------------------------
# 1) Selecting filter column
# When the chart data is selected from the original dataframe, make sure to
# include the filter-related column. A filter on a column that is not in the
# chart's data cannot match any row.
# ---------------------------------------------------------------
alt.data_transformers.disable_max_rows()
source = data.dropna(subset=['b10', 'elc_aggr_tier']) # Change your dataframe and variables here
df = source[['hh_id','elc_aggr_tier', 'locality', 'b10', 'c182']]  # <--- Add any filter variables here (in this case, c182)

# The chart must be built from the dataframe that contains the filter column
chart = alt.Chart(df).transform_joinaggregate(
    total = 'count(*)'
).transform_calculate(
    pct = '1 / datum.total'
).encode(
    x = alt.X('b10', # Change variable
              sort=alt.EncodingSortField(field="b10", # Change variable
                                         op="count",
                                         order='descending'),
              axis = alt.Axis(title = " ")),
    y = alt.Y('sum(pct):Q', axis = alt.Axis(title = '', format = '%')),
    tooltip = 'b10' # Change variable
).properties(
    height = 400,
    width = 500
)

# ---------------------------------------------------------------
# 2) Base Code Example: Primary Source of Electricity
# ---------------------------------------------------------------
# First, create a list of all possible responses for a given filter based on original data
# You can use df['column'].value_counts() to get all possible responses for a given filter
electricity_source = [None, 'Dry-cell battery', 'National Grid Connection', 'No electricity', 'Electric generator',
              'Other, specify', 'Solar Multi-Light Product', 'Solar Home System', 'Solar Lantern',
               'Local Mini Grid connection', 'Rechargeable Battery']

# Second, create a list of response labels that you want the user to see for the filter
# These do not have to match the responses given in the original dataset
electricity_source_labels = ['All Responses', 'Dry-cell battery', 'National Grid Connection', 'No electricity',
                     'Electric generator', 'Other', 'Solar Multi-Light Product', 'Solar Home System', 'Solar Lantern',
                     'Local Mini Grid Connection', 'Rechargeable Battery']

#Third, these steps tell altair what variables to refer to when creating the filters
electricity_source_dropdown=alt.binding_select(options=electricity_source, #Match the first list
                                 labels = electricity_source_labels, #Match the second list
                                 name = "Primary source of electricity  ")  #This is the name of the filter the user sees
electricity_source_select=alt.selection_single(fields=['c182'],  #Change the column name to the column the data comes from
                                bind=electricity_source_dropdown)

# The existing locality and tier filters, defined here so the example runs on its own
locality_options = [None, 0, 1]
locality_labels = ['National', 'Urban', 'Rural']
locality_dropdown = alt.binding_select(
    options = locality_options,
    labels = locality_labels,
    name = "Locality Breakdown  ")
locality_select = alt.selection_single(
    fields = ['locality'],
    bind = locality_dropdown)

tiers = [None, 'Tier 0', 'Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
tier_labels = ['All Tiers', 'Tier 0', 'Tier 1', 'Tier 2', 'Tier 3', 'Tier 4', 'Tier 5']
tier_dropdown = alt.binding_select(options = tiers, labels = tier_labels, name = "Tiers  ")
tier_select = alt.selection_single(fields = ['elc_aggr_tier'],
                                   bind = tier_dropdown)

# ---------------------------------------------------------------
# 3) Encoding the filter example
# Each filter needs two chained calls on the chart:
#   .add_selection(<select>)     puts the dropdown on the chart
#   .transform_filter(<select>)  makes the chart respond to it
# ---------------------------------------------------------------
final = (chart.mark_bar(color = '#304ca2') + chart.mark_text( # This blue is from the AIP site.
    align = 'center',
    baseline='bottom'
).encode(
    text = alt.Text('sum(pct):Q',
                    format = '.2%'))
).add_selection(                      # <------------ New filter: add the dropdown...
    electricity_source_select
).transform_filter(                   # <------------ ...and filter the data on it
    electricity_source_select
).add_selection(
    locality_select
).transform_filter(
    locality_select
).add_selection(
    tier_select
).transform_filter(
    tier_select
).properties(
    title={
      "text": ["The walls of the dwelling are mainly made of what material?"], #change title as necessary
      "color": "black",
      "subtitleColor": "black"
    }
)

final

In [None]:
# Export the chart currently bound to `final`. Several cells above assign to
# that name, so the chart saved is the one from whichever cell ran last.

# Save as an HTML file
final.save('final.html')

# Save as a JSON file
final.save('final.json')