In [18]:
import pandas as pd
import bqplot
import numpy as np
import ipywidgets
import matplotlib.pyplot as plt

In [19]:
# Load the building inventory from a CSV file (path is relative to the working directory).
building = pd.read_csv('building_inventory.csv')

In [20]:
# Display the loaded inventory as the cell output.
building

Unnamed: 0,Agency Name,Location Name,Address,City,Zip code,County,Congress Dist,Congressional Full Name,Rep Dist,Rep Full Name,...,Bldg Status,Year Acquired,Year Constructed,Square Footage,Total Floors,Floors Above Grade,Floors Below Grade,Usage Description,Usage Description 2,Usage Description 3
0,Department of Natural Resources,Anderson Lake Conservation Area - Fulton County,Anderson Lake C.a.,Astoria,61501,Fulton,17,Cheri Bustos,93,Hammond Norine K.,...,In Use,1975,1975,144,1,1,0,Unusual,Unusual,Not provided
1,Department of Natural Resources,Anderson Lake Conservation Area - Fulton County,Anderson Lake C.a.,Astoria,61501,Fulton,17,Cheri Bustos,93,Hammond Norine K.,...,In Use,2004,2004,144,1,1,0,Unusual,Unusual,Not provided
2,Department of Natural Resources,Anderson Lake Conservation Area - Fulton County,Anderson Lake C.a.,Astoria,61501,Fulton,17,Cheri Bustos,93,Hammond Norine K.,...,In Use,2004,2004,144,1,1,0,Unusual,Unusual,Not provided
3,Department of Natural Resources,Anderson Lake Conservation Area - Fulton County,Anderson Lake C.a.,Astoria,61501,Fulton,17,Cheri Bustos,93,Hammond Norine K.,...,In Use,2004,2004,144,1,1,0,Unusual,Unusual,Not provided
4,Department of Natural Resources,Anderson Lake Conservation Area - Fulton County,Anderson Lake C.a.,Astoria,61501,Fulton,17,Cheri Bustos,93,Hammond Norine K.,...,In Use,2004,2004,144,1,1,0,Unusual,Unusual,Not provided
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8857,Department of Transportation,Belvidere Maintenance Storage Facility - Boone...,9797 Illinois Rte. 76,Belvidere,61008,Boone,16,Adam Kinzinger,69,Sosnowski Joe,...,In Use,0,0,432,1,0,0,Storage,,
8858,Department of Transportation,Belvidere Maintenance Storage Facility - Boone...,9797 Illinois Rte 76,Belvidere,61008,Boone,16,Adam Kinzinger,69,Sosnowski Joe,...,In Use,0,0,330,1,0,0,Storage,,
8859,Department of Transportation,Quincy Maintenance Storage Facility,800 Koch's Lane,Quincy,62305,Adams,18,Darin M. LaHood,94,Frese Randy E.,...,In Use,0,1987,130,1,0,0,Storage,High Hazard,
8860,Illinois Community College Board,Illinois Valley Community College - Oglesby,815 North Orlando Smith Avenue,Oglesby,61348,LaSalle,16,Adam Kinzinger,76,Long Jerry Lee,...,In Use,1971,1971,49552,1,1,0,Education,Education,Not provided


### Left component

In [33]:
# Distinct values of the 'Congress Dist' column; these become the heat-map rows.
building['Congress Dist'].unique()

array([17, 15, 16, 18, 11, 12,  3, 13,  6,  0, 14,  7,  8,  9,  2,  1, 10,
        5,  4], dtype=int64)

In [34]:
# Distinct agency names, in order of first appearance in the inventory.
agency_data = building.loc[:, 'Agency Name'].unique()
# Show how many distinct agencies there are.
len(agency_data)

35

In [35]:
# Forward lookup: agency name -> integer code, numbered in order of first appearance.
agency_dic = {
    agency: code
    for code, agency in enumerate(building['Agency Name'].unique())
}

In [36]:
# Encode every row's agency name as its integer code so it can be binned numerically.
building['transformed_agency'] = [agency_dic[agency] for agency in building['Agency Name']]

In [37]:
# Create a dictionary mapping each transformed (integer) value back to the real agency name.
agen_dic = building.set_index(['transformed_agency'])['Agency Name'].to_dict()

# Order the agency names by their integer code so they line up with the numeric scale.
# The length comes from the mapping itself rather than a hard-coded agency count,
# so the cell keeps working if the inventory gains or loses agencies.
named_scale = [agen_dic[code] for code in range(len(agen_dic))]

In [38]:
def generate_2d_hist_from_cong_agency(data, ncong=19, nagen=35, takeLog=True):
    """Bin total square footage by congressional district and agency code.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'transformed_agency', 'Congress Dist'
        and 'Square Footage'.
    ncong : int
        Number of unit-width district bins; bin edges are 0, 1, ..., ncong.
    nagen : int
        Number of unit-width agency bins; bin edges are 0, 1, ..., nagen.
    takeLog : bool
        If True, non-positive totals are replaced by NaN and the remaining
        totals are log10-transformed.

    Returns
    -------
    hist2d : numpy.ndarray
        Array of shape (ncong, nagen): rows are districts, columns are
        agency codes, values are summed (optionally log10) square footage.
    cong_edges : numpy.ndarray
        Bin edges along the district axis.
    agen_edges : numpy.ndarray
        Bin edges along the agency axis.
    """
    cong_bins = list(range(ncong + 1))
    agen_bins = list(range(nagen + 1))
    # histogram2d returns the edges in argument order: x (agency) first, then y (district).
    hist2d, agen_edges, cong_edges = np.histogram2d(data['transformed_agency'],
                                                    data['Congress Dist'],
                                                    weights=data['Square Footage'],  # sum square footage per bin
                                                    bins=[agen_bins, cong_bins])

    # Transpose so that rows index districts and columns index agencies.
    hist2d = hist2d.T
    if takeLog:  # log10 transformation of the colormap
        hist2d[hist2d <= 0] = np.nan  # set zeros to NaNs so log10 is never taken of 0
        hist2d = np.log10(hist2d)

    return hist2d, cong_edges, agen_edges

In [39]:
# Build the heat-map values: hist2d has one row per congressional district and one column per agency code.
hist2d, cong_edges, agen_edges = generate_2d_hist_from_cong_agency(building)

In [40]:
# Group the inventory by (district, agency code).
data = building.groupby(['Congress Dist','transformed_agency'])
# Sum only the numeric columns: text columns (names, addresses, usage descriptions)
# have no meaningful total, and how they are handled differs across pandas versions.
sum_data = data.sum(numeric_only=True)

In [41]:
# 1. Data
# The colour values are taken from `hist2d`, computed in an earlier cell.

# 2. Scale
x_sc = bqplot.OrdinalScale() # true scale, used by the heat map's columns
x_named_sc = bqplot.OrdinalScale(domain=named_scale) # visual scale, with real agency names
y_sc = bqplot.OrdinalScale()
col_sc = bqplot.ColorScale(scheme="Blues")

# 3. Axes
# The x axis is drawn on the named scale so its ticks show agency names,
# rotated 90 degrees.
ax_x = bqplot.Axis(scale=x_named_sc, tick_rotate=90)
ax_y = bqplot.Axis(scale=y_sc, label='Congress District', orientation='vertical')
ax_col = bqplot.ColorAxis(scale=col_sc, orientation='vertical', side='right')

# 4. Mark
# Clicking a cell selects it; the `selected` trait is observed in a later cell.

heat_map = bqplot.GridHeatMap(color=hist2d,
                             scales={'color':col_sc, 'row':y_sc, 'column':x_sc},
                             interactions={'click':'select'},
                             anchor_style={'fill':'yellow'},
                             selected_style={'opacity':1.0},
                             unselected_style={'opacity':0.8})



# Put it together as a figure
fig = bqplot.Figure(marks=[heat_map], axes=[ax_col, ax_x, ax_y])

### Right Component

In [42]:
# Initial selection shown before any click: district row i = 0, agency column j = 0
# (same i/j convention as the heat-map selection callback).
i, j=0, 0
# 1. Data
# Keep only rows with Year Acquired >= 1700; the column contains 0 placeholders.
selected_data = building[(building['transformed_agency']==j)
                            &(building['Congress Dist']==i)
                            &(building['Year Acquired']>=1700)]
# Select the column before aggregating so only square footage is summed per year
# (summing every column also tries to aggregate the text columns).
aim_data = selected_data.groupby('Year Acquired')['Square Footage'].sum()

# 2. Scale
# Note: these rebind x_sc / y_sc; the heat map keeps the scale objects it was built with.
x_sc = bqplot.LinearScale()
y_sc = bqplot.LinearScale()

# 3. Axis
x_ax = bqplot.Axis(scale=x_sc, label='Year')
y_ax = bqplot.Axis(scale=y_sc, label='Acquired Footage / Thousand(s)', orientation='vertical')

# 4. Mark
# Square footage is divided by 1000 to match the y-axis label.
line = bqplot.Lines(x=aim_data.index,
                    y=aim_data/1000, 
                    scales={'x':x_sc, 'y':y_sc})

fig_year = bqplot.Figure(marks=[line], axes=[x_ax, y_ax])

In [45]:
# 5. Interactions
def on_select(change):
    """Redraw the yearly line plot for the heat-map cell that was selected.

    Parameters
    ----------
    change : dict
        Notification passed by ``heat_map.observe``; only
        ``change['owner'].selected`` is read.

    Side effects
    ------------
    Updates ``mySelectedLabel.value`` and the ``x`` / ``y`` data of ``line``.
    Does nothing unless exactly one cell is selected.
    """
    selected = change['owner'].selected
    # NOTE(review): `selected` appears to be None when nothing is selected —
    # guard it so len() is never called on None.
    if selected is None or len(selected) != 1:
        return

    i, j = selected[0]  # i: heat-map row (congress district), j: column (agency code)
    v = hist2d[i,j] # grab data value

    yearly = None  # stays None whenever there is no line worth drawing
    if np.isnan(v):
        message = "Null Value"
    else:
        # Keep only rows with Year Acquired >= 1700; the column contains 0 placeholders.
        selected_data = building[(building['transformed_agency']==j)
                        &(building['Congress Dist']==i)
                        &(building['Year Acquired']>=1700)]
        # Select the column first so only square footage is summed per year.
        aim_data = selected_data.groupby('Year Acquired')['Square Footage'].sum()

        if len(aim_data) == 0:
            # Every building in this cell was dropped by the year filter.
            message = "No Valid Year"
        elif len(aim_data) == 1:
            message = "Only Single Item"
        else:
            message = "Related Line Plot"
            yearly = aim_data

    mySelectedLabel.value = message
    if yearly is None:
        line.x = []
        line.y = []
    else:
        line.x = yearly.index
        line.y = yearly/1000  # thousands of square feet, matching the axis label
heat_map.observe(on_select,'selected')

In [46]:
# Cap both figures at the same height so the stacked dashboard stays compact.
fig.layout.max_height = fig_year.layout.max_height = '400px'

# Status label shown between the two figures; the selection callback updates its text.
mySelectedLabel = ipywidgets.Label(value="Related Line Plot")

# Heat map on top, status label in the middle, yearly line plot at the bottom.
myDashboard = ipywidgets.VBox([fig, mySelectedLabel, fig_year])
myDashboard

VBox(children=(Figure(axes=[ColorAxis(orientation='vertical', scale=ColorScale(scheme='Blues'), side='right'),…

### Narrative

Since the agency names are strings, `np.histogram2d` cannot bin them directly when constructing the grid heat map. Therefore, my visualization transforms them into a set of integer codes and keeps a mapping from each code back to the agency name.

The `Year Acquired` column includes many zeros. To remove their effect on the line plot, I only keep records whose acquisition year is at least 1700.

The x-axis tick labels in the grid heat map overlap, so I rotated them 90 degrees to make them easier to read. The total acquired square footage is always a very large number, so I divided it by 1000 to make the axis easier to read. Some selections include only one sample of year and footage, so I added a label to indicate when this happens.