# from before:

In [1]:
import pandas as pd
import numpy as np
import bqplot
# our usual stuff
%matplotlib inline
# let's add in some export data
comm = pd.read_csv('/Users/jillnaiman/Downloads/total_export.csv')

In [2]:
# grab the years: every column header after the first ('State') is a year,
# read in as text, so convert each one to an integer
years = np.array([int(label) for label in comm.columns[1:]])
years

array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017])

In [3]:
# projection + topology for the US states map
sc_geo = bqplot.AlbersUSA()
state_data = bqplot.topo_load('map_data/USStatesMap.json')

# hovering over a state shows its id and name
def_tt = bqplot.Tooltip(fields=['id', 'name'])

states_map = bqplot.Map(map_data=state_data, scales={'projection':sc_geo}, tooltip=def_tt)
states_map.interactions = {'click': 'select', 'hover': 'tooltip'}

# the selection callback further down translates selected ids into state names
# through `ids` / `state_names`; build them here so this cell does not depend
# on a later cell having been run first
from states_utils import get_ids_and_names
ids, state_names = get_ids_and_names(states_map)

fig=bqplot.Figure(marks=[states_map], title='US States Map Example',
                  fig_margin={'top': 0, 'bottom': 0, 'left': 0, 'right': 0})


# second figure: a line plot of exports against year
x_scl = bqplot.LinearScale()
y_scl = bqplot.LinearScale()

# years along the bottom, export totals up the left-hand side
ax_xcl = bqplot.Axis(scale=x_scl, label='Year')
ax_ycl = bqplot.Axis(scale=y_scl,
                     label='Total Export from State NA',
                     orientation='vertical',
                     side='left')

# start with a flat line at zero, one point per year
lines = bqplot.Lines(x=years,
                     y=np.zeros(years.size),
                     scales={'x': x_scl, 'y': y_scl})

fig_lines = bqplot.Figure(marks=[lines], axes=[ax_ycl, ax_xcl])

# let do something additive for all states selected
def get_data_value(change):
    """Plot the summed yearly exports of every state selected on the map.

    Used as an observer callback for the map's ``selected`` trait.  Reads the
    module-level ``years``, ``ids``, ``state_names`` and ``comm`` objects and
    updates ``lines.y`` and ``ax_ycl.label`` in place.

    Parameters
    ----------
    change : dict
        Change notification; only ``change['owner'].selected`` (the ids of the
        currently selected states, or None) is read.
    """
    selected = change['owner'].selected
    exports = np.zeros(len(years))

    # nothing selected (None, or an empty list once everything is un-clicked):
    # reset to the flat line and the placeholder label
    if selected is None or len(selected) == 0:
        lines.y = exports
        ax_ycl.label='Total Export from NA'
        return

    selected_names = []
    for state_id in selected:
        # the boolean mask picks the single matching name, hence [0]
        state_name = str(state_names[state_id == ids][0])
        selected_names.append(state_name)

        # the lookup into `comm` uses the spelling 'Massachussetts'
        # NOTE(review): the original comment blames bqplot for the misspelling,
        # but the code maps the correct spelling onto the misspelled one for the
        # `comm` lookup -- confirm which side is actually misspelled
        lookup_name = 'Massachussetts' if state_name == 'Massachusetts' else state_name

        # first matching row, minus the leading state-name column
        row = comm.loc[comm['State'] == lookup_name].values[0][1:]
        # values carry thousands separators (","), strip them before converting;
        # str() keeps this working for cells that were already parsed as numbers
        cleaned = np.array([str(value).replace(',', '') for value in row])
        exports += cleaned.astype('float64')

    lines.y = exports
    # join() avoids the dangling ", " after the last state name
    ax_ycl.label='Total Export from ' + ', '.join(selected_names)

# re-run get_data_value whenever the set of selected states changes
states_map.observe(get_data_value,'selected')

# some formatting for vertical
#fig_lines.layout.max_height='250px'
#fig_lines.layout.min_width='800px'
#fig.layout.min_width='800px'
#ipywidgets.VBox([fig_lines,fig])

# ipywidgets has not been imported anywhere above this point, so import it
# before using it to place the map and the line plot side by side
import ipywidgets
ipywidgets.HBox([fig,fig_lines])

NameError: name 'bqplot' is not defined

# Activity #1: MarketMap
* another way to visualize mappable data

## 1.a : explore the dataset

In [None]:
#!pip install xlrd # JPN, might have to run this

# note: this is querying from the web!  How neat is that??
# skiprows=5 skips the first five rows of the sheet
#  (presumably title/notes above the real header -- TODO confirm)
df = pd.read_excel('https://query.data.world/s/ivl45pdpubos6jpsii3djsjwm2pcjv', skiprows=5)
# the above might take a while to load all the data

In [None]:
# save as CSV if you wanna
#df.to_csv('/Users/jillnaiman/Downloads/market_map_data.csv')

In [None]:
# or can try:
#df = pd.read_csv('/Users/jillnaiman/Downloads/market_map_data.csv')

In [None]:
# what is in this dataframe? lets take a look at the top
df.head()
# this dataset is called: "Surgery Charges Across the U.S."
#  and its just showing us how much different procedures 
#  cost from different hospitals

In [None]:
# what kinds of data are we working with?
df.dtypes

In [None]:
# lets look at some summary data
# recall: this is like R's "summary" function
df.describe()
# so, things like the mean zipcode aren't
#  meaningful, same thing with provider ID
#  But certainly looking at the average
#  total payments, discharges, might 
#  be useful

In [None]:
# let's look at how many separate types of surgery are
#  represented in this dataset
#  (dropna=False counts a missing value as its own entry)
df["DRG Definition"].nunique(dropna=False)

In [None]:
# and how many distinct provider (hospital) names are there?
#  (dropna=False counts a missing value as its own entry)
df["Provider Name"].nunique(dropna=False)

In [None]:
# number of distinct states in the data
#  (dropna=False counts a missing value as its own entry)
df["Provider State"].nunique(dropna=False)

In [None]:
# what are the state codes?
df["Provider State"].unique()

In [None]:
# let's figure out what the most common surgeries are via how
#  many folks are discharged after each type of surgery
# (1) add up the discharges for each type of surgery
most_common = df.groupby("DRG Definition")["Total Discharges"].sum()
most_common

# (2) re-order the same totals so the largest is on top
most_common = most_common.sort_values(ascending=False)
most_common

# (3) just the top 5, for fun
most_common.head(5)

# (4) or only the names of the top 5:
most_common.head(5).index.values

## 1.b: formatting data for MarketMap
* here we are going to practice doing some fancy things to clean this data
* this will be good practice for when you run into other datasets "in the wild"

In [None]:
# (1) total discharges for every (surgery, state) pair,
#  as one long Series
total_discharges = df.groupby(["DRG Definition", "Provider State"])["Total Discharges"].sum()
total_discharges

# (2) the long form is not intuitive, so pivot the innermost
#  index level (the state) out into columns
total_discharges = total_discharges.unstack()
total_discharges

### Aside: let's quickly check which surgeries are the most frequent

In [None]:
# for our map, we are going to want to
# normalize the discharges for each surgery
# in each state by the total discharges across
# all states for that particular type of surgery,
#  so let's add this total to our total_discharges DF
# (a "Total" column left over from an earlier run of this cell is
#  dropped before summing, otherwise it would be added into the new
#  total and re-running the cell would double the numbers)
total_discharges["Total"] = total_discharges.drop(columns="Total", errors="ignore").sum(axis = 1)
total_discharges["Total"].head() # just look at the first few

In [None]:
# finally, let's check out the most often
#  performed surgery across all states

# we can do this by sorting our DF by the total we just
# calculated.  sort_values returns a new, sorted DataFrame,
# which we assign back to the same name (instead of mutating
# the existing frame with inplace=True):
total_discharges = total_discharges.sort_values(by = "Total",
                                                ascending=False)

# now let's just look at the first few rows of our
#  sorted table
total_discharges.head()

# so, from this we see that joint replacement
#  or reattachment of a lower extremity is 
#  the most likely surgery (in number of discharges)
# followed by surgeries for sepsis and then heart failure

In [None]:
# neat.  We won't need these for plotting, so we can remove the
# total column we just calculated
# (drop(..., errors="ignore") rather than `del`, so that running this
#  cell a second time does not raise a KeyError)
total_discharges = total_discharges.drop(columns="Total", errors="ignore")
total_discharges.head()
# now we see that we are back to just states & surgeries
#  *but* our sorting is still by the total that we 
# previously calculated.
# spiffy!

## 1.c: plot data with bqplot

In [None]:
import bqplot
# by default bqplot does not import 
#  all of its sub-packages, so we have to 
#  explicitly import market_map
import bqplot.market_map # for access to market_map

In [None]:
# our usual recipe, but building a market map
#  rather than a heat map

# scales: two ordinal scales plus a colour scale
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()
c_sc = bqplot.ColorScale(scheme="Blues")

# only a colour axis for now:
c_ax = bqplot.ColorAxis(scale=c_sc, orientation='vertical')

# now the market map itself:

# (1) what goes into the colour? take a look at the first row of the
# table (the values) next to the column names (the states):
total_discharges.iloc[0].values, total_discharges.columns.values

# (2) one cell per column name, coloured by the first row's value
mmap = bqplot.market_map.MarketMap(names=total_discharges.columns.values,
                                   color=total_discharges.iloc[0].values,
                                   scales={'color': c_sc},
                                   axes=[c_ax])

# (3) clicking on cells alone doesn't tell us much, so
# add a text label that will print the total of the selection
import ipywidgets
label = ipywidgets.Label()
# link to market map
def get_data(change):
    """Show the summed discharges of the plotted surgery for the selected cells.

    Used as an observer callback for the market map's ``selected`` trait.
    Reads the first row of the module-level ``total_discharges`` table (the
    row the market map is coloured by) and writes the text into ``label``.

    Parameters
    ----------
    change : dict
        Change notification; only ``change['owner'].selected`` (the names of
        the currently selected cells) is read.
    """
    selected = change['owner'].selected
    if selected is None or len(selected) == 0:
        # selection was cleared: blank the label rather than leave the old total up
        label.value = ''
        return

    top_surgery = total_discharges.iloc[0]
    # reindex() gives NaN for a name that is not a column of the table and
    # sum() skips NaN, so an unknown or empty cell simply adds nothing
    v = float(top_surgery.reindex(list(selected)).sum())
    if v > 0:
        # what are we printing?
        label.value = 'Total discharges of ' + \
            top_surgery.name + \
            ' = ' + str(v)
    else:
        label.value = ''
    
# call get_data every time the market map's selection changes
mmap.observe(get_data, names='selected')

#mmap

# (3) show the label stacked above the market map
ipywidgets.VBox(children=[label, mmap])

## Discussion:
* think back to the map we had last week: we can certainly plot this information with a more geo-realistic map
* what are the pros & cons of each style of map?  What does each highlight?  How is each biased?

## IF we have time: Re-do with other mapping system:

In [None]:
from us_state_abbrev import us_state_abbrev

sc_geo = bqplot.AlbersUSA()
state_data = bqplot.topo_load('map_data/USStatesMap.json')

#(1) a plain map first: the id/name lookup and the colour loop below both
# read from `states_map`, so build it in this cell instead of relying on a
# map left over from an earlier cell
states_map = bqplot.Map(map_data=state_data, scales={'projection':sc_geo})

#(2)
# library from last time
from states_utils import get_ids_and_names
ids, state_names = get_ids_and_names(states_map)

# color maps
import matplotlib.cm as cm
cmap = cm.Blues

# most popular surgery: first row of the table
popSurg = total_discharges.iloc[0]

# here, we will go through the process of getting colors to plot
#  each state with its similar color to the marketmap above:

#!pip install webcolors
from webcolors import rgb_to_hex
d = {}  # state id -> hex colour string
# the value range is the same for every state, so look it up once
lowest = popSurg.values.min()
highest = popSurg.values.max()
for geom in states_map.map_data['objects']['subunits']['geometries']:
    props = geom['properties']
    if props is None:
        continue  # entries without properties get no colour
    # match the state's full name to its abbreviation
    abbrev = us_state_abbrev[props['name']]
    count = popSurg[popSurg.index == abbrev].values[0]
    # rescale to 0-1 across all states, the range the colormap expects
    frac = (count - lowest) / (highest - lowest)
    # the colormap returns floats in 0-1; convert the rgb part to 0-255 ints
    red, green, blue = cmap(frac)[:3]
    d[geom['id']] = rgb_to_hex([int(red * 255), int(green * 255), int(blue * 255)])
    
    
# hovering over a state shows just its name
def_tt = bqplot.Tooltip(fields=['name'])

# build the map again, this time passing the per-state colours in `d`
states_map = bqplot.Map(map_data=state_data,
                        scales={'projection': sc_geo},
                        colors=d,
                        tooltip=def_tt)
# add interactions
states_map.interactions = {'click': 'select', 'hover': 'tooltip'}

# (3) a fresh label for this map
# NOTE(review): this rebinds the module-level name `label`, which the
#  market-map callback defined earlier also writes to -- confirm that is intended
label = ipywidgets.Label()
# link to heat map
def get_data(change):
    """Show the summed discharges of the top surgery for the selected states.

    Used as an observer callback for the state map's ``selected`` trait.
    Translates each selected id to a state name (``ids`` / ``state_names``),
    then to an abbreviation (``us_state_abbrev``), sums the matching entries
    of ``popSurg`` and writes the text into ``label``.

    Parameters
    ----------
    change : dict
        Change notification; only ``change['owner'].selected`` (the ids of the
        currently selected states, or None) is read.
    """
    selected = change['owner'].selected
    if selected is None or len(selected) == 0:
        # selection was cleared: blank the label rather than leave the old total up
        label.value = ''
        return

    v = 0.0 # to store total value
    for state_id in selected:
        # the boolean mask picks the single matching name, hence [0]
        state_name = state_names[state_id == ids][0]
        state_abbrev = us_state_abbrev[state_name]
        v += popSurg[popSurg.index == state_abbrev].values[0]

    if v > 0:
        # what are we printing?  (v is a plain scalar here)
        label.value = 'Total discharges of ' + \
            popSurg.name + \
            ' = ' + str(v)
    else:
        label.value = ''
    
# call get_data every time the map's selection changes
states_map.observe(get_data, names='selected')

# zero margins all round -- try it without fig_margin first and compare
fig = bqplot.Figure(title='US States Map Example',
                    marks=[states_map],
                    fig_margin=dict(top=0, bottom=0, left=0, right=0))
#fig
# (3) label stacked above the map
ipywidgets.VBox(children=[label, fig])

# Activity #2: Real quick ipyleaflet
* since cartopy wasn't working for folks, we'll quickly look at another option: ipyleaflet

In [None]:
#!pip install ipyleaflet
from ipyleaflet import *
# note: you might have to close and reopen your notebook
# to see the map

# base map: Hydda "Full" tiles, centred on (52, 10) at zoom level 8
m = Map(basemap=basemaps.Hydda.Full, center=(52, 10), zoom=8)

#(2) turn a second basemap (Strava "All") into a tile layer and add it to the map
strata_all = basemap_to_tiles(basemaps.Strava.All)
m.add_layer(strata_all)
m

### Note: more examples available here - https://github.com/jupyter-widgets/ipyleaflet/tree/master/examples