In [5]:
import pandas as pd
from bokeh.plotting import figure, output_file, show, output_notebook
from bokeh.models import ColorBar, ColumnDataSource, Panel, Tabs
from bokeh.transform import factor_cmap
from bokeh.palettes import brewer, d3
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [3]:
# Market-cap tiers to load; each tier has its own CSV one directory up.
market_caps = ['mega', 'large', 'mid', 'small', 'micro']

# Map tier name -> DataFrame read from '../data-<tier>.csv'.
# Keys are seeded first, then each slot is filled with its frame.
data = dict.fromkeys(market_caps)
for cap in market_caps:
    data[cap] = pd.read_csv(f'../data-{cap}.csv')

In [14]:
def make_plot(data, cap):
    """Build a Bokeh scatter plot of holdings for one market-cap tier.

    Each row of ``data`` becomes a circle positioned at (``pc1``, ``pc2``),
    colored by ``sector`` and sized by ``value`` rescaled to the 10-100 range.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``sector``, ``value``, ``pc1`` and ``pc2``.
        The hover tooltips additionally reference ``company``, ``ticker``
        and ``industry``. The frame is copied, so the caller's object is
        not modified.
    cap : str
        Market-cap label; it is title-cased and inserted into the plot title.

    Returns
    -------
    bokeh.plotting.figure
        The configured figure. Nothing is displayed here; the caller decides
        how to show or embed it.
    """
    data = data.copy()
    # Plain Python list of the distinct sector labels, in order of appearance.
    sectors = list(data['sector'].unique())

    # Rescale holding values into a marker-size range of 10..100.
    # fit_transform returns an (n, 1) array; flatten it to a 1-D column.
    sc = MinMaxScaler((10, 100))
    data['scaled_value'] = sc.fit_transform(data['value'].values.reshape(-1, 1)).ravel()

    tooltips = [
        ('Company', '@company'),
        ('Ticker', '@ticker'),
        ('Owned ($000s)', '@value{0,0}'),
        ('Industry', '@industry'),
    ]
    p = figure(plot_width=800, plot_height=800, tooltips=tooltips, title=f'Top 50 {cap.title()}-Cap Institutional Holdings', x_axis_label='Principal Component 1',
              y_axis_label='Principal Component 2')
    source = ColumnDataSource(data=data)

    # Category20 only ships palettes for a limited set of sizes, so indexing it
    # directly with the sector count raises KeyError for very few or very many
    # sectors. Use the exact-size palette when it exists; otherwise take (and,
    # if necessary, cycle through) colors from the largest available palette so
    # every sector still receives a color.
    palette_family = d3['Category20']
    n_sectors = len(sectors)
    if n_sectors in palette_family:
        palette = palette_family[n_sectors]
    else:
        largest = palette_family[max(palette_family)]
        palette = [largest[i % len(largest)] for i in range(n_sectors)]

    cmap = factor_cmap('sector', palette=palette, factors=sectors)
    p.circle('pc1', 'pc2', source=source, size='scaled_value', color=cmap, alpha=0.7, legend_group='sector')
    p.legend.location = "top_left"
    return p


In [16]:
# One panel per market-cap tier, each holding that tier's scatter plot and
# labelled with the title-cased tier name.
tabs = [
    Panel(child=make_plot(data[tier], tier), title=tier.title())
    for tier in market_caps
]

# Write the tabbed layout to a standalone HTML file and display it.
output_file('plots.html')
show(Tabs(tabs=tabs))