In [1]:
import pandas as pd
from bokeh.plotting import figure, output_file, show, output_notebook
from bokeh.models import ColorBar, ColumnDataSource
from bokeh.transform import factor_cmap
from bokeh.palettes import brewer, d3
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Configure Bokeh so that show() renders plots inline in this notebook.
output_notebook()

In [3]:
def make_plot(data, cap):
    """Show a Bokeh scatter plot of the holdings in one market-cap bucket.

    Each row becomes a circle positioned at (``pc1``, ``pc2``), colored by
    ``sector`` and sized by ``value`` rescaled into the range 10-100.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``market_cap``, ``sector``, ``value``,
        ``pc1`` and ``pc2``; the hover tooltips additionally read
        ``shortName``, ``ticker`` and ``industry``. The frame passed in is
        not modified.
    cap : str
        Value of ``market_cap`` to plot; also used (title-cased) in the
        figure title.

    Raises
    ------
    ValueError
        If no row has ``market_cap == cap``.
    """
    # Filter first and copy afterwards: the new column below is then added to
    # an independent frame, which avoids pandas' SettingWithCopyWarning and
    # leaves the caller's frame untouched.
    subset = data.loc[data['market_cap'] == cap].copy()
    if subset.empty:
        raise ValueError(f'no rows with market_cap == {cap!r}')

    # Plain list of factor names, in order of first appearance.
    sectors = subset['sector'].unique().tolist()

    # Rescale holding values into 10-100 so they can be used as marker sizes.
    sc = MinMaxScaler((10, 100))
    values_2d = subset['value'].to_numpy().reshape(-1, 1)
    subset['scaled_value'] = sc.fit_transform(values_2d).ravel()

    # d3['Category20'] is only keyed by sizes 3..20, so indexing it with the
    # sector count fails for very small or very large buckets. Take colors
    # from the full 20-color palette instead, cycling if there are more
    # sectors than colors.
    base_palette = d3['Category20'][20]
    palette = [base_palette[i % len(base_palette)] for i in range(len(sectors))]

    tooltips = [
        ('Company', '@shortName'),
        ('Ticker', '@ticker'),
        ('Owned ($000s)', '@value{0,0}'),
        ('Industry', '@industry'),
    ]
    p = figure(
        plot_width=800,
        plot_height=800,
        tooltips=tooltips,
        title=f'Top 50 {cap.title()}-Cap Institutional Holdings',
        x_axis_label='Principal Component 1',
        y_axis_label='Principal Component 2',
    )
    source = ColumnDataSource(data=subset)
    cmap = factor_cmap('sector', palette=palette, factors=sectors)
    p.circle(
        'pc1',
        'pc2',
        source=source,
        size='scaled_value',
        color=cmap,
        alpha=0.7,
        legend_group='sector',
    )
    p.legend.location = "top_left"
    show(p)


In [4]:
# Read the holdings table and order it by position value, largest first.
DATA = (
    pd.read_csv('../data.csv')
    .sort_values(by='value', ascending=False)
)
DATA.head(n=5)

Unnamed: 0,ticker,report_pd,shortName,sector,industry,value,market_cap,pc1,pc2
374,MSFT,2020-12-31,Microsoft Corporation,Technology,Software—Infrastructure,1161816000.0,mega,11.403084,-14.29878
3,AAPL,2020-12-31,Apple Inc.,Technology,Consumer Electronics,869144500.0,mega,21.070558,-7.52129
215,FB,2020-12-31,"Facebook, Inc.",Communication Services,Internet Content & Information,709390100.0,mega,1.267449,-9.624355
251,GOOGL,2020-12-31,Alphabet Inc.,Communication Services,Internet Content & Information,697685600.0,mega,-14.609293,-5.808565
34,AMZN,2020-12-31,"Amazon.com, Inc.",Consumer Cyclical,Internet Retail,577396000.0,mega,-14.148967,-3.995375


In [5]:
# Render one plot per market-cap bucket, pointing Bokeh's file output at a
# separate HTML file for each. Rows with a missing market_cap are skipped:
# make_plot needs a string label (it calls cap.title()), and a NaN value
# would never match any row in its equality filter anyway.
for cap in DATA['market_cap'].dropna().unique():
    output_file(f'{cap}-cap.html')
    make_plot(DATA, cap)