In [6]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the penguin measurements from a CSV expected in the working directory.
# NOTE(review): the file name is spelled 'penglings' — confirm it matches the
# file on disk. The 'Unnamed: 0' / 'Unnamed: 0.1' columns in the output look
# like an index that was written to the CSV twice — verify and drop if so.
df = pd.read_csv(filepath_or_buffer='penglings.csv')
# Bare trailing expression: the notebook renders the frame as the cell output.
df

Unnamed: 0.1,Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
0,1,Adelie,Torgersen,39.1,18.7,181.0,3750.0,male,2007
1,2,Adelie,Torgersen,39.5,17.4,186.0,3800.0,female,2007
2,3,Adelie,Torgersen,40.3,18.0,195.0,3250.0,female,2007
3,4,Adelie,Torgersen,,,,,,2007
4,5,Adelie,Torgersen,36.7,19.3,193.0,3450.0,female,2007
...,...,...,...,...,...,...,...,...,...
339,340,Chinstrap,Dream,55.8,19.8,207.0,4000.0,male,2009
340,341,Chinstrap,Dream,43.5,18.1,202.0,3400.0,female,2009
341,342,Chinstrap,Dream,49.6,18.2,193.0,3775.0,male,2009
342,343,Chinstrap,Dream,50.8,19.0,210.0,4100.0,male,2009


In [4]:
# Drop rows that have no bill length; that column drives the bubble size
# further down, so rows without it cannot be plotted.
# `subset` is given as a list: a bare string is only accepted by newer pandas
# releases, while a one-element list works everywhere.
df = df.dropna(axis=0, subset=['bill_length_mm'])

In [15]:
# Build one hover label per penguin. Each field is terminated with '<br>' so it
# lands on its own line in the tooltip (the 'Sex' and 'Year' fields previously
# lacked the separator and ran together with the following field).
hover_template = ('Species: {species}<br>'
                  'Island: {island}<br>'
                  'Flipper_length (mm): {fl}<br>'
                  'body_mass (g): {bm}<br>'
                  'Sex: {sex}<br>'
                  'Year: {year}<br>'
                  'Bill Length (mm): {bl}')

# Walk the needed columns in lock-step instead of materialising a Series per
# row with iterrows(); the resulting list is in the same row order as `df`.
hover_text = [
    hover_template.format(species=species, island=island, fl=fl, bm=bm,
                          sex=sex, year=year, bl=bl)
    for species, island, fl, bm, sex, year, bl in zip(
        df['species'],
        df['island'],
        df['flipper_length_mm'],
        df['body_mass_g'],
        df['sex'],
        df['year'],
        df['bill_length_mm'],
    )
]

# Stored on the frame so the per-species subsets built later carry their labels.
df['text'] = hover_text

# Marker scaling factor for the bubble chart; approach taken from
# https://stackoverflow.com/questions/57417164/is-there-a-way-to-calculate-optimal-sizeref-value-for-plotly-scatter3d
# NOTE(review): plotly's bubble-chart docs use 2 * max(size) / (desired_px ** 2);
# the factor 2 is absent here — confirm the resulting bubble size is intended.
size_max = df['bill_length_mm'].max()
sizeref = size_max / 15 ** 2

# Map each species name to the sub-frame holding only that species' rows.
# The list order is the order in which traces are later added to the figure.
species_names = ['Adelie', 'Chinstrap', 'Gentoo']
species_data = {
    name: df[df['species'] == name]
    for name in species_names
}

# Secondary figure: every penguin placed at x = 0, with y set to one of eight
# bill-length quantile bins (as strings) and the marker sized by bill length.
# NOTE(review): fig2 is built here but never shown in this cell — presumably an
# experiment towards a bubble-size legend; confirm whether it is still needed.
df_l = df.sort_values(by="bill_length_mm")
fig2 = px.scatter(
    df_l,
    x=np.zeros(len(df_l)),
    y=pd.qcut(df_l["bill_length_mm"], q=8, precision=0).astype(str),
    size="bill_length_mm",
)


# Empty main figure with its layout configured up front; traces are added later.
# Note that the x axis is logarithmic.
fig = go.Figure(
    layout={
        'title': 'Python + Plotly',
        'xaxis': {
            'title': 'Flipper Length (mm)',
            'gridcolor': 'white',
            'type': 'log',
            'gridwidth': 1,
        },
        'yaxis': {
            'title': 'Body Mass (g)',
            'gridcolor': 'white',
        },
        # light grey plot background so the white grid lines stay visible
        'plot_bgcolor': 'rgb(243, 243, 243)',
    }
)

# Add one scatter trace per species: flipper length on x, body mass on y,
# marker size driven by bill length, and the prepared hover label as text.
# The loop variables use their own names so they no longer overwrite the
# `species_names` list with a single string once the loop has run.
for species_name, species_df in species_data.items():
    fig.add_trace(go.Scatter(
        x=species_df['flipper_length_mm'],
        y=species_df['body_mass_g'],
        name=species_name,  # label shown for this trace
        text=species_df['text'],
        marker_size=species_df['bill_length_mm'],
    ))


# Apply shared marker styling to every trace: markers only (no connecting
# lines), size values interpreted as area and scaled by `sizeref`, and a
# 2-unit outline around each bubble.
fig.update_traces(
    mode='markers',
    marker_sizemode='area',
    marker_sizeref=sizeref,
    marker_line_width=2,
)

# Known limitation: could not get two legends (bubble size and species) to
# display together on this figure.
fig.show()