## When using Plotly both in Colab and on the desktop, create the .ipynb file on the desktop first: a notebook created in Colab will be corrupted if it is copied back to the desktop

In [1]:
# Imports and one-time plotting setup for the whole notebook.

# Standard library
import os
import sys

# Data handling
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston

# Standard plotly imports (version of plotly is 4.4.1)
# chart_studio's plotting module gets its own alias so that `py` refers
# unambiguously to plotly.offline, which is what every later cell calls
# (py.init_notebook_mode, py.iplot).
import chart_studio.plotly as cs_py
import plotly.offline as py
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots

import cufflinks as cf

# Using plotly in offline mode
cf.go_offline(connected=True)
# NOTE(review): offline=False here looks at odds with the go_offline() call
# above -- confirm which setting is intended before changing it.
cf.set_config_file(offline=False, world_readable=True)

py.init_notebook_mode(connected=True)

In [2]:
# Load the Boston housing data: features go into a DataFrame, the target
# stays in a separate array.
# NOTE(review): load_boston was removed in scikit-learn 1.2 -- this cell
# presumably needs an older scikit-learn; confirm the pinned version.
boston = load_boston()
y = boston.target
df = pd.DataFrame(data=boston.data, columns=boston.feature_names)

In [3]:
# Pick the plotly renderer that matches the environment the notebook runs in.
#
# Colab is detected by presence: either the `google.colab` module is loaded
# in this kernel, or the COLAB_GPU environment variable is set at all.
# Testing for presence instead of parsing the variable as an integer means
# Colab is still recognised when the value is not '0', and a non-integer
# value can no longer raise ValueError.
# NOTE(review): assumes the Colab kernel preloads `google.colab` -- confirm.
colab = os.environ.get('COLAB_GPU', '10')  # raw value; '10' is the placeholder when the variable is unset
in_colab = 'google.colab' in sys.modules or 'COLAB_GPU' in os.environ

if in_colab:
    pio.renderers.default = "colab"
    print('colab found using render type colab')
else:
    pio.renderers.default = "plotly_mimetype+notebook_connected"
    print('colab not found using render type plotly_mimetype+notebook_connected')

colab not found using render type plotly_mimetype+notebook_connected


In [4]:
# Add a string-typed copy of RAD so plots can treat it as a category
# (used below for colour and grouping).
rad_as_text = df['RAD'].astype(str)
df['RAD_CAT'] = rad_as_text
df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,RAD_CAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,1.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,2.0
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,2.0
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,3.0
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,1.0
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,1.0
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,1.0
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,1.0


In [5]:
# Histogram of the TAX column built with the low-level graph_objs API.
tax_values = df['TAX']  # a pandas Series
hist1 = go.Histogram(x=tax_values)
data = [hist1]

# Figure title and axis captions.
layout = go.Layout(
    title='TAX Distribution',
    xaxis={'title': 'TAX'},
    yaxis={'title': 'Count'},
)

# The figure is rendered through py.iplot(fig) here instead of fig.show().
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)


In [6]:
# The same TAX histogram, drawn through the .iplot method that cufflinks
# adds to pandas objects.
tax_series = df['TAX']
tax_series.iplot(
    kind='hist',
    title='TAX Distribution',
    xTitle='TAX',
    yTitle='count',
)

In [7]:
# Compare two normal samples in one histogram using a long-format
# ("stacked") DataFrame: one column holds the values, the other the label
# of the series each value belongs to.

# A seeded generator makes the samples, and therefore the printed frame and
# the figure, identical on every run.
rng = np.random.RandomState(42)
x0 = rng.randn(250)
# Add 1 to shift the mean of the Gaussian distribution
x1 = rng.randn(500) + 1

dfa = pd.DataFrame(dict(
    series=np.concatenate((["a"] * len(x0), ["b"] * len(x1))),
    data=np.concatenate((x0, x1)),
))
print(dfa)

# barmode="group" draws the two series side by side within each bin.
fig = px.histogram(dfa, x="data", color="series", barmode="group", opacity=0.75)
fig.show()

    series      data
0        a  0.632312
1        a -0.821015
2        a  0.229466
3        a  0.287617
4        a -1.871639
..     ...       ...
745      b  1.200266
746      b  1.523255
747      b  0.232336
748      b -0.007820
749      b  2.153008

[750 rows x 2 columns]


In [8]:
# Overlay the CRIM and INDUS distributions in a single histogram.

# With barmode='overlay' the bars of both traces are drawn on top of each
# other, so they are made semi-transparent to keep both visible.
# Each legend label matches the column it is drawn from.
hist1 = go.Histogram(x=df['CRIM'], name='CRIM', opacity=0.75)
hist2 = go.Histogram(x=df['INDUS'], name='INDUS', opacity=0.75)

data = [hist1, hist2]

layout = go.Layout(
    title='Comparing CRIM and INDUS',
    barmode='overlay',
    xaxis=dict(
        title='Value'
    ),
    yaxis=dict(
        title='Count'
    ),
    # NOTE(review): a secondary right-hand axis is configured here, but no
    # trace in this cell sets yaxis='y2' -- confirm whether hist2 was meant
    # to be plotted against it.
    yaxis2=dict(
        title='Count',
        anchor='free',
        overlaying='y',
        side='right',
        position=1
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [9]:
# Plotly Express version: CRIM histogram coloured by RAD_CAT, with the bars
# of each category grouped side by side.
fig = px.histogram(
    data_frame=df,
    x="CRIM",
    color="RAD_CAT",
    barmode="group",
)
fig.show()

In [10]:
# Box plot of CRIM, one box per RAD_CAT value.
fig = px.box(
    data_frame=df,
    x="RAD_CAT",
    y="CRIM",
)
fig.show()

In [11]:
# Scatter plot of INDUS against CRIM; marker colour follows RAD_CAT and
# marker size follows the numeric RAD column.
fig = px.scatter(
    data_frame=df,
    x='CRIM',
    y='INDUS',
    color='RAD_CAT',
    size='RAD',
)
fig.show()

In [12]:
import plotly.figure_factory as ff

# Scatterplot matrix over a handful of columns. RAD_CAT is passed as the
# index column and the diagonal cells show histograms.
splom_columns = ['CRIM', 'INDUS', 'RAD_CAT', 'AGE', 'TAX']
fig = ff.create_scatterplotmatrix(
    df[splom_columns],
    index='RAD_CAT',
    diag='histogram',
)
fig.show()

In [13]:
# Annotated heatmap of the pairwise correlations between the numeric columns.

# df also holds the string column RAD_CAT. Selecting the numeric columns
# explicitly keeps it out of the correlation matrix regardless of how the
# installed pandas version treats non-numeric columns in DataFrame.corr().
numeric_df = df.select_dtypes(include='number')
corrs = numeric_df.corr()

fig = ff.create_annotated_heatmap(
    z=corrs.values,
    x=list(corrs.columns),
    y=list(corrs.index),
    # Cell annotations show the coefficients rounded to two decimals.
    annotation_text=corrs.round(2).values,
    showscale=True)

In [14]:
# Render the figure currently bound to `fig`.
fig.show()

plotly subplots simple example

In [17]:
# Two line/marker traces placed next to each other in a 1 x 2 subplot grid.
fig = make_subplots(rows=1, cols=2)

left_trace = go.Scatter(x=[1, 2, 3], y=[4, 5, 6])
right_trace = go.Scatter(x=[20, 30, 40], y=[50, 60, 70])

fig.add_trace(left_trace, row=1, col=1)
fig.add_trace(right_trace, row=1, col=2)

# Fixed canvas size plus a shared title for the whole figure.
fig.update_layout(title_text="Side By Side Subplots", height=600, width=800)
fig.show()

plotly subplots example with the Boston data: a CRIM histogram next to a CRIM vs INDUS scatter plot

In [24]:
# A histogram and a scatter plot of the Boston data next to each other in
# a 1 x 2 subplot grid.
fig = make_subplots(rows=1, cols=2)

# Each legend label matches the column it is drawn from.
hist1 = go.Histogram(x=df['CRIM'], name='CRIM')
hist2 = go.Histogram(x=df['INDUS'], name='INDUS')

# Left panel: distribution of CRIM.
fig.add_trace(hist1, row=1, col=1)

# Right panel: INDUS against CRIM as markers. The trace is named so the
# legend does not fall back to a default label.
# hist2 is not added to the figure here; it is an alternative for the right panel.
fig.add_trace(
    go.Scatter(x=df['CRIM'], y=df['INDUS'], mode='markers', name='CRIM vs INDUS'),
    row=1, col=2,
)

fig.update_layout(height=600, width=800, title_text="Side By Side Subplots")
fig.show()