https://towardsdatascience.com/interactive-visualizations-with-plotly-ea3f8feb87d1

# Initialisation

In [None]:
pip install plotly



In [48]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Scatter plots

Load a dataset and aggregate it. For this example we'll use one of the default sample datasets that ship with Colab.

In [2]:
# Read the California housing training CSV from Colab's sample_data folder.
dataset = pd.read_csv(
    filepath_or_buffer="/content/sample_data/california_housing_train.csv"
)
# Preview the first five rows.
dataset.head(n=5)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-114.31,34.19,15.0,5612.0,1283.0,1015.0,472.0,1.4936,66900.0
1,-114.47,34.4,19.0,7650.0,1901.0,1129.0,463.0,1.82,80100.0
2,-114.56,33.69,17.0,720.0,174.0,333.0,117.0,1.6509,85700.0
3,-114.57,33.64,14.0,1501.0,337.0,515.0,226.0,3.1917,73400.0
4,-114.57,33.57,20.0,1454.0,326.0,624.0,262.0,1.925,65500.0


In [3]:
# List the column labels of the loaded frame.
dataset.columns

Index(['longitude', 'latitude', 'housing_median_age', 'total_rooms',
       'total_bedrooms', 'population', 'households', 'median_income',
       'median_house_value'],
      dtype='object')

In [25]:
def group(age):
  """Return the age-bucket label for a housing age.

  Buckets are half-open on the upper side: values below 10 map to "0-10",
  values in [10, 20) to "10-20", [20, 30) to "20-30", [30, 40) to "30-40",
  and anything that is not below 40 falls through to "40+".
  """
  upper_bounds = ((10, "0-10"), (20, "10-20"), (30, "20-30"), (40, "30-40"))
  for bound, label in upper_bounds:
    if age < bound:
      return label
  return "40+"

def _mean_by(frame, key):
  """Average the housing measures of `frame` within each distinct value of `key`.

  Returns a frame indexed by `key` with the columns value, age, income, rooms
  and population (in that order). `age` simply repeats the group key, which is
  what taking the max of the key column inside each group yields.
  """
  means = frame.groupby(by=[key]).agg(
      value=("median_house_value", "mean"),
      income=("median_income", "mean"),
      rooms=("total_rooms", "mean"),
      population=("population", "mean"),
  )
  # Every row in a group shares the same key, so the group key is the "age" column.
  means.insert(1, "age", means.index)
  return means

# Create an aggregate by age group.
# `assign` works on a copy, so the raw `dataset` is not mutated by adding the bucket label.
ds_grp_age = dataset.assign(agegroup=dataset["housing_median_age"].map(group))
ds_grp_age = ds_grp_age[['agegroup', 'median_house_value', 'median_income', 'total_rooms', 'population']]
gpr_age = _mean_by(ds_grp_age, 'agegroup')

# Create an aggregate by age (one row per distinct housing_median_age).
ds_age = dataset[['median_house_value', 'median_income', 'total_rooms', 'population', 'housing_median_age']]
agg_age = _mean_by(ds_age, 'housing_median_age')
# Tag each distinct age with its bucket so the per-age rows can be faceted by group.
agg_age["agegroup"] = agg_age["age"].apply(group)

In [26]:
# Preview the per-age-group averages built above.
gpr_age.head()

Unnamed: 0_level_0,value,age,income,rooms,population
agegroup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0-10,205608.955842,0-10,4.658022,4910.25299,2329.643054
10-20,190392.479807,10-20,4.047213,3334.209414,1692.441829
20-30,206356.006274,20-30,3.905508,2796.712422,1536.551066
30-40,206444.430818,30-40,3.776766,2066.478616,1228.649686
40+,228626.052354,40+,3.581867,1794.74671,1011.576484


In [32]:
# Count how many rows of agg_age (one row per distinct housing age) fall in each age bucket.
agg_age["agegroup"].value_counts()

40+      13
30-40    10
10-20    10
20-30    10
0-10      9
Name: agegroup, dtype: int64

Show the scatter chart

In [8]:
# One marker per distinct housing age, showing the mean house value for that age.
fig = go.Figure(data=go.Scatter(x=agg_age["age"], y=agg_age["value"], mode='markers'))
# Title and axis labels so the figure can be read on its own.
fig.update_layout(
    title="Mean house value by housing median age",
    xaxis_title="Housing median age",
    yaxis_title="Mean of median_house_value",
)
fig.show()

# Lines

In [9]:
# Two traces on shared axes: mean population and mean total rooms for each housing age.
fig = go.Figure(data=go.Scatter(x=agg_age["age"], y=agg_age["population"], mode='lines+markers', name='population'))
fig.add_trace(go.Scatter(x=agg_age["age"], y=agg_age["rooms"], mode='lines+markers', name='rooms'))
# Title and axis labels so the figure can be read on its own.
fig.update_layout(
    title="Mean population and total rooms by housing median age",
    xaxis_title="Housing median age",
    yaxis_title="Mean per age",
)
fig.show()

# Bar and Pie Charts

In [10]:
# One bar per age bucket, showing the mean total room count in that bucket.
fig = go.Figure(data=go.Bar(x=gpr_age["age"], y=gpr_age["rooms"]))
# Title and axis labels so the figure can be read on its own.
fig.update_layout(
    title="Mean total rooms by age group",
    xaxis_title="Age group",
    yaxis_title="Mean of total_rooms",
)
fig.show()

In [12]:
# Pie chart of the mean room count per age bucket; slices are labelled by the "age" column.
fig = px.pie(
    data_frame=gpr_age,
    names='age',
    values='rooms',
    title='Rooms / Age',
)
fig.show()

# Enriched data viz (coordinates + size + color)

In [13]:
# Encode several aggregated measures in one chart: position (age, income),
# marker size (rooms) and marker colour (value); population appears on hover.
fig = px.scatter(agg_age, 
                 x="age", 
                 y="income", 
                 color="value", 
                 size='rooms',
                 hover_data=['population'],
                 title="Mean income by housing median age (size = rooms, color = house value)")
fig.show()

# Subplots - several charts

In [44]:
# One bar panel per age bucket, laid out side by side (facet_col), coloured by mean rooms.
fig = px.bar(
    agg_age, 
    x="population", 
    y="income", 
    color="rooms", 
    facet_col="agegroup", 
    # Descriptive title in place of the "title" placeholder.
    title="Mean income vs. mean population, one column per age group"
)
fig.show()

In [47]:
# One scatter panel per age bucket, stacked vertically (facet_row), coloured by mean rooms.
fig = px.scatter(
    agg_age, 
    x="population", 
    y="income", 
    color="rooms", 
    facet_row="agegroup", 
    # Descriptive title in place of the "title" placeholder.
    title="Mean income vs. mean population, one row per age group"
)
fig.show()

In [54]:
# Two independent charts side by side in a single figure; each panel gets its own title.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Mean rooms by age group", "Mean population by housing age"),
)

# Left panel: mean total rooms per age bucket.
# Legend names match the 'rooms' / 'population' trace names used in the Lines section.
fig.add_bar(x=gpr_age["age"], 
                y=gpr_age["rooms"], 
                marker=dict(color="LightBlue"),
                name="rooms", 
                row=1, 
                col=1)

# Right panel: mean population for each distinct housing age.
fig.add_scatter(x=agg_age["age"], 
                y=agg_age["population"],
                marker=dict(size=15, color="Blue"),
                mode="markers",
                name="population", 
                row=1, 
                col=2)

fig.show()