# Bubble Charts in Plotly
**Abid Ali**

Email: [abdsoftfsd@gmail.com](mailto:abdsoftfsd@gmail.com)

Skype: abd.soft

In [27]:
# Uncomment the next line to install or upgrade Plotly in this kernel's environment.
# %pip install -U plotly


In [28]:
import numpy as np
import pandas as pd
import plotly
import plotly.graph_objs as go
import plotly.offline as offline

# Set up Plotly's offline notebook mode once, before any of the
# offline.iplot(...) calls in the cells below.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead
# of embedding it in the notebook — confirm against the installed Plotly version.
offline.init_notebook_mode(connected=True)

In [29]:
# Minimal scatter plot: four (x, y) points rendered as markers only.
x_values = [15, 18, 21, 25]
y_values = [100, 400, 300, 200]

trace = go.Scatter(x=x_values, y=y_values, mode='markers')
data = [trace]

offline.iplot(data)


In [30]:
# Plot the sine curve as a Plotly scatter plot.
# NumPy (np) is imported in the notebook's import cell at the top.
x = np.linspace(0, 2 * np.pi, 100)  # 100 evenly spaced points on [0, 2*pi]
y = np.sin(x)

trace = go.Scatter(x=x, y=y, mode='markers')
data = [trace]

offline.iplot(data)


In [31]:
# Plot the cosine curve as a Plotly scatter plot.
# NumPy (np) is imported in the notebook's import cell at the top.
# NOTE(review): the range starts at 0.1, not 0 as in the sine cell — confirm
# the offset is intentional.
x = np.linspace(0.1, 2 * np.pi, 100)  # 100 evenly spaced points on [0.1, 2*pi]
y = np.cos(x)

trace = go.Scatter(x=x, y=y, mode='markers')
data = [trace]

offline.iplot(data)


In [32]:
# Bubble chart: the same four points, with each marker's size taken from the
# matching entry of `size`. `size` stays a notebook-level name because the
# following cell reads it as well.
size = [25, 100, 75, 50]

bubble_marker = dict(size=size)
trace = go.Scatter(
    x=[15, 18, 21, 25],
    y=[100, 400, 300, 200],
    mode='markers',
    marker=bubble_marker,
)
data = [trace]

offline.iplot(data)

In [43]:
# Colour-coded bubbles: `i` holds one numeric value per point, passed as the
# marker colour together with the 'Portland' colour scale.
# `size` is the list defined in the previous cell.
i = [5, 6, 8, 4]

colored_marker = dict(
    size=size,
    color=i,
    colorscale='Portland',
    showscale=True,
)
trace = go.Scatter(
    x=[15, 18, 21, 25],
    y=[100, 400, 300, 200],
    mode='markers',
    marker=colored_marker,
)
data = [trace]

offline.iplot(data)


In [44]:
# Load the housing dataset from the local datasets folder and preview the
# first ten rows.
housing_csv = 'datasets/housing.csv'
housing_data = pd.read_csv(housing_csv)
housing_data.head(n=10)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY
5,-122.25,37.85,52.0,919.0,213.0,413.0,193.0,4.0368,269700.0,NEAR BAY
6,-122.25,37.84,52.0,2535.0,489.0,1094.0,514.0,3.6591,299200.0,NEAR BAY
7,-122.25,37.84,52.0,3104.0,687.0,1157.0,647.0,3.12,241400.0,NEAR BAY
8,-122.26,37.84,42.0,2555.0,665.0,1206.0,595.0,2.0804,226700.0,NEAR BAY
9,-122.25,37.84,52.0,3549.0,707.0,1551.0,714.0,3.6912,261100.0,NEAR BAY


In [45]:
# Dimensions of the loaded frame as (rows, columns).
housing_data.shape

(20640, 10)

In [46]:
# Plotting all 20,640 records at once can be problematic, so keep a random 7% sample.
# random_state pins the draw so that Restart & Run All reproduces the same rows.
# Note: this rebinds housing_data, so re-running only this cell samples the
# sample again — re-run the load cell first.
housing_data = housing_data.sample(frac=0.07, random_state=42).reset_index(drop=True)
housing_data.shape

(1445, 10)

In [49]:
# Largest total_rooms value in the sample. total_rooms is used as the bubble
# size in the final chart, so its range matters for scaling.
housing_data['total_rooms'].max()

30405.0

In [52]:
# Rows with at least 20,000 rooms, showing only the room and bedroom counts.
# A single .loc call (row mask, column list) replaces the chained [...][...] lookup.
housing_data.loc[
    housing_data['total_rooms'] >= 20000,
    ['total_rooms', 'total_bedrooms'],
]

Unnamed: 0,total_rooms,total_bedrooms
170,21988.0,4055.0
310,20354.0,3493.0
1166,30405.0,4093.0


In [56]:
# Bubble chart of the sampled housing data: median income (x) against median
# house value (y), bubble size from total_rooms, colour from housing_median_age.
# total_rooms values are far too large to use directly as marker sizes, so
# sizeref scales them down.
trace = go.Scatter(
    x=housing_data['median_income'],
    y=housing_data['median_house_value'],
    mode='markers',
    marker=dict(
        size=housing_data['total_rooms'],
        sizeref=500,  # divide total_rooms by 500 to get a smaller number for the bubble size
        color=housing_data['housing_median_age'],
        colorscale='Jet',
        showscale=True,
        colorbar=dict(title='Housing median age'),  # label the colour scale
    )
)
data = [trace]

# Axis titles let the figure be read on its own, without the code above it.
layout = go.Layout(
    height=600,
    width=900,
    title='Housing Data',
    hovermode='closest',
    xaxis=dict(title='Median income'),
    yaxis=dict(title='Median house value'),
)

fig = go.Figure(data=data, layout=layout)

offline.iplot(fig)