In [1]:
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as offline

# Initialise plotly's offline notebook mode once, before any of the
# offline.iplot(...) calls in the cells below.
# NOTE(review): connected=True is understood to load plotly.js from a CDN
# instead of embedding it in the notebook — confirm against the plotly docs.
offline.init_notebook_mode(connected=True)


In [2]:
# Describe the whole figure with plain Python dicts and hand it to iplot.
# Three hand-picked (lon, lat) points are drawn as fixed-size markers.
trace = {
    'type': 'scattergeo',
    'lon': [-97.92, 0, 78.8],
    'lat': [39.3, 0, 21.76],
    'marker': {'size': 10},
    'mode': 'markers',
}
data = [trace]

# No legend; the geo sub-dict switches land rendering on.
layout = {
    'showlegend': False,
    'geo': {'showland': True},
}

fig = {'data': data, 'layout': layout}
offline.iplot(fig)

In [3]:
# Build a three-point marker map using the graph_objs classes
# (go.Scattergeo / go.Layout / go.Figure).
layout = go.Layout(
    geo=dict(showland=True),
    showlegend=False,
)

trace = go.Scattergeo(
    lon=[-97.92, 0, 78.8],
    lat=[39.3, 0, 21.76],
    marker=dict(size=10),
    mode='markers',
)
data = [trace]

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)

In [4]:
# Now that we understand how bubble maps are configured, let's create one from real data.


In [5]:
# Read the housing CSV (path is relative to the notebook's working directory)
# and preview its first ten rows.
housing_data = pd.read_csv(filepath_or_buffer='datasets/housing.csv')
housing_data.iloc[:10]


Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY
5,-122.25,37.85,52.0,919.0,213.0,413.0,193.0,4.0368,269700.0,NEAR BAY
6,-122.25,37.84,52.0,2535.0,489.0,1094.0,514.0,3.6591,299200.0,NEAR BAY
7,-122.25,37.84,52.0,3104.0,687.0,1157.0,647.0,3.12,241400.0,NEAR BAY
8,-122.26,37.84,42.0,2555.0,665.0,1206.0,595.0,2.0804,226700.0,NEAR BAY
9,-122.25,37.84,52.0,3549.0,707.0,1551.0,714.0,3.6912,261100.0,NEAR BAY


In [6]:
# Plot a random 10% sample instead of the whole dataset to keep the map light.
# random_state pins the draw so Restart & Run All reproduces the same rows
# (and therefore the same figure) every time.
# NOTE: this overwrites housing_data, so re-running this cell alone samples
# 10% of the already-sampled frame; re-run the load cell first.
housing_data = housing_data.sample(frac=0.1, random_state=42).reset_index(drop=True)

housing_data.shape


(2064, 10)

In [7]:
# Let's turn the text categories in 'ocean_proximity' into integer codes with
# scikit-learn's LabelEncoder, stored in a new 'ocean_proximity_labels' column.
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
le.fit(housing_data['ocean_proximity'])
housing_data['ocean_proximity_labels'] = le.transform(housing_data['ocean_proximity'])


In [9]:
housing_data[['ocean_proximity', 'ocean_proximity_labels']].sample(10)


Unnamed: 0,ocean_proximity,ocean_proximity_labels
1119,<1H OCEAN,0
773,INLAND,1
492,<1H OCEAN,0
1388,<1H OCEAN,0
864,INLAND,1
313,<1H OCEAN,0
1873,INLAND,1
921,<1H OCEAN,0
792,NEAR OCEAN,3
355,<1H OCEAN,0


In [11]:
# Bubble map of the sampled housing rows: one marker per row, sized by
# median_house_value and coloured by the encoded ocean_proximity category.

# Pair every integer code present in the frame with its category name so the
# colour bar can show readable labels instead of bare integers.
proximity_legend = (
    housing_data[['ocean_proximity_labels', 'ocean_proximity']]
    .drop_duplicates()
    .sort_values('ocean_proximity_labels')
)

trace = go.Scattergeo(
    lat = housing_data['latitude'],
    lon = housing_data['longitude'],

    # Hover text: the category name for each point.
    text = housing_data['ocean_proximity'],

    marker=dict(
        # Divide by 1000 so the area-scaled markers stay a plottable size
        # (the preview rows show raw values in the hundreds of thousands).
        size=housing_data['median_house_value']/1000,
        sizemode='area',
        color=housing_data['ocean_proximity_labels'],
        colorscale='Portland',
        showscale=True,
        # Label the colour bar ticks with category names rather than codes.
        colorbar=dict(
            title='Ocean proximity',
            tickvals=proximity_legend['ocean_proximity_labels'].tolist(),
            ticktext=proximity_legend['ocean_proximity'].tolist(),
        ),
    ),
    mode='markers'
)

data = [trace]
layout = go.Layout(
    showlegend=False,
    geo=dict(showland=True, landcolor='yellow')
)

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig)