In [1]:
import findspark
findspark.init()
import datetime as dt
import os

import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *

from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *

In [2]:
# Build (or reuse) a Spark session in local mode with the driver memory
# setting fixed at 48g.
sess = (
    SparkSession.builder
    .master("local[*]")
    .config("spark.driver.memory", "48g")
    .getOrCreate()
)

In [4]:
# Read the parquet dataset into a Spark DataFrame and display its row count.
parquet_dir = "./luftdaten.info.20180103.parquet/"
df = sess.read.parquet(parquet_dir)
df.count()

646298786

In [6]:
# Average P1/P2 per hour-of-day for sensor 189, grouped by its reported
# (lat, lon), then collected into a pandas DataFrame.
# NOTE: this rebinds `df` from the Spark DataFrame to a pandas one, so the
# cell cannot be re-run without re-running the parquet load first.
df = (
    df.filter("sensor_id = 189")
    .withColumn("hour", hour("timestamp"))
    .select("hour", "sensor_id", "lat", "lon", "P1", "P2")
    .groupBy("hour", "sensor_id", "lat", "lon")
    .agg({"P1": "avg", "P2": "avg"})
    .orderBy("hour", "sensor_id")
    .toPandas()
)

In [7]:
# Keep only the rows for hour 12 and display them.
noon_mask = df["hour"] == 12
df = df[noon_mask]
df

Unnamed: 0,hour,sensor_id,lat,lon,avg(P1),avg(P2)
24,12,189,48.781,9.192,30.072065,13.171231
25,12,189,48.813,9.125,13.530601,6.676378


In [8]:
# Color ramp as [position, color] stops, with positions running from 0 to 1.
colorscale = [[0, 'rgb(54, 50, 153)'], [0.35, 'rgb(17, 123, 215)'],
                [0.5, 'rgb(37, 180, 167)'], [0.6, 'rgb(134, 191, 118)'],
                [0.7, 'rgb(249, 210, 41)'], [1.0, 'rgb(244, 236, 21)']]

# Read the Mapbox token from the environment so a personal credential never
# has to be committed with the notebook. The fallback is the token this
# notebook originally embedded, so existing runs keep working unchanged.
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoiamFja3AiLCJhIjoidGpzN0lXVSJ9.7YK6eRwUNFwd3ODZff6JvA',
)

init_notebook_mode(connected=True)

In [9]:
# One marker per row of `df`, positioned by lon/lat and colored by avg(P1).
# The marker is a plain dict rather than the deprecated generic `Marker`
# graph object. `colorscale` was defined earlier but never applied, so it is
# wired in here together with a colorbar that makes the colors readable.
data = [
    Scattermapbox(
        lon=df['lon'].values,
        lat=df['lat'].values,
        mode='markers',
        marker=dict(
            color=df['avg(P1)'].values,
            colorscale=colorscale,
            showscale=True,
        ),
    )
]

# Borderless dark Mapbox layout with a fixed center and zoom level.
layout = Layout(
    margin=dict(t=0, b=0, r=0, l=0),
    autosize=True,
    hovermode='closest',
    showlegend=False,
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        center=dict(
            lat=48.78,
            lon=9.2
        ),
        pitch=0,
        zoom=12,
        style='dark'
    ),
)

fig = dict(data=data, layout=layout)
# NOTE(review): offline `iplot` appears to use `filename` only as the name of
# an exported image, not an HTML file — confirm 'foo.html' is intended.
iplot(fig, filename='foo.html')