In [1]:
import os
import time

import gmplot
import pyspark
from pyspark.conf import SparkConf
from pyspark.sql import SparkSession, SQLContext, Row
from pyspark.sql import functions as F

from src.GPSProcessing import *


In [2]:
# Widen the notebook container to 95% of the browser window.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [3]:
%%HTML
<!-- Scale the font of CodeMirror pre elements to 120%. -->
<style> .CodeMirror pre { font-size: 120% !important; } </style>

In [4]:
# REFERENCE: https://spark.apache.org/docs/latest/configuration.html

# Spark settings for this session, kept as (key, value) pairs.
spark_settings = [
    ('spark.memory.fraction', '0.6'),
    ('spark.executor.memory', '16g'),
    ('spark.driver.memory', '16g'),
    ('spark.sql.shuffle.partitions', '20'),
    ('spark.memory.offHeap.enabled', True),
    ('spark.memory.offHeap.size', '16g'),
    ('spark.cleaner.referenceTracking.cleanCheckpoints', True),
    ('spark.driver.host', '127.0.0.1'),
]
conf = SparkConf()
conf.setAll(spark_settings)

# Configure the builder step by step, then obtain the session with a
# "local[*]" master.
session_builder = SparkSession.builder.config(conf=conf)
session_builder = session_builder.master("local[*]")
session_builder = session_builder.appName("trip trajectories")
spark = session_builder.getOrCreate()

# Context-level setup: log level and checkpoint directory.
sc = spark.sparkContext
sc.setLogLevel("ERROR")
sc.setCheckpointDir('checkpoints')

# Echo the context's configuration as the cell output.
sc.getConf().getAll()

[('spark.sql.shuffle.partitions', '20'),
 ('spark.app.name', 'trip trajectories'),
 ('spark.driver.port', '55161'),
 ('spark.driver.host', '127.0.0.1'),
 ('spark.cleaner.referenceTracking.cleanCheckpoints', 'True'),
 ('spark.executor.id', 'driver'),
 ('spark.driver.memory', '16g'),
 ('spark.executor.memory', '16g'),
 ('spark.app.id', 'local-1575395066227'),
 ('spark.rdd.compress', 'True'),
 ('spark.memory.fraction', '0.6'),
 ('spark.serializer.objectStreamReset', '100'),
 ('spark.master', 'local[*]'),
 ('spark.submit.deployMode', 'client'),
 ('spark.memory.offHeap.enabled', 'True'),
 ('spark.ui.showConsoleProgress', 'true'),
 ('spark.memory.offHeap.size', '16g')]

In [5]:
# Inspect the context's default parallelism (8 in the recorded run).
sc.defaultParallelism

8

In [6]:
# Inspect the context's default minimum partition count (2 in the recorded run).
sc.defaultMinPartitions

2

In [7]:
# Display the SparkContext object as the cell output.
sc

Load the processed GPS data

In [8]:
# Location of the input CSV. The HABITUS_GPS_CSV environment variable overrides
# it, so the notebook can run on another machine without editing this cell;
# the original local path remains the fallback.
gps_path = os.environ.get(
    'HABITUS_GPS_CSV',
    '/Users/molinaro/Documents/GITHUB/HABITUS/notebooks/habitus_gps_acc_1.csv',
)

In [9]:
# Read the CSV at gps_path, using its first line as the header and letting
# Spark infer the column types.
csv_read_options = {'header': True, 'inferSchema': True}
gps_data = spark.read.csv(gps_path, **csv_read_options)

In [16]:
# Print a tabular preview of the loaded GPS records.
gps_data.show()

+---+-------------------+---+---------+-----------+-----------+----------+--------+-------+--------+-----------------+------------------+-------------------+
| ID|          timestamp|dow|      lat|        lon|fixTypeCode|tripNumber|tripType|tripMOT|activity|activityIntensity|activityBoutNumber|sedentaryBoutNumber|
+---+-------------------+---+---------+-----------+-----------+----------+--------+-------+--------+-----------------+------------------+-------------------+
|  1|2016-08-16 18:23:25|  2| 51.01609|-114.099305|          2|         0|       0|      0|      65|                1|                 0|                  3|
|  1|2016-08-16 18:23:30|  2|51.016075| -114.09925|          1|         0|       0|      0|       0|                0|                 0|                  3|
|  1|2016-08-16 18:23:35|  2|51.016078|-114.099263|          1|         0|       0|      0|      55|                1|                 0|                  3|
|  1|2016-08-16 18:23:40|  2|51.016072| -114.09925| 

In [10]:
# Largest tripNumber in the data: aggregate, collect the result rows, and
# unwrap the first cell of the first row.
max_trip_rows = gps_data.agg({"tripNumber": "max"}).collect()
max_trip_value = max_trip_rows[0][0]

In [11]:
# Show the highest trip number found (194 in the recorded run).
max_trip_value

194

In [12]:
# Convert the two coordinate columns to a pandas DataFrame, then split them
# into plain Python lists.
dataframe = gps_data.select('lat', 'lon').toPandas()
lat_list, lon_list = (dataframe[column].tolist() for column in ('lat', 'lon'))

In [13]:
# Bounding box of all coordinates in lat_list / lon_list.
min_lat, max_lat = min(lat_list), max(lat_list)
min_lon, max_lon = min(lon_list), max(lon_list)

# Centre the map on the midpoint of the bounding box.
# NOTE(review): 16 is the third positional argument, presumably the zoom
# level in gmplot's API; confirm.
center_lat = min_lat + (max_lat - min_lat) / 2
center_lon = min_lon + (max_lon - min_lon) / 2
gmap = gmplot.GoogleMapPlotter(center_lat, center_lon, 16)

In [17]:
# Overlay trips 1..MAX_TRIPS_TO_PLOT on the map, one trip at a time: each
# trip's points are drawn as red dots joined by a blue line.
# NOTE(review): the data holds more trips than this cap (max_trip_value was 194
# in the recorded run), so later trips are not drawn; raise the cap if all
# trips should appear.
MAX_TRIPS_TO_PLOT = 100

for trip_number in range(1, MAX_TRIPS_TO_PLOT + 1):

    # Convert to pandas once per trip; calling toPandas() separately for each
    # column would run the same Spark job twice.
    trip_points = (
        gps_data
        .filter(F.col('tripNumber') == trip_number)
        .select(['lat', 'lon'])
        .toPandas()
    )

    # Trip-local names, so the full-dataset lat_list / lon_list used to centre
    # the map are not overwritten by the last trip's points.
    trip_lats = trip_points['lat'].tolist()
    trip_lons = trip_points['lon'].tolist()

    # Nothing to draw for a trip number that has no rows.
    if not trip_lats:
        continue

    gmap.scatter(trip_lats, trip_lons, 'red', size = 0.5, marker = False)
    gmap.plot(trip_lats, trip_lons, 'blue', edge_width = 3)

In [18]:
# Render the map, with everything added to it so far, to 'gmap.html'.
gmap.draw('gmap.html')