In [0]:
// ZooKeeper connection string handed to the "zookeepers" option of the GeoMesa
// reader and data store further down. "X.X.X.X" looks like a placeholder —
// presumably to be replaced with real host(s) before running.
val zookeepers = "X.X.X.X"


In [14]:
import org.locationtech.geomesa.jupyter.L
import org.locationtech.geomesa.utils.geotools.Conversions._
import scala.collection.JavaConversions._
import vegas._
import vegas.render.HTMLRenderer._
import vegas.sparkExt._

// Sends a string to the notebook kernel as "text/html" display content.
// Declared implicit, and also passed explicitly to Vegas' show(...) in the chart cell.
implicit val displayer: String => Unit = html => kernel.display.content("text/html", html)

In [2]:
// Load the "gdelt" feature type through the "geomesa" Spark data source.
val df = {
  // All reader settings gathered in one place; keys and values are the ones
  // the data source is configured with.
  val geomesaOptions = Map(
    "zookeepers"      -> zookeepers,
    "instanceId"      -> "gis",
    "user"            -> "root",
    "password"        -> "secret",
    "tableName"       -> "geomesa.gdelt",
    "geomesa.feature" -> "gdelt"
  )
  spark.read.format("geomesa").options(geomesaOptions).load()
}

In [3]:
// Register the DataFrame as a temporary view named "gdelt" so the spark.sql
// queries in later cells can reference it by that name.
df.createOrReplaceTempView("gdelt")

In [4]:
// Print the schema of the "gdelt" view (column name, data type, comment).
// show() with no arguments prints only the first 20 rows, so wider schemas
// are truncated in the output.
spark.sql("DESCRIBE gdelt").show()

+-------------------+---------+-------+
|           col_name|data_type|comment|
+-------------------+---------+-------+
|            __fid__|   string|   null|
|      globalEventId|   string|   null|
|          eventCode|   string|   null|
|      eventBaseCode|   string|   null|
|      eventRootCode|   string|   null|
|        isRootEvent|      int|   null|
|         actor1Name|   string|   null|
|         actor1Code|   string|   null|
|  actor1CountryCode|   string|   null|
|    actor1GroupCode|   string|   null|
|   actor1EthnicCode|   string|   null|
|actor1Religion1Code|   string|   null|
|actor1Religion2Code|   string|   null|
|         actor2Name|   string|   null|
|         actor2Code|   string|   null|
|  actor2CountryCode|   string|   null|
|    actor2GroupCode|   string|   null|
|   actor2EthnicCode|   string|   null|
|actor2Religion1Code|   string|   null|
|actor2Religion2Code|   string|   null|
+-------------------+---------+-------+
only showing top 20 rows



In [5]:
// Sample ten rows from the view, projecting only the event id, geometry and
// dtg columns.
spark.sql("select globalEventId,geom,dtg from gdelt limit 10").show()

+-------------+--------------------+--------------------+
|globalEventId|                geom|                 dtg|
+-------------+--------------------+--------------------+
|    609167622|POINT (105.667 -1...|2006-12-19 00:00:...|
|    609167628|POINT (143.876 -9...|2006-12-19 00:00:...|
|    609167626|POINT (143.876 -9...|2006-12-19 00:00:...|
|    608826829|POINT (-122.637 3...|2006-12-18 00:00:...|
|    608800220|POINT (-106.237 3...|2006-12-18 00:00:...|
|    608837377|      POINT (-97 38)|2006-12-18 00:00:...|
|    609122526|      POINT (-97 38)|2006-12-19 00:00:...|
|    608695865|    POINT (-8 12.65)|2006-12-18 00:00:...|
|    608624384|POINT (-77.0364 3...|2006-12-18 00:00:...|
|    608870504|POINT (-74.9384 4...|2006-12-19 00:00:...|
+-------------+--------------------+--------------------+



In [6]:
{
  // Spatial filter: keep rows whose geom lies inside the polygon with corners
  // (-98 37) and (-95 40), then show at most ten of them. The query text is
  // kept flush-left so the string passed to Spark is unchanged.
  val boxQuery =
"""
select globalEventId,geom,dtg 
from gdelt 
where st_contains(geom,st_geomFromWKT('POLYGON((-98 37,-95 37,-95 40,-98 40,-98 37))')) limit 10
"""
  spark.sql(boxQuery).show()
}

+-------------+--------------------+--------------------+
|globalEventId|                geom|                 dtg|
+-------------+--------------------+--------------------+
|    610438414|POINT (-97.0384 3...|2016-12-21 00:00:...|
|    610438451|POINT (-97.0384 3...|2016-12-21 00:00:...|
|    610248202|POINT (-97.0384 3...|2016-12-20 00:00:...|
|    610438450|POINT (-97.0384 3...|2016-12-21 00:00:...|
|    610248264|POINT (-96.8336 3...|2016-12-20 00:00:...|
|    610231190|POINT (-96.8336 3...|2016-12-20 00:00:...|
|    608550784|POINT (-96.8336 3...|2016-12-15 00:00:...|
|    610231236|POINT (-96.8336 3...|2016-12-20 00:00:...|
|    608761515|POINT (-96.2336 3...|2016-12-15 00:00:...|
|    608760419|POINT (-96.2336 3...|2016-12-15 00:00:...|
+-------------+--------------------+--------------------+



In [7]:
// Open the GeoTools data store backing the "geomesa.gdelt" table directly
// (outside of Spark), for feature-level access in the following cells.
val ds = {
  // Explicit Scala -> Java map conversion instead of relying on an implicit one.
  import scala.collection.JavaConverters._

  val params = Map(
    "zookeepers" -> zookeepers,
    "instanceId" -> "gis",
    "user"       -> "root",
    "password"   -> "secret",
    "tableName"  -> "geomesa.gdelt")

  // DataStoreFinder.getDataStore returns null when no registered factory can
  // handle the parameters; fail here with a clear message rather than with a
  // NullPointerException on first use.
  Option(org.geotools.data.DataStoreFinder.getDataStore(params.asJava)).getOrElse {
    throw new IllegalStateException(
      "No GeoTools data store found for the given parameters: " +
        "check the connection settings and that the GeoMesa data store is on the classpath")
  }
}


In [8]:
// Look up the feature source for the "gdelt" type name on the data store.
val fs = ds.getFeatureSource("gdelt")

In [9]:
// Build one map circle (radius argument 50, default style options) for each of
// the first five features that has a point geometry.
val sf = {
  // The feature iterator holds store resources and is not exhausted by
  // take(5), so it has to be closed explicitly once the list is materialized.
  val features = fs.getFeatures().features
  try {
    features.
      take(5).
      toList.
      map(_.getDefaultGeometry).
      // Pattern match instead of an unchecked cast: features whose default
      // geometry is null or not a Point are skipped.
      collect { case p: com.vividsolutions.jts.geom.Point =>
        L.Circle(p.getX, p.getY, 50, L.StyleOptions())
      }
  } finally {
    features.close()
  }
}

In [10]:
// Display the shapes in `sf` using the `L` helper imported from
// org.locationtech.geomesa.jupyter.
L.show(sf)

In [16]:
// Count events per eventCode and keep the 20 most frequent codes.
// Note: this rebinds `df`, replacing the earlier DataFrame of that name in the notebook.
val df = {
  val topEventCodesQuery =
    "select eventCode,count(eventCode) as count from gdelt group by eventCode order by count desc limit 20"
  spark.sql(topEventCodesQuery)
}

In [17]:
// Print the rows of `df` as a text table.
df.show()

+---------+------+
|eventCode| count|
+---------+------+
|      010|100980|
|      042| 79710|
|      043| 76407|
|      020| 71148|
|      051| 68348|
|      040| 65268|
|      190| 48817|
|      036| 42550|
|      173| 40695|
|      046| 33845|
|      090| 26066|
|      013| 25580|
|      111| 25370|
|      120| 25263|
|      112| 23002|
|      012| 22725|
|      193| 22351|
|      030| 21840|
|      110| 21369|
|      071| 19856|
+---------+------+



In [18]:
{
  // Bar chart titled "Event Code" over the rows of `df`: eventCode on the
  // x axis (Nom), count on the y axis (Quant).
  val eventCodeChart = Vegas("Event Code").
    withDataFrame(df).
    encodeX("eventCode", Nom).
    encodeY("count", Quant).
    mark(Bar)

  // Render through the HTML displayer defined in the imports cell.
  eventCodeChart.show(displayer)
}