Skip to content

Commit

Permalink
[SEDONA-547] Use scarf to collect telemetry data (#1373)
Browse files Browse the repository at this point in the history
* Add scarf

* Fix linter

* Temporarily remove CI on R release version

* Update the path

* Revert "Temporarily remove CI on R release version"

This reverts commit 108051e.
  • Loading branch information
jiayuasu committed Apr 26, 2024
1 parent 7c53678 commit 23ba9ac
Show file tree
Hide file tree
Showing 12 changed files with 131 additions and 4 deletions.
1 change: 1 addition & 0 deletions .github/workflows/java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ on:

env:
MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
DO_NOT_TRACK: true

permissions:
contents: read
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ env:
JAI_CORE_VERSION: "1.1.3"
JAI_CODEC_VERSION: "1.1.3"
JAI_IMAGEIO_VERSION: "1.1"
DO_NOT_TRACK: true

permissions:
contents: read
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ on:

env:
MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
DO_NOT_TRACK: true

jobs:
build:
Expand Down
3 changes: 2 additions & 1 deletion R/R/dependencies.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ sedona_initialize_spark_connection <- function(sc) {
sc,
"org.apache.sedona.sql.utils.SedonaSQLRegistrator",
"registerAll",
spark_session(sc)
spark_session(sc),
"r"
)

# Instantiate all enum objects and store them immutably under
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sedona.common.utils;

import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

/**
 * Fire-and-forget usage telemetry reporter backed by Scarf.
 *
 * <p>Builds a URL of the form
 * {@code https://sedona.gateway.scarf.sh/packages/<language>/<engine>/<arch>/<os>/<jvm>}
 * and issues a single HTTP GET against it. All failures are swallowed silently so that
 * telemetry can never break engine initialization; timeouts bound how long it can stall it.
 */
public class TelemetryCollector {

    private static final String BASE_URL = "https://sedona.gateway.scarf.sh/packages/";
    // Bound the network round trip so a slow or unreachable gateway cannot hang startup.
    private static final int TIMEOUT_MILLIS = 3000;

    /**
     * Reports one anonymous usage ping unless the user has opted out.
     *
     * @param engineName engine identifier, e.g. "spark" or "flink"
     * @param language   client language identifier, e.g. "java", "python", "r"
     * @return the telemetry URL that was (or, on opt-out/failure, would have been) requested;
     *         empty string only if URL construction itself failed
     */
    public static String send(String engineName, String language) {
        HttpURLConnection conn = null;
        String telemetrySubmitted = "";
        try {
            // Spaces are replaced before encoding so the path segments stay readable.
            String arch = URLEncoder.encode(System.getProperty("os.arch").replaceAll(" ", "_"), "UTF-8");
            String os = URLEncoder.encode(System.getProperty("os.name").replaceAll(" ", "_"), "UTF-8");
            String jvm = URLEncoder.encode(System.getProperty("java.version").replaceAll(" ", "_"), "UTF-8");

            // Construct URL
            telemetrySubmitted = BASE_URL + language + "/" + engineName + "/" + arch + "/" + os + "/" + jvm;

            // Honor the user's opt-out before any network traffic happens.
            if (isOptedOut()) {
                return telemetrySubmitted;
            }

            // Send GET request; only the hit matters, the response body is ignored.
            URL url = new URL(telemetrySubmitted);
            conn = (HttpURLConnection) url.openConnection();
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            conn.setReadTimeout(TIMEOUT_MILLIS);
            conn.setRequestMethod("GET");
            conn.connect();
            conn.getResponseCode(); // drain the status; a non-200 is deliberately ignored
        } catch (Exception e) {
            // Telemetry is best-effort: never propagate failures to the caller.
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return telemetrySubmitted;
    }

    /**
     * @return true when either SCARF_NO_ANALYTICS or DO_NOT_TRACK is set to "true"
     *         as an environment variable or as a JVM system property
     */
    private static boolean isOptedOut() {
        return isTrue(System.getenv("SCARF_NO_ANALYTICS"))
                || isTrue(System.getenv("DO_NOT_TRACK"))
                || isTrue(System.getProperty("SCARF_NO_ANALYTICS"))
                || isTrue(System.getProperty("DO_NOT_TRACK"));
    }

    /** Null-safe check for the literal opt-out value "true". */
    private static boolean isTrue(String value) {
        return value != null && value.equals("true");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sedona.common.telemetry;

import org.apache.sedona.common.utils.TelemetryCollector;
import org.junit.Test;

public class TelemetryTest
{
    /** Verifies the telemetry URL is built with the expected language/engine path. */
    @Test
    public void testTelemetryCollector()
    {
        String reportedUrl = TelemetryCollector.send("test", "java");
        assert reportedUrl.contains("https://sedona.gateway.scarf.sh/packages/java/test");
    }
}
3 changes: 3 additions & 0 deletions docs/asf/telemetry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Apache Sedona uses Scarf to collect anonymous usage data to help us understand how the software is being used and how we can improve it. You can opt out of telemetry collection by setting the environment variable `SCARF_NO_ANALYTICS` or `DO_NOT_TRACK` to `true` on your local machine, or the driver machine of your cluster.

Scarf fully supports the GDPR and is allowed by [the Apache Software Foundation privacy policy](https://privacy.apache.org/faq/committers.html). The privacy policy of Scarf is available at [https://about.scarf.sh/privacy-policy](https://about.scarf.sh/privacy-policy).
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.sedona.common.geometryObjects.Circle;
import org.apache.sedona.common.geometrySerde.GeometrySerde;
import org.apache.sedona.common.geometrySerde.SpatialIndexSerde;
import org.apache.sedona.common.utils.TelemetryCollector;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.GeometryCollection;
import org.locationtech.jts.geom.LineString;
Expand All @@ -46,6 +47,7 @@ public class SedonaContext
*/
public static StreamTableEnvironment create(StreamExecutionEnvironment env, StreamTableEnvironment tblEnv)
{
TelemetryCollector.send("flink", "java");
GeometrySerde serializer = new GeometrySerde();
SpatialIndexSerde indexSerializer = new SpatialIndexSerde(serializer);
env.getConfig().registerTypeWithKryoSerializer(Point.class, serializer);
Expand Down
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ nav:
- Thanks: https://www.apache.org/foundation/thanks.html" target="_blank
- Security: https://www.apache.org/security/" target="_blank
- Privacy: https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank
- Telemetry: asf/telemetry.md
repo_url: https://github.com/apache/sedona
repo_name: apache/sedona
theme:
Expand Down
2 changes: 1 addition & 1 deletion python/sedona/spark/SedonaContext.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def create(cls, spark: SparkSession) -> SparkSession:
"""
spark.sql("SELECT 1 as geom").count()
PackageImporter.import_jvm_lib(spark._jvm)
spark._jvm.SedonaContext.create(spark._jsparkSession)
spark._jvm.SedonaContext.create(spark._jsparkSession, "python")
return spark

@classmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.sedona.spark

import org.apache.sedona.common.utils.TelemetryCollector
import org.apache.sedona.core.serde.SedonaKryoRegistrator
import org.apache.sedona.sql.UDF.UdfRegistrator
import org.apache.sedona.sql.UDT.UdtRegistrator
Expand All @@ -26,8 +27,11 @@ import org.apache.spark.sql.sedona_sql.optimization.SpatialFilterPushDownForGeoP
import org.apache.spark.sql.sedona_sql.strategy.join.JoinQueryDetector
import org.apache.spark.sql.{SQLContext, SparkSession}

import scala.annotation.StaticAnnotation
import scala.util.Try

/** Marker annotation for Sedona-internal APIs that may change or disappear without notice. */
class InternalApi(description: String = "This method is for internal use only and may change without notice.") extends StaticAnnotation

object SedonaContext {
def create(sqlContext: SQLContext): SQLContext = {
create(sqlContext.sparkSession)
Expand All @@ -40,6 +44,12 @@ object SedonaContext {
* @return
*/
/** Default entry point for JVM callers: delegates to the language-aware overload with the "java" tag. */
def create(sparkSession: SparkSession): SparkSession = create(sparkSession, "java")

@InternalApi
def create(sparkSession: SparkSession, language: String):SparkSession = {
TelemetryCollector.send("spark", language)
if (!sparkSession.experimental.extraStrategies.exists(_.isInstanceOf[JoinQueryDetector])) {
sparkSession.experimental.extraStrategies ++= Seq(new JoinQueryDetector(sparkSession))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,21 @@ import org.apache.spark.sql.{SQLContext, SparkSession}
object SedonaSQLRegistrator {
@deprecated("Use SedonaContext.create instead", "1.4.1")
def registerAll(sqlContext: SQLContext): Unit = {
SedonaContext.create(sqlContext.sparkSession)
registerAll(sqlContext, "java")
}

@deprecated("Use SedonaContext.create instead", "1.4.1")
def registerAll(sparkSession: SparkSession): Unit =
SedonaContext.create(sparkSession)
registerAll(sparkSession, "java")

// Deprecated forwarder kept for source compatibility; threads the language tag through to telemetry.
@deprecated("Use SedonaContext.create instead", "1.4.1")
def registerAll(sqlContext: SQLContext, language: String): Unit =
  SedonaContext.create(sqlContext.sparkSession, language)

// Deprecated forwarder kept for source compatibility; threads the language tag through to telemetry.
@deprecated("Use SedonaContext.create instead", "1.4.1")
def registerAll(sparkSession: SparkSession, language: String): Unit = {
  SedonaContext.create(sparkSession, language)
}

def dropAll(sparkSession: SparkSession): Unit = {
UdfRegistrator.dropAll(sparkSession)
Expand Down

0 comments on commit 23ba9ac

Please sign in to comment.