Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SEDONA-547] Use scarf to collect telemetry data #1373

Merged
merged 5 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ on:

env:
MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
DO_NOT_TRACK: true

permissions:
contents: read
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ env:
JAI_CORE_VERSION: "1.1.3"
JAI_CODEC_VERSION: "1.1.3"
JAI_IMAGEIO_VERSION: "1.1"
DO_NOT_TRACK: true

permissions:
contents: read
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ on:

env:
MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
DO_NOT_TRACK: true

jobs:
build:
Expand Down
3 changes: 2 additions & 1 deletion R/R/dependencies.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ sedona_initialize_spark_connection <- function(sc) {
sc,
"org.apache.sedona.sql.utils.SedonaSQLRegistrator",
"registerAll",
spark_session(sc)
spark_session(sc),
"r"
)

# Instantiate all enum objects and store them immutably under
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sedona.common.utils;

import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

public class TelemetryCollector {

    private static final String BASE_URL = "https://sedona.gateway.scarf.sh/packages/";
    // Bound the network round-trip so a slow or unreachable gateway can never
    // stall engine startup (the original code set no timeout at all).
    private static final int TIMEOUT_MILLIS = 3000;

    /**
     * Reports an anonymous usage ping (language, engine, CPU arch, OS name, JVM version)
     * to the Scarf telemetry gateway.
     *
     * The telemetry URL is always constructed and returned — even when the user has
     * opted out — so callers and tests can inspect what would have been sent; the HTTP
     * request itself is only issued when telemetry is enabled. Every failure is
     * swallowed silently: telemetry is strictly best-effort and must never break or
     * add log noise to a user application.
     *
     * @param engineName compute engine reporting the ping, e.g. "spark" or "flink"
     * @param language   host language of the caller, e.g. "java", "python", "r"
     * @return the telemetry URL that was (or would have been) requested; "" if even
     *         URL construction failed
     */
    public static String send(String engineName, String language) {
        String telemetrySubmitted = "";
        HttpURLConnection conn = null;
        try {
            telemetrySubmitted = BASE_URL + language + "/" + engineName + "/"
                    + encodedProperty("os.arch") + "/"
                    + encodedProperty("os.name") + "/"
                    + encodedProperty("java.version");

            // Honor user opt-out before any network activity.
            if (optedOut("SCARF_NO_ANALYTICS") || optedOut("DO_NOT_TRACK")) {
                return telemetrySubmitted;
            }

            // Fire-and-forget GET; the response code is read only to complete the
            // request, and non-200 responses are deliberately ignored.
            URL url = new URL(telemetrySubmitted);
            conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            conn.setReadTimeout(TIMEOUT_MILLIS);
            conn.connect();
            conn.getResponseCode();
        } catch (Exception e) {
            // Intentionally silent: telemetry failures must be invisible to users.
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return telemetrySubmitted;
    }

    /** URL-encodes a JVM system property value, with spaces replaced by underscores. */
    private static String encodedProperty(String key) throws Exception {
        return URLEncoder.encode(System.getProperty(key).replaceAll(" ", "_"), "UTF-8");
    }

    /** True when the given opt-out flag is "true" as either an env var or a system property. */
    private static boolean optedOut(String flag) {
        return "true".equals(System.getenv(flag)) || "true".equals(System.getProperty(flag));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sedona.common.telemetry;

import org.apache.sedona.common.utils.TelemetryCollector;
import org.junit.Test;

public class TelemetryTest {

    /** Verifies the collector reports (or would report) the expected Scarf gateway URL. */
    @Test
    public void testTelemetryCollector() {
        String reportedUrl = TelemetryCollector.send("test", "java");
        assert reportedUrl.contains("https://sedona.gateway.scarf.sh/packages/java/test");
    }
}
3 changes: 3 additions & 0 deletions docs/asf/telemetry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Apache Sedona uses Scarf to collect anonymous usage data to help us understand how the software is being used and how we can improve it. You can opt out of telemetry collection by setting the environment variable `SCARF_NO_ANALYTICS` or `DO_NOT_TRACK` to `true` on your local machine, or the driver machine of your cluster.

Scarf fully supports the GDPR and is allowed by [the Apache Software Foundation privacy policy](https://privacy.apache.org/faq/committers.html). The privacy policy of Scarf is available at [https://about.scarf.sh/privacy-policy](https://about.scarf.sh/privacy-policy).
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.sedona.common.geometryObjects.Circle;
import org.apache.sedona.common.geometrySerde.GeometrySerde;
import org.apache.sedona.common.geometrySerde.SpatialIndexSerde;
import org.apache.sedona.common.utils.TelemetryCollector;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.GeometryCollection;
import org.locationtech.jts.geom.LineString;
Expand All @@ -46,6 +47,7 @@ public class SedonaContext
*/
public static StreamTableEnvironment create(StreamExecutionEnvironment env, StreamTableEnvironment tblEnv)
{
TelemetryCollector.send("flink", "java");
GeometrySerde serializer = new GeometrySerde();
SpatialIndexSerde indexSerializer = new SpatialIndexSerde(serializer);
env.getConfig().registerTypeWithKryoSerializer(Point.class, serializer);
Expand Down
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ nav:
- Thanks: https://www.apache.org/foundation/thanks.html" target="_blank
- Security: https://www.apache.org/security/" target="_blank
- Privacy: https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank
- Telemetry: asf/telemetry.md
repo_url: https://github.com/apache/sedona
repo_name: apache/sedona
theme:
Expand Down
2 changes: 1 addition & 1 deletion python/sedona/spark/SedonaContext.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def create(cls, spark: SparkSession) -> SparkSession:
"""
spark.sql("SELECT 1 as geom").count()
PackageImporter.import_jvm_lib(spark._jvm)
spark._jvm.SedonaContext.create(spark._jsparkSession)
spark._jvm.SedonaContext.create(spark._jsparkSession, "python")
return spark

@classmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.sedona.spark

import org.apache.sedona.common.utils.TelemetryCollector
import org.apache.sedona.core.serde.SedonaKryoRegistrator
import org.apache.sedona.sql.UDF.UdfRegistrator
import org.apache.sedona.sql.UDT.UdtRegistrator
Expand All @@ -26,8 +27,11 @@ import org.apache.spark.sql.sedona_sql.optimization.SpatialFilterPushDownForGeoP
import org.apache.spark.sql.sedona_sql.strategy.join.JoinQueryDetector
import org.apache.spark.sql.{SQLContext, SparkSession}

import scala.annotation.StaticAnnotation
import scala.util.Try

class InternalApi(description: String = "This method is for internal use only and may change without notice.") extends StaticAnnotation

object SedonaContext {
def create(sqlContext: SQLContext): SQLContext = {
create(sqlContext.sparkSession)
Expand All @@ -40,6 +44,12 @@ object SedonaContext {
* @return
*/
def create(sparkSession: SparkSession):SparkSession = {
create(sparkSession, "java")
}

@InternalApi
def create(sparkSession: SparkSession, language: String):SparkSession = {
TelemetryCollector.send("spark", language)
if (!sparkSession.experimental.extraStrategies.exists(_.isInstanceOf[JoinQueryDetector])) {
sparkSession.experimental.extraStrategies ++= Seq(new JoinQueryDetector(sparkSession))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,21 @@ import org.apache.spark.sql.{SQLContext, SparkSession}
object SedonaSQLRegistrator {
@deprecated("Use SedonaContext.create instead", "1.4.1")
def registerAll(sqlContext: SQLContext): Unit = {
SedonaContext.create(sqlContext.sparkSession)
registerAll(sqlContext, "java")
}

@deprecated("Use SedonaContext.create instead", "1.4.1")
def registerAll(sparkSession: SparkSession): Unit =
SedonaContext.create(sparkSession)
registerAll(sparkSession, "java")

@deprecated("Use SedonaContext.create instead", "1.4.1")
def registerAll(sqlContext: SQLContext, language: String): Unit = {
SedonaContext.create(sqlContext.sparkSession, language)
}

@deprecated("Use SedonaContext.create instead", "1.4.1")
def registerAll(sparkSession: SparkSession, language: String): Unit =
SedonaContext.create(sparkSession, language)

def dropAll(sparkSession: SparkSession): Unit = {
UdfRegistrator.dropAll(sparkSession)
Expand Down
Loading