Permalink
Browse files

EAGLE-66 Typesafe Streaming DSL and KeyValue based Grouping

- Decouple StreamProducer = StreamInfo + StreamProtocol
- Support typesafe DSL for StreamProducer
- Support KeyedStream and groupByKey
- Decouple ExecutionEnvironment

https://issues.apache.org/jira/browse/EAGLE-66

Author: @haoch <hao@apache.org>
Reviewer: @RalphSu <suliangfei@gmail.com>

Closes #26 #17
  • Loading branch information...
haoch committed Dec 16, 2015
1 parent 2734b42 commit 52b8e58b1af53273782454f52e61b4f4700626c9
Showing with 2,535 additions and 1,363 deletions.
  1. +1 −1 ...lert/eagle-alert-process/src/main/java/org/apache/eagle/alert/config/EmailNotificationConfig.java
  2. +1 −1 ...e-core/eagle-alert/eagle-alert-process/src/main/java/org/apache/eagle/executor/AlertExecutor.java
  3. +2 −2 .../org/apache/eagle/dataproc/impl/storm/{AbstractStormSpoutProvider.java → StormSpoutProvider.java}
  4. +3 −2 ...ss-api/src/main/java/org/apache/eagle/dataproc/impl/storm/hdfs/HDFSSourcedStormSpoutProvider.java
  5. +3 −4 ...ocess-api/src/main/java/org/apache/eagle/dataproc/impl/storm/kafka/KafkaSourcedSpoutProvider.java
  6. +11 −5 ...process-api/src/main/java/org/apache/eagle/dataproc/impl/storm/kafka/KafkaSourcedSpoutScheme.java
  7. +2 −2 ...che/eagle/dataproc/impl/storm/partition/{EagleCustomGrouping.java → CustomPartitionGrouping.java}
  8. +2 −1 ...s/eagle-stream-process-api/src/main/java/org/apache/eagle/datastream/JavaMapperStormExecutor.java
  9. +1 −0 ...tream-process-api/src/main/java/org/apache/eagle/datastream/JavaStormExecutorForAlertWrapper.java
  10. +31 −0 ...ess/eagle-stream-process-api/src/main/java/org/apache/eagle/datastream/utils/JavaReflections.java
  11. +0 −29 ...e-stream-process-api/src/main/scala/org/apache/eagle/datastream/AbstractStreamProducerGraph.scala
  12. +0 −73 ...ss/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/ExecutionEnvironment.scala
  13. +134 −0 ...s/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/ExecutionEnvironments.scala
  14. +0 −48 ...ocess/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/FilterBoltWrapper.scala
  15. +0 −37 ...ta-process/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/GraphPrinter.scala
  16. +0 −68 ...-process/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/MapBoltWrapper.scala
  17. +0 −59 ...data-process/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/SpoutProxy.scala
  18. +0 −43 ...-stream-process-api/src/main/scala/org/apache/eagle/datastream/StormExecutorForAlertWrapper.scala
  19. +0 −46 ...gle-stream-process-api/src/main/scala/org/apache/eagle/datastream/StormStreamDAGTransformer.scala
  20. +0 −120 ...ta-process/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/StreamAppDSL.scala
  21. +0 −65 .../eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/StreamGroupbyExpansion.scala
  22. +0 −41 ...ess/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/StreamNameExpansion.scala
  23. +0 −55 ...eam-process-api/src/main/scala/org/apache/eagle/datastream/StreamParallelismConfigExpansion.scala
  24. +0 −221 ...-process/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/StreamProducer.scala
  25. +1 −1 ...m-process-api/src/main/scala/org/apache/eagle/datastream/{ → core}/AbstractTopologyCompiler.scala
  26. +1 −1 ...m-process-api/src/main/scala/org/apache/eagle/datastream/{ → core}/AbstractTopologyExecutor.scala
  27. +74 −0 ...cess/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/core/Configuration.scala
  28. +80 −0 ...gle-stream-process-api/src/main/scala/org/apache/eagle/datastream/core/ExecutionEnvironment.scala
  29. +53 −48 ...tream-process-api/src/main/scala/org/apache/eagle/datastream/{ → core}/StreamAlertExpansion.scala
  30. +97 −0 ...ss/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/core/StreamConnector.scala
  31. +12 −12 ...cess-api/src/main/scala/org/apache/eagle/datastream/{StormStreamDAG.scala → core/StreamDAG.scala}
  32. +2 −2 ...-stream-process-api/src/main/scala/org/apache/eagle/datastream/{ → core}/StreamDAGExpansion.scala
  33. +45 −0 ...gle-stream-process-api/src/main/scala/org/apache/eagle/datastream/core/StreamDAGTransformer.scala
  34. +77 −0 ...e-stream-process-api/src/main/scala/org/apache/eagle/datastream/core/StreamGroupbyExpansion.scala
  35. +49 −0 ...agle-stream-process-api/src/main/scala/org/apache/eagle/datastream/core/StreamNameExpansion.scala
  36. +61 −0 ...rocess-api/src/main/scala/org/apache/eagle/datastream/core/StreamParallelismConfigExpansion.scala
  37. +294 −0 ...ess/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/core/StreamProducer.scala
  38. +10 −17 ...c/main/scala/org/apache/eagle/datastream/{StreamConnector.scala → core/StreamProducerGraph.scala}
  39. +176 −0 ...ess/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/core/StreamProtocol.scala
  40. +47 −0 ...agle-stream-process-api/src/main/scala/org/apache/eagle/datastream/core/StreamSourceBuilder.scala
  41. +18 −9 ...tream-process-api/src/main/scala/org/apache/eagle/datastream/{ → core}/StreamUnionExpansion.scala
  42. +119 −0 ...agle-stream-process-api/src/main/scala/org/apache/eagle/datastream/storm/AbstractStreamBolt.scala
  43. +41 −0 ...eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/storm/FilterBoltWrapper.scala
  44. +44 −0 ...agle-stream-process-api/src/main/scala/org/apache/eagle/datastream/storm/ForeachBoltWrapper.scala
  45. +71 −0 ...gle-stream-process-api/src/main/scala/org/apache/eagle/datastream/storm/IterableStreamSpout.scala
  46. +2 −1 ...ream-process-api/src/main/scala/org/apache/eagle/datastream/{ → storm}/JavaStormBoltWrapper.scala
  47. +1 −1 ...cess-api/src/main/scala/org/apache/eagle/datastream/{kafka → storm}/JsonMessageDeserializer.scala
  48. +9 −13 ...m-process-api/src/main/scala/org/apache/eagle/datastream/{kafka → storm}/KafkaStreamMonitor.scala
  49. +66 −0 ...ss/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/storm/MapBoltWrapper.scala
  50. +111 −0 ...rocess/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/storm/SpoutProxy.scala
  51. +15 −9 ...e-stream-process-api/src/main/scala/org/apache/eagle/datastream/{ → storm}/StormBoltFactory.scala
  52. +3 −2 ...e-stream-process-api/src/main/scala/org/apache/eagle/datastream/{ → storm}/StormBoltWrapper.scala
  53. +42 −0 ...ream-process-api/src/main/scala/org/apache/eagle/datastream/storm/StormExecutionEnvironment.scala
  54. +41 −0 ...m-process-api/src/main/scala/org/apache/eagle/datastream/storm/StormExecutorForAlertWrapper.scala
  55. +29 −8 ...-stream-process-api/src/main/scala/org/apache/eagle/datastream/{ → storm}/StormSpoutFactory.scala
  56. +36 −26 ...eam-process-api/src/main/scala/org/apache/eagle/datastream/{ → storm}/StormTopologyCompiler.scala
  57. +7 −10 ...process-api/src/main/scala/org/apache/eagle/datastream/{ → storm}/StormTopologyExecutorImpl.scala
  58. +8 −7 ...rocess-api/src/main/scala/org/apache/eagle/datastream/{ → utils}/AlertExecutorConsumerUtils.scala
  59. +59 −0 ...cess/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/utils/GraphPrinter.scala
  60. +24 −0 ...ess/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/utils/NameConstants.scala
  61. +6 −4 ...e-stream-process-api/src/main/scala/org/apache/eagle/datastream/{ → utils}/NodeNameSelector.scala
  62. +55 −0 ...ocess/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/utils/ReflectionS.scala
  63. +6 −4 ...s/eagle-stream-process-api/src/main/scala/org/apache/eagle/datastream/{ → utils}/UnionUtils.scala
  64. +41 −0 ...le-stream-process-api/src/test/java/org/apache/eagle/datastream/TestExecutionEnvironmentJava.java
  65. +15 −15 ...data-process/eagle-stream-process-api/src/test/java/org/apache/eagle/datastream/TestJavaMain.java
  66. +36 −0 ...s/eagle-stream-process-api/src/test/java/org/apache/eagle/datastream/TestJavaReflectionUtils.java
  67. +1 −2 ...ss/eagle-stream-process-api/src/test/java/org/apache/eagle/datastream/TestKafkaStreamMonitor.java
  68. +19 −21 ...rocess/eagle-stream-process-api/src/test/scala/org/apache/eagle/datastream/TestDAGExpansion.scala
  69. +34 −0 ...agle-stream-process-api/src/test/scala/org/apache/eagle/datastream/TestExecutionEnvironment.scala
  70. +30 −32 ...process/eagle-stream-process-api/src/test/scala/org/apache/eagle/datastream/TestStormRunner.scala
  71. +87 −0 ...rocess/eagle-stream-process-api/src/test/scala/org/apache/eagle/datastream/TestTypeSafedDSL.scala
  72. 0 ...rocess-api → eagle-stream-process-base}/src/main/java/org/apache/eagle/datastream/JavaMapper.java
  73. +6 −3 ...la → eagle-stream-process-base/src/main/java/org/apache/eagle/datastream/JavaTypeCompatible.java}
  74. +6 −14 ...ata-process/eagle-stream-process-base/src/main/scala/org/apache/eagle/datastream/EagleTuple.scala
  75. +2 −2 ...ata-process/eagle-stream-process-base/src/main/scala/org/apache/eagle/datastream/FlatMapper.scala
  76. +7 −2 ...ss/eagle-stream-process-base/src/main/scala/org/apache/eagle/datastream/StormStreamExecutor.scala
  77. +3 −1 ...-query/eagle-client-base/src/main/java/org/apache/eagle/service/client/EagleServiceConnector.java
  78. +9 −14 ...eagle-metric-collection/src/main/java/org/apache/eagle/metric/kafka/EagleMetricCollectorMain.java
  79. +2 −5 ...metric-collection/src/main/java/org/apache/eagle/metric/kafka/KafkaOffsetSourceSpoutProvider.java
  80. +1 −1 ...security-common/src/main/java/org/apache/eagle/security/hdfs/entity/FileSensitivityAPIEntity.java
  81. +0 −1 ...le-security-common/src/main/java/org/apache/eagle/security/partition/DataDistributionDaoImpl.java
  82. +5 −20 ...y-hbase-securitylog/src/main/java/org/apache/eagle/security/hbase/HbaseAuditLogProcessorMain.java
  83. +0 −1 ...in/java/org/apache/eagle/security/hbase/sensitivity/HbaseResourceSensitivityDataJoinExecutor.java
  84. +15 −0 eagle-security/eagle-security-hdfs-auditlog/run_auditlog_topology.sh
  85. +15 −0 eagle-security/eagle-security-hdfs-auditlog/run_hostname_lookkup.sh
  86. +15 −0 eagle-security/eagle-security-hdfs-auditlog/run_message_producer.sh
  87. +15 −0 eagle-security/eagle-security-hdfs-auditlog/run_message_producer_in_assembly.sh
  88. +17 −0 eagle-security/eagle-security-hdfs-auditlog/src/assembly/eagle-dam-auditlog-assembly.xml
  89. +19 −25 ...ity-hdfs-auditlog/src/main/java/org/apache/eagle/security/auditlog/HdfsAuditLogProcessorMain.java
  90. +6 −9 ...ty-hdfs-auditlog/src/main/java/org/apache/eagle/security/auditlog/HdfsUserCommandReassembler.java
  91. +1 −1 eagle-security/eagle-security-hdfs-auditlog/src/main/resources/application.conf
  92. +0 −16 eagle-security/eagle-security-hdfs-auditlog/src/main/resources/auditlog/auditlog.1
  93. +15 −0 eagle-security/eagle-security-hdfs-auditlog/src/main/resources/security-auditlog-storm.yaml
  94. +5 −22 ...securitylog/src/main/java/org/apache/eagle/security/securitylog/HDFSSecurityLogProcessorMain.java
  95. +9 −8 .../eagle-security-hdfs-web/src/main/java/org/apache/eagle/service/security/hdfs/HDFSFileSystem.java
  96. +7 −16 ...ty-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobRunningMonitoringMain.java
  97. +3 −15 .../detection/src/main/java/org/apache/eagle/security/userprofile/UserProfileDetectionBatchMain.java
  98. +6 −18 ...detection/src/main/java/org/apache/eagle/security/userprofile/UserProfileDetectionStreamMain.java
  99. +0 −1 test.txt
@@ -46,4 +46,4 @@ public String getTplFileName() {
public void setTplFileName(String tplFileName) {
this.tplFileName = tplFileName;
}
}
}
@@ -409,4 +409,4 @@ public void onAlerts(EagleAlertContext context, List<AlertAPIEntity> alerts) {
}
}
}
}
}
@@ -24,6 +24,6 @@
* Normally storm spout is a special part of storm topology and it is implemented in underlying spout implementation
* which can be retrieved from getSpout method.
*/
public abstract class AbstractStormSpoutProvider{
public abstract BaseRichSpout getSpout(Config context);
public interface StormSpoutProvider {
public BaseRichSpout getSpout(Config context);
}
@@ -14,16 +14,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.eagle.dataproc.impl.storm.hdfs;
import com.typesafe.config.Config;
import org.apache.eagle.dataproc.impl.storm.AbstractStormSpoutProvider;
import org.apache.eagle.dataproc.impl.storm.StormSpoutProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import backtype.storm.topology.base.BaseRichSpout;
public class HDFSSourcedStormSpoutProvider extends AbstractStormSpoutProvider {
public class HDFSSourcedStormSpoutProvider implements StormSpoutProvider {
private static final Logger LOG = LoggerFactory.getLogger(HDFSSourcedStormSpoutProvider.class);
public abstract static class HDFSSpout extends BaseRichSpout{
@@ -28,9 +28,9 @@
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.base.BaseRichSpout;
import org.apache.eagle.dataproc.impl.storm.AbstractStormSpoutProvider;
import org.apache.eagle.dataproc.impl.storm.StormSpoutProvider;
public class KafkaSourcedSpoutProvider extends AbstractStormSpoutProvider{
public class KafkaSourcedSpoutProvider implements StormSpoutProvider {
private final static Logger LOG = LoggerFactory.getLogger(KafkaSourcedSpoutProvider.class);
public SchemeAsMultiScheme getStreamScheme(String deserClsName, Config context) {
@@ -89,7 +89,6 @@ public BaseRichSpout getSpout(Config context){
}
spoutConfig.scheme = getStreamScheme(deserClsName, context);
KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
return kafkaSpout;
return new KafkaSpout(spoutConfig);
}
}
@@ -19,12 +19,12 @@
import backtype.storm.spout.Scheme;
import backtype.storm.tuple.Fields;
import com.typesafe.config.Config;
import org.apache.eagle.datastream.utils.NameConstants;
import java.lang.reflect.Constructor;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import java.util.Map;
/**
* This scheme defines how a kafka message is deserialized and the output field name for storm stream
@@ -56,10 +56,16 @@ public KafkaSourcedSpoutScheme(String deserClsName, Config context){
// the following tasks are executed within the same process of kafka spout
return Arrays.asList(tmp);
}
/**
* Declares only a single field, f0, by default; must be overridden if the output fields differ
*
* TODO: Handle the schema with KeyValue based structure
*
* @return Fields
*/
@Override
public Fields getOutputFields() {
// return new Fields(deserializer.getOutputFields());
throw new UnsupportedOperationException("output fields should be declared in sub class of KafkaSourcedSpoutProvider");
return new Fields(NameConstants.FIELD_PREFIX()+"0");
}
}
}
@@ -28,12 +28,12 @@
import java.util.Arrays;
import java.util.List;
public class EagleCustomGrouping implements CustomStreamGrouping {
public class CustomPartitionGrouping implements CustomStreamGrouping {
public List<Integer> targetTasks;
public PartitionStrategy strategy;
public EagleCustomGrouping(PartitionStrategy strategy) {
public CustomPartitionGrouping(PartitionStrategy strategy) {
this.strategy = strategy;
}
@@ -29,6 +29,7 @@
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import org.apache.eagle.datastream.utils.NameConstants;
public class JavaMapperStormExecutor extends BaseRichBolt{
public static class e1 extends JavaMapperStormExecutor {
@@ -75,7 +76,7 @@ public void execute(Tuple input) {
public void declareOutputFields(OutputFieldsDeclarer declarer) {
List<String> fields = new ArrayList<String>();
for(int i=0; i<numOutputFields; i++){
fields.add(OutputFieldNameConst.FIELD_PREFIX() + i);
fields.add(NameConstants.FIELD_PREFIX() + i);
}
declarer.declare(new Fields(fields));
}
@@ -52,4 +52,5 @@ public void collect(Object o) {
};
delegate.flatMap(input, delegateCollector);
}
}
@@ -0,0 +1,31 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.eagle.datastream.utils;
import java.lang.reflect.ParameterizedType;
/**
 * Reflection helpers for resolving generic type arguments at runtime.
 *
 * @since 12/7/15
 */
class JavaReflections {
    /**
     * Resolves the class of the type argument at position {@code index} that the
     * runtime class of {@code obj} passes to its direct generic superclass.
     *
     * <p>For example, for an instance of {@code class Foo extends Bar<String, Long>},
     * index 0 yields {@code String.class} and index 1 yields {@code Long.class}.
     * When the argument is itself parameterized (e.g. {@code List<Integer>}), its
     * raw class ({@code List.class}) is returned.
     *
     * @param obj   instance whose class directly extends a parameterized superclass
     * @param index zero-based position of the wanted type argument
     * @return the class (or raw class) of the requested type argument
     * @throws ClassCastException if the direct superclass is not parameterized, or the
     *         argument is a type variable, wildcard or generic array that has no class
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside the superclass's
     *         type-argument list
     */
    public static Class<?> getGenericTypeClass(final Object obj, int index) {
        final java.lang.reflect.Type superType = obj.getClass().getGenericSuperclass();
        if (!(superType instanceof ParameterizedType)) {
            // Keep the exception type a blind cast would raise, but say what went wrong.
            throw new ClassCastException("Direct superclass of " + obj.getClass().getName()
                    + " is not parameterized: " + superType);
        }

        final java.lang.reflect.Type argument =
                ((ParameterizedType) superType).getActualTypeArguments()[index];
        if (argument instanceof Class) {
            return (Class<?>) argument;
        }
        if (argument instanceof ParameterizedType) {
            // A nested generic such as List<Integer> is not a Class; use its raw type.
            return (Class<?>) ((ParameterizedType) argument).getRawType();
        }
        throw new ClassCastException("Type argument " + index + " of " + superType
                + " cannot be resolved to a class: " + argument);
    }
}

This file was deleted.

Oops, something went wrong.

This file was deleted.

Oops, something went wrong.
Oops, something went wrong.

0 comments on commit 52b8e58

Please sign in to comment.