release commit

commit 9d91adbdbde22e91779b91eb40805f598da5b004 (0 parents)
Nathan Marz (nathanmarz) authored

Showing 182 changed files with 39,203 additions and 0 deletions.

  1. +21 0 .gitignore
  2. +261 0 LICENSE.html
  3. +26 0 README.markdown
  4. +178 0 TODO
  5. +41 0 bin/build_release.sh
  6. +29 0 bin/install_zmq.sh
  7. +2 0  bin/javadoc.sh
  8. +80 0 bin/storm
  9. +64 0 conf/defaults.yaml
  10. +10 0 conf/log4j.properties
  11. +16 0 conf/storm.yaml.example
  12. +9 0 log4j/log4j.properties
  13. +32 0 project.clj
  14. +26 0 src/clj/backtype/storm/LocalCluster.clj
  15. +38 0 src/clj/backtype/storm/bootstrap.clj
  16. +71 0 src/clj/backtype/storm/clojure.clj
  17. +422 0 src/clj/backtype/storm/cluster.clj
  18. +9 0 src/clj/backtype/storm/command/config_value.clj
  19. +13 0 src/clj/backtype/storm/command/kill_topology.clj
  20. +15 0 src/clj/backtype/storm/command/shell_submission.clj
  21. +139 0 src/clj/backtype/storm/config.clj
  22. +96 0 src/clj/backtype/storm/daemon/common.clj
  23. +81 0 src/clj/backtype/storm/daemon/drpc.clj
  24. +638 0 src/clj/backtype/storm/daemon/nimbus.clj
  25. +399 0 src/clj/backtype/storm/daemon/supervisor.clj
  26. +498 0 src/clj/backtype/storm/daemon/task.clj
  27. +254 0 src/clj/backtype/storm/daemon/worker.clj
  28. +56 0 src/clj/backtype/storm/event.clj
  29. +11 0 src/clj/backtype/storm/log.clj
  30. +33 0 src/clj/backtype/storm/process_simulator.clj
  31. +306 0 src/clj/backtype/storm/stats.clj
  32. +432 0 src/clj/backtype/storm/testing.clj
  33. +168 0 src/clj/backtype/storm/thrift.clj
  34. +12 0 src/clj/backtype/storm/tuple.clj
  35. +682 0 src/clj/backtype/storm/ui/core.clj
  36. +115 0 src/clj/backtype/storm/ui/helpers.clj
  37. +499 0 src/clj/backtype/storm/util.clj
  38. +110 0 src/clj/backtype/storm/zookeeper.clj
  39. +93 0 src/clj/zilch/mq.clj
  40. +96 0 src/clj/zilch/virtual_port.clj
  41. +1 0  src/dev/resources/storm.fy
  42. +1 0  src/dev/resources/storm.py
  43. +1 0  src/dev/resources/storm.rb
  44. +10 0 src/dev/resources/tester.fy
  45. +8 0 src/dev/resources/tester.py
  46. +10 0 src/dev/resources/tester.rb
  47. +6 0 src/genthrift.sh
  48. +299 0 src/jvm/backtype/storm/Config.java
  49. +6 0 src/jvm/backtype/storm/Constants.java
  50. +16 0 src/jvm/backtype/storm/ILocalCluster.java
  51. +96 0 src/jvm/backtype/storm/StormSubmitter.java
  52. +94 0 src/jvm/backtype/storm/clojure/ClojureBolt.java
  53. +5 0 src/jvm/backtype/storm/daemon/Shutdownable.java
  54. +25 0 src/jvm/backtype/storm/drpc/DRPCScheme.java
  55. +48 0 src/jvm/backtype/storm/drpc/ReturnResults.java
  56. +6 0 src/jvm/backtype/storm/drpc/SpoutAdder.java
  57. +320 0 src/jvm/backtype/storm/generated/AlreadyAliveException.java
  58. +559 0 src/jvm/backtype/storm/generated/Bolt.java
  59. +747 0 src/jvm/backtype/storm/generated/BoltStats.java
  60. +583 0 src/jvm/backtype/storm/generated/ClusterSummary.java
  61. +455 0 src/jvm/backtype/storm/generated/ComponentCommon.java
  62. +301 0 src/jvm/backtype/storm/generated/ComponentObject.java
  63. +1,566 0 src/jvm/backtype/storm/generated/DistributedRPC.java
  64. +412 0 src/jvm/backtype/storm/generated/ErrorInfo.java
  65. +406 0 src/jvm/backtype/storm/generated/GlobalStreamId.java
  66. +460 0 src/jvm/backtype/storm/generated/Grouping.java
  67. +320 0 src/jvm/backtype/storm/generated/InvalidTopologyException.java
  68. +7,967 0 src/jvm/backtype/storm/generated/Nimbus.java
  69. +320 0 src/jvm/backtype/storm/generated/NotAliveException.java
  70. +219 0 src/jvm/backtype/storm/generated/NullStruct.java
  71. +407 0 src/jvm/backtype/storm/generated/ShellComponent.java
  72. +509 0 src/jvm/backtype/storm/generated/SpoutSpec.java
  73. +744 0 src/jvm/backtype/storm/generated/SpoutStats.java
  74. +417 0 src/jvm/backtype/storm/generated/StateSpoutSpec.java
  75. +648 0 src/jvm/backtype/storm/generated/StormTopology.java
  76. +449 0 src/jvm/backtype/storm/generated/StreamInfo.java
  77. +590 0 src/jvm/backtype/storm/generated/SupervisorSummary.java
  78. +302 0 src/jvm/backtype/storm/generated/TaskSpecificStats.java
  79. +667 0 src/jvm/backtype/storm/generated/TaskStats.java
  80. +906 0 src/jvm/backtype/storm/generated/TaskSummary.java
  81. +640 0 src/jvm/backtype/storm/generated/TopologyInfo.java
  82. +685 0 src/jvm/backtype/storm/generated/TopologySummary.java
  83. +8 0 src/jvm/backtype/storm/planner/CompoundSpout.java
  84. +8 0 src/jvm/backtype/storm/planner/CompoundTask.java
  85. +16 0 src/jvm/backtype/storm/planner/TaskBundle.java
  86. +29 0 src/jvm/backtype/storm/serialization/FieldSerialization.java
  87. +31 0 src/jvm/backtype/storm/serialization/ISerialization.java
  88. +207 0 src/jvm/backtype/storm/serialization/SerializationFactory.java
  89. +49 0 src/jvm/backtype/storm/serialization/TupleDeserializer.java
  90. +58 0 src/jvm/backtype/storm/serialization/TupleSerializer.java
  91. +49 0 src/jvm/backtype/storm/serialization/ValuesDeserializer.java
  92. +54 0 src/jvm/backtype/storm/serialization/ValuesSerializer.java
  93. +71 0 src/jvm/backtype/storm/spout/ISpout.java
  94. +13 0 src/jvm/backtype/storm/spout/ISpoutOutputCollector.java
  95. +15 0 src/jvm/backtype/storm/spout/RawScheme.java
  96. +11 0 src/jvm/backtype/storm/spout/Scheme.java
  97. +33 0 src/jvm/backtype/storm/spout/ShellSpout.java
  98. +104 0 src/jvm/backtype/storm/spout/SpoutOutputCollector.java
  99. +12 0 src/jvm/backtype/storm/state/IStateSpout.java
  100. +5 0 src/jvm/backtype/storm/state/IStateSpoutOutputCollector.java
  101. +8 0 src/jvm/backtype/storm/state/ISubscribedState.java
  102. +7 0 src/jvm/backtype/storm/state/ISynchronizeOutputCollector.java
  103. +11 0 src/jvm/backtype/storm/state/StateSpoutOutputCollector.java
  104. +13 0 src/jvm/backtype/storm/state/SynchronizeOutputCollector.java
  105. +219 0 src/jvm/backtype/storm/task/CoordinatedBolt.java
  106. +67 0 src/jvm/backtype/storm/task/IBolt.java
  107. +12 0 src/jvm/backtype/storm/task/IInternalOutputCollector.java
  108. +15 0 src/jvm/backtype/storm/task/IOutputCollector.java
  109. +61 0 src/jvm/backtype/storm/task/KeyedFairBolt.java
  110. +166 0 src/jvm/backtype/storm/task/OutputCollector.java
  111. +82 0 src/jvm/backtype/storm/task/OutputCollectorImpl.java
  112. +194 0 src/jvm/backtype/storm/task/ShellBolt.java
  113. +313 0 src/jvm/backtype/storm/task/TopologyContext.java
  114. +8 0 src/jvm/backtype/storm/testing/AckFailDelegate.java
  115. +35 0 src/jvm/backtype/storm/testing/AckTracker.java
  116. +97 0 src/jvm/backtype/storm/testing/BoltTracker.java
  117. +75 0 src/jvm/backtype/storm/testing/FeederSpout.java
  118. +21 0 src/jvm/backtype/storm/testing/FixedTuple.java
  119. +103 0 src/jvm/backtype/storm/testing/FixedTupleSpout.java
  120. +94 0 src/jvm/backtype/storm/testing/SpoutTracker.java
  121. +45 0 src/jvm/backtype/storm/testing/TestAggregatesCounter.java
  122. +42 0 src/jvm/backtype/storm/testing/TestGlobalCount.java
  123. +32 0 src/jvm/backtype/storm/testing/TestPlannerBolt.java
  124. +52 0 src/jvm/backtype/storm/testing/TestPlannerSpout.java
  125. +43 0 src/jvm/backtype/storm/testing/TestWordCounter.java
  126. +59 0 src/jvm/backtype/storm/testing/TestWordSpout.java
  127. +72 0 src/jvm/backtype/storm/testing/TrackerAggregator.java
  128. +49 0 src/jvm/backtype/storm/testing/TupleCaptureBolt.java
  129. +36 0 src/jvm/backtype/storm/topology/BasicBoltExecutor.java
  130. +42 0 src/jvm/backtype/storm/topology/BasicOutputCollector.java
  131. +11 0 src/jvm/backtype/storm/topology/IBasicBolt.java
  132. +8 0 src/jvm/backtype/storm/topology/IBasicOutputCollector.java
  133. +17 0 src/jvm/backtype/storm/topology/IComponent.java
  134. +12 0 src/jvm/backtype/storm/topology/IRichBolt.java
  135. +19 0 src/jvm/backtype/storm/topology/IRichSpout.java
  136. +8 0 src/jvm/backtype/storm/topology/IRichStateSpout.java
  137. +24 0 src/jvm/backtype/storm/topology/InputDeclarer.java
  138. +15 0 src/jvm/backtype/storm/topology/OutputFieldsDeclarer.java
  139. +36 0 src/jvm/backtype/storm/topology/OutputFieldsGetter.java
  140. +271 0 src/jvm/backtype/storm/topology/TopologyBuilder.java
  141. +53 0 src/jvm/backtype/storm/tuple/Fields.java
  142. +81 0 src/jvm/backtype/storm/tuple/MessageId.java
  143. +144 0 src/jvm/backtype/storm/tuple/Tuple.java
  144. +16 0 src/jvm/backtype/storm/tuple/Values.java
  145. +37 0 src/jvm/backtype/storm/utils/BufferFileInputStream.java
  146. +27 0 src/jvm/backtype/storm/utils/CRC32OutputStream.java
  147. +40 0 src/jvm/backtype/storm/utils/DRPCClient.java
  148. +51 0 src/jvm/backtype/storm/utils/KeyedRoundRobinQueue.java
  149. +44 0 src/jvm/backtype/storm/utils/LocalState.java
  150. +44 0 src/jvm/backtype/storm/utils/NimbusClient.java
  151. +78 0 src/jvm/backtype/storm/utils/Time.java
  152. +149 0 src/jvm/backtype/storm/utils/TimeCacheMap.java
  153. +144 0 src/jvm/backtype/storm/utils/Utils.java
  154. +166 0 src/jvm/backtype/storm/utils/VersionedStore.java
  155. +358 0 src/jvm/backtype/storm/utils/WritableUtils.java
  156. +9 0 src/multilang/fy/storm.fancypack
  157. +163 0 src/multilang/fy/storm.fy
  158. +143 0 src/multilang/py/storm.py
  159. +116 0 src/multilang/rb/storm.rb
  160. 0  src/py/__init__.py
  161. +86 0 src/py/storm/DistributedRPC-remote
  162. +459 0 src/py/storm/DistributedRPC.py
  163. +149 0 src/py/storm/Nimbus-remote
  164. +2,283 0 src/py/storm/Nimbus.py
  165. +1 0  src/py/storm/__init__.py
  166. +10 0 src/py/storm/constants.py
  167. +2,599 0 src/py/storm/ttypes.py
  168. +174 0 src/storm.thrift
Sorry, we could not display the entire diff because it was too big.
21 .gitignore
... ... @@ -0,0 +1,21 @@
  1 +/classes
  2 +/lib
  3 +deploy/lib
  4 +deploy/logs
  5 +.emacs-project
  6 +*.jar
  7 +bin/jzmq
  8 +.DS_Store
  9 +pom.xml
  10 +deploy/classes
  11 +*.fyc
  12 +*.rbc
  13 +*.pyc
  14 +CHILD
  15 +CHILDMAKER
  16 +NANNY
  17 +\#project.clj\#
  18 +.\#project.clj
  19 +.lein-failures
  20 +_release
  21 +*.zip
261 LICENSE.html
... ... @@ -0,0 +1,261 @@
  1 +<?xml version="1.0" encoding="ISO-8859-1" ?>
  2 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml">
  4 +
  5 +<head>
  6 +<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
  7 +<title>Eclipse Public License - Version 1.0</title>
  8 +<style type="text/css">
  9 + body {
  10 + size: 8.5in 11.0in;
  11 + margin: 0.25in 0.5in 0.25in 0.5in;
  12 + tab-interval: 0.5in;
  13 + }
  14 + p {
  15 + margin-left: auto;
  16 + margin-top: 0.5em;
  17 + margin-bottom: 0.5em;
  18 + }
  19 + p.list {
  20 + margin-left: 0.5in;
  21 + margin-top: 0.05em;
  22 + margin-bottom: 0.05em;
  23 + }
  24 + </style>
  25 +
  26 +</head>
  27 +
  28 +<body lang="EN-US">
  29 +<p>Copyright (c) Nathan Marz. All rights reserved.</p>
  30 +<p align=center><b>Eclipse Public License - v 1.0</b></p>
  31 +
  32 +<p>THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE
  33 +PUBLIC LICENSE (&quot;AGREEMENT&quot;). ANY USE, REPRODUCTION OR
  34 +DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS
  35 +AGREEMENT.</p>
  36 +
  37 +<p><b>1. DEFINITIONS</b></p>
  38 +
  39 +<p>&quot;Contribution&quot; means:</p>
  40 +
  41 +<p class="list">a) in the case of the initial Contributor, the initial
  42 +code and documentation distributed under this Agreement, and</p>
  43 +<p class="list">b) in the case of each subsequent Contributor:</p>
  44 +<p class="list">i) changes to the Program, and</p>
  45 +<p class="list">ii) additions to the Program;</p>
  46 +<p class="list">where such changes and/or additions to the Program
  47 +originate from and are distributed by that particular Contributor. A
  48 +Contribution 'originates' from a Contributor if it was added to the
  49 +Program by such Contributor itself or anyone acting on such
  50 +Contributor's behalf. Contributions do not include additions to the
  51 +Program which: (i) are separate modules of software distributed in
  52 +conjunction with the Program under their own license agreement, and (ii)
  53 +are not derivative works of the Program.</p>
  54 +
  55 +<p>&quot;Contributor&quot; means any person or entity that distributes
  56 +the Program.</p>
  57 +
  58 +<p>&quot;Licensed Patents&quot; mean patent claims licensable by a
  59 +Contributor which are necessarily infringed by the use or sale of its
  60 +Contribution alone or when combined with the Program.</p>
  61 +
  62 +<p>&quot;Program&quot; means the Contributions distributed in accordance
  63 +with this Agreement.</p>
  64 +
  65 +<p>&quot;Recipient&quot; means anyone who receives the Program under
  66 +this Agreement, including all Contributors.</p>
  67 +
  68 +<p><b>2. GRANT OF RIGHTS</b></p>
  69 +
  70 +<p class="list">a) Subject to the terms of this Agreement, each
  71 +Contributor hereby grants Recipient a non-exclusive, worldwide,
  72 +royalty-free copyright license to reproduce, prepare derivative works
  73 +of, publicly display, publicly perform, distribute and sublicense the
  74 +Contribution of such Contributor, if any, and such derivative works, in
  75 +source code and object code form.</p>
  76 +
  77 +<p class="list">b) Subject to the terms of this Agreement, each
  78 +Contributor hereby grants Recipient a non-exclusive, worldwide,
  79 +royalty-free patent license under Licensed Patents to make, use, sell,
  80 +offer to sell, import and otherwise transfer the Contribution of such
  81 +Contributor, if any, in source code and object code form. This patent
  82 +license shall apply to the combination of the Contribution and the
  83 +Program if, at the time the Contribution is added by the Contributor,
  84 +such addition of the Contribution causes such combination to be covered
  85 +by the Licensed Patents. The patent license shall not apply to any other
  86 +combinations which include the Contribution. No hardware per se is
  87 +licensed hereunder.</p>
  88 +
  89 +<p class="list">c) Recipient understands that although each Contributor
  90 +grants the licenses to its Contributions set forth herein, no assurances
  91 +are provided by any Contributor that the Program does not infringe the
  92 +patent or other intellectual property rights of any other entity. Each
  93 +Contributor disclaims any liability to Recipient for claims brought by
  94 +any other entity based on infringement of intellectual property rights
  95 +or otherwise. As a condition to exercising the rights and licenses
  96 +granted hereunder, each Recipient hereby assumes sole responsibility to
  97 +secure any other intellectual property rights needed, if any. For
  98 +example, if a third party patent license is required to allow Recipient
  99 +to distribute the Program, it is Recipient's responsibility to acquire
  100 +that license before distributing the Program.</p>
  101 +
  102 +<p class="list">d) Each Contributor represents that to its knowledge it
  103 +has sufficient copyright rights in its Contribution, if any, to grant
  104 +the copyright license set forth in this Agreement.</p>
  105 +
  106 +<p><b>3. REQUIREMENTS</b></p>
  107 +
  108 +<p>A Contributor may choose to distribute the Program in object code
  109 +form under its own license agreement, provided that:</p>
  110 +
  111 +<p class="list">a) it complies with the terms and conditions of this
  112 +Agreement; and</p>
  113 +
  114 +<p class="list">b) its license agreement:</p>
  115 +
  116 +<p class="list">i) effectively disclaims on behalf of all Contributors
  117 +all warranties and conditions, express and implied, including warranties
  118 +or conditions of title and non-infringement, and implied warranties or
  119 +conditions of merchantability and fitness for a particular purpose;</p>
  120 +
  121 +<p class="list">ii) effectively excludes on behalf of all Contributors
  122 +all liability for damages, including direct, indirect, special,
  123 +incidental and consequential damages, such as lost profits;</p>
  124 +
  125 +<p class="list">iii) states that any provisions which differ from this
  126 +Agreement are offered by that Contributor alone and not by any other
  127 +party; and</p>
  128 +
  129 +<p class="list">iv) states that source code for the Program is available
  130 +from such Contributor, and informs licensees how to obtain it in a
  131 +reasonable manner on or through a medium customarily used for software
  132 +exchange.</p>
  133 +
  134 +<p>When the Program is made available in source code form:</p>
  135 +
  136 +<p class="list">a) it must be made available under this Agreement; and</p>
  137 +
  138 +<p class="list">b) a copy of this Agreement must be included with each
  139 +copy of the Program.</p>
  140 +
  141 +<p>Contributors may not remove or alter any copyright notices contained
  142 +within the Program.</p>
  143 +
  144 +<p>Each Contributor must identify itself as the originator of its
  145 +Contribution, if any, in a manner that reasonably allows subsequent
  146 +Recipients to identify the originator of the Contribution.</p>
  147 +
  148 +<p><b>4. COMMERCIAL DISTRIBUTION</b></p>
  149 +
  150 +<p>Commercial distributors of software may accept certain
  151 +responsibilities with respect to end users, business partners and the
  152 +like. While this license is intended to facilitate the commercial use of
  153 +the Program, the Contributor who includes the Program in a commercial
  154 +product offering should do so in a manner which does not create
  155 +potential liability for other Contributors. Therefore, if a Contributor
  156 +includes the Program in a commercial product offering, such Contributor
  157 +(&quot;Commercial Contributor&quot;) hereby agrees to defend and
  158 +indemnify every other Contributor (&quot;Indemnified Contributor&quot;)
  159 +against any losses, damages and costs (collectively &quot;Losses&quot;)
  160 +arising from claims, lawsuits and other legal actions brought by a third
  161 +party against the Indemnified Contributor to the extent caused by the
  162 +acts or omissions of such Commercial Contributor in connection with its
  163 +distribution of the Program in a commercial product offering. The
  164 +obligations in this section do not apply to any claims or Losses
  165 +relating to any actual or alleged intellectual property infringement. In
  166 +order to qualify, an Indemnified Contributor must: a) promptly notify
  167 +the Commercial Contributor in writing of such claim, and b) allow the
  168 +Commercial Contributor to control, and cooperate with the Commercial
  169 +Contributor in, the defense and any related settlement negotiations. The
  170 +Indemnified Contributor may participate in any such claim at its own
  171 +expense.</p>
  172 +
  173 +<p>For example, a Contributor might include the Program in a commercial
  174 +product offering, Product X. That Contributor is then a Commercial
  175 +Contributor. If that Commercial Contributor then makes performance
  176 +claims, or offers warranties related to Product X, those performance
  177 +claims and warranties are such Commercial Contributor's responsibility
  178 +alone. Under this section, the Commercial Contributor would have to
  179 +defend claims against the other Contributors related to those
  180 +performance claims and warranties, and if a court requires any other
  181 +Contributor to pay any damages as a result, the Commercial Contributor
  182 +must pay those damages.</p>
  183 +
  184 +<p><b>5. NO WARRANTY</b></p>
  185 +
  186 +<p>EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS
  187 +PROVIDED ON AN &quot;AS IS&quot; BASIS, WITHOUT WARRANTIES OR CONDITIONS
  188 +OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION,
  189 +ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY
  190 +OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely
  191 +responsible for determining the appropriateness of using and
  192 +distributing the Program and assumes all risks associated with its
  193 +exercise of rights under this Agreement , including but not limited to
  194 +the risks and costs of program errors, compliance with applicable laws,
  195 +damage to or loss of data, programs or equipment, and unavailability or
  196 +interruption of operations.</p>
  197 +
  198 +<p><b>6. DISCLAIMER OF LIABILITY</b></p>
  199 +
  200 +<p>EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT
  201 +NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
  202 +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
  203 +WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
  204 +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  205 +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
  206 +DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
  207 +HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.</p>
  208 +
  209 +<p><b>7. GENERAL</b></p>
  210 +
  211 +<p>If any provision of this Agreement is invalid or unenforceable under
  212 +applicable law, it shall not affect the validity or enforceability of
  213 +the remainder of the terms of this Agreement, and without further action
  214 +by the parties hereto, such provision shall be reformed to the minimum
  215 +extent necessary to make such provision valid and enforceable.</p>
  216 +
  217 +<p>If Recipient institutes patent litigation against any entity
  218 +(including a cross-claim or counterclaim in a lawsuit) alleging that the
  219 +Program itself (excluding combinations of the Program with other
  220 +software or hardware) infringes such Recipient's patent(s), then such
  221 +Recipient's rights granted under Section 2(b) shall terminate as of the
  222 +date such litigation is filed.</p>
  223 +
  224 +<p>All Recipient's rights under this Agreement shall terminate if it
  225 +fails to comply with any of the material terms or conditions of this
  226 +Agreement and does not cure such failure in a reasonable period of time
  227 +after becoming aware of such noncompliance. If all Recipient's rights
  228 +under this Agreement terminate, Recipient agrees to cease use and
  229 +distribution of the Program as soon as reasonably practicable. However,
  230 +Recipient's obligations under this Agreement and any licenses granted by
  231 +Recipient relating to the Program shall continue and survive.</p>
  232 +
  233 +<p>Everyone is permitted to copy and distribute copies of this
  234 +Agreement, but in order to avoid inconsistency the Agreement is
  235 +copyrighted and may only be modified in the following manner. The
  236 +Agreement Steward reserves the right to publish new versions (including
  237 +revisions) of this Agreement from time to time. No one other than the
  238 +Agreement Steward has the right to modify this Agreement. The Eclipse
  239 +Foundation is the initial Agreement Steward. The Eclipse Foundation may
  240 +assign the responsibility to serve as the Agreement Steward to a
  241 +suitable separate entity. Each new version of the Agreement will be
  242 +given a distinguishing version number. The Program (including
  243 +Contributions) may always be distributed subject to the version of the
  244 +Agreement under which it was received. In addition, after a new version
  245 +of the Agreement is published, Contributor may elect to distribute the
  246 +Program (including its Contributions) under the new version. Except as
  247 +expressly stated in Sections 2(a) and 2(b) above, Recipient receives no
  248 +rights or licenses to the intellectual property of any Contributor under
  249 +this Agreement, whether expressly, by implication, estoppel or
  250 +otherwise. All rights in the Program not expressly granted under this
  251 +Agreement are reserved.</p>
  252 +
  253 +<p>This Agreement is governed by the laws of the State of New York and
  254 +the intellectual property laws of the United States of America. No party
  255 +to this Agreement will bring a legal action under this Agreement more
  256 +than one year after the cause of action arose. Each party waives its
  257 +rights to a jury trial in any resulting litigation.</p>
  258 +
  259 +</body>
  260 +
  261 +</html>
26 README.markdown
... ... @@ -0,0 +1,26 @@
  1 +Storm is a distributed realtime computation system. Similar to how Hadoop provides a set of general primitives for doing batch processing, Storm provides a set of general primitives for doing realtime computation. Storm is simple, can be used with any programming language, and is a lot of fun to use!
  2 +
  3 +## Documentation
  4 +
  5 +Documentation and tutorials can be found on the [Storm wiki](http://github.com/nathanmarz/storm/wiki).
  6 +
  7 +## Getting help
  8 +
  9 +Feel free to ask questions on Storm's mailing list: http://groups.google.com/group/storm-user
  10 +
  11 +You can also come to the #storm-user room on [freenode](http://freenode.net/). You can usually find a Storm developer there to help you out.
  12 +
  13 +## License
  14 +
  15 +The use and distribution terms for this software are covered by the
  16 +Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
  17 +which can be found in the file LICENSE.html at the root of this distribution.
  18 +By using this software in any fashion, you are agreeing to be bound by
  19 +the terms of this license.
  20 +You must not remove this notice, or any other, from this software.
  21 +
  22 +## Contributors
  23 +
  24 +* Nathan Marz ([@nathanmarz](http://twitter.com/nathanmarz))
  25 +* Jason Jackson ([@jason_j](http://twitter.com/jason_j))
  26 +* Christopher Bertels ([@bakkdoor](http://twitter.com/bakkdoor))
178 TODO
... ... @@ -0,0 +1,178 @@
  1 +Use cases:
  2 +
  3 +1. number of steps between 2 people in a graph (topology with cycles?)
  4 +
  5 +
  6 +#################
  7 +
  8 +* Repackage jzmq and zmq as a leiningen "native dep"
  9 + - this might be good, since the native dep can package builds for all different systems/os's?
  10 +
  11 +
  12 +* Deploy design:
  13 +
  14 +- storm swap {name} {jar} {class}
  15 +- it's allowed to use resources equal to current running topology plus number of free resources
  16 +- starts in deactivated mode
  17 +- add TOPOLOGY_STARTUP_TIME config for the delay until nimbus activates a topology after launching it
  18 +- for swap, after the startup time, deactivate the old topology, wait the TOPOLOGY_MESSAGE_TIMEOUT_SECS, and then activate the new topology
  19 +- should be able to decrease the message timeout for killing or swapping (add optional thrift parameter) -- or just make it part of the config?
  20 +- add killWithOptions, swap, swapWithOptions
  21 +
  22 +* Storm UI, stats, debugging, diagnosis tools
  23 +-- need to be able to hide system streams/components from the calculations (another query param and should be default)
  24 +-- need to optimize (slowness is probably on nimbus end of querying zk, consider adding heartbeat caching into nimbus)
  25 +-- add margins
  26 +-- add titles so it's easier to distinguish the various pages
  27 +-- right align all table columns except for the leftmost
  28 +
  29 +* Unit test the core pieces that have stabilized their APIs
  30 +
  31 +- process simulator
  32 +- virtual ports
  33 +- supervisor
  34 +- utils
  35 +- test worker/tasks
  36 +
  37 +* implement pseudo-distributed mode -- this is for testing the distributed parts of the code
  38 + - perhaps i can use pallet/vmfest for this
  39 +
  40 +* Need integration tests that run on an actual storm cluster (scp code/process code/zookeeper code not tested in unit tests)
  41 +
  42 +* bolts with none grouping can be pushed into a bolt. e.g. A -> B -> C
  43 + A -> D -> E
  44 +
  45 +If A -> B and A -> D are shuffle grouping = none, and B -> C and D -> E are not, then both can be run in A, B's branch goes to C and D's branch goes to E
  46 +
  47 +
  48 +* Failure design
  49 +
  50 +Add fail method to outputcollector
  51 +Fail sends fail message to Acker for those anchors, which sends fail message back to spout.
  52 +Whenever spout fails a tuple, it emits it in its failure stream...
  53 +
  54 +Add fail method to drpc... Causes blocked thread to throw exception
  55 +
  56 +* Have worker heartbeat with its task ids, nimbus verifies - if wrong, reassign tasks?
  57 +- detect and ignore stray tasks
  58 +Each worker can choose a unique id for itself when heart beating
  59 +- nimbus deletes those that aren't in topology
  60 +
  61 +* Subscriptions design
  62 +
  63 +-- new kind of spout: "subscription spout"
  64 + --> goal is to sync its data across the tasks that subscribe to its streams
  65 + --> after doing a grouping, remembers what task it sent the tuple to (regardless of grouping). if a task dies, it knows its subscriptions and asks to be resynced
  66 + --> normal operation is to push to tasks, but pull done when a task starts up (b/c previous task died or something)
  67 + --> need to be able to add tuples to subscription or take tuples away (this is protocol with who you're subscribing to - e.g. rocket)
  68 + --> subscriptions can only happen in a spout because it requires persistent state
  69 + --> when subscription spout task dies, it polls the source (e.g. rocket) for all the subscription info
  70 + --> ideally you'd set things up to have one subscription spout per rocket server
  71 + --> TODO: Need some way to delete subscriptions -> part of tuple or extra metadata on tuple (extra metadata seems cleaner)
  72 + --> add isSubscription() method to Tuple as well as a getSubscriptionType() [which returns ADD or REMOVE]
  73 + --> when a spout starts up, it also needs to push all of its subscription info
  74 + --> acks are irrelevant for subscription tuples -- how should acks be managed as an abstraction?
  75 + -- maybe the synchronized state is done for you -- you just access the state directly and receive a callback whenever it changes?
  76 + -- so don't use tuples...
  77 + --> subscriptions break all the abstractions, perhaps I should generalize spouts and factor acking as a library on top of storm. subscriptions would just be another kind of library? -> no, it seems to break abstractions anyway (like keeping task -> tuples in memory)
  78 + --> maybe call it "syncspout"
  79 + --> if just do syncing (don't expose tuples directly?)
  80 + --> have a "SubscribedState" class that takes care of indexing/etc. --> expose it through topologycontext?
  81 + -- need a way to distinguish between states of different streams
  82 + -- has "add" and "remove" methods
  83 + -- bolt can give a statemanager object that implements add and remove in the prepare method
  84 + -- add(Tuple tuple)
  85 + -- remove(Tuple tuple)
  86 + --> synchronize protocol (when spout or source of data dies):
  87 + --> send how many tuples are going to be sent
  88 + --> send the tuples
  89 + --> OR: pack everything together into a single message (could be hard b/c where tuples are supposed to go is abstracted away)
  90 + --> tie everything together with a unique ID
  91 + --> once task receives everything, has info needed to remove tuples
  92 + --> statespout should do long-polling with timeout
  93 + --> to do subscriptions, the state should contain something like [url, subscriber]. some bolt appends subscriber to tuples, group by subscriber, and send info back
  94 + --> how to do fields grouping with an even distribution?
  95 + --> ********* tasks need to block on startup until they're synchronized *********
  96 + --> send sync messages in a loop until it's synchronized
  97 + --> add a task.synchronize.poll.freq.secs config (default to 10 seconds)
  98 + --> need to buffer other messages as topology is waiting for synchronization messages (use disk?)
  99 + --> could use acking system to know if a piece of state gets fully synchronized and communicate this with user
  100 + --> perhaps expose this through a special stream? (the state status stream -> similar to failure streams)
  101 + --> should be able to do updates of existing state
  102 + --> use case: have a knob that you can set externally
  103 + --> this isn't really any better than just using zookeeper directly
  104 +
  105 +
  106 +_myState = context.setSubscribedState(_myState)
  107 +
  108 +StateSpout {
  109 + //does a timeout long poll and emits new add or remove state tuples (add and remove on the output collector)
  110 + nextTuple(StateSpoutOutputCollector) //collector has add and remove methods add(id, tuple). remove(id)
  111 + //emits all the tuples into the output collector (in the background, will also send ids and counts to tasks so they know how to synchronize)
  112 + //called on startup
  113 + //collector can have a synchronize method in case the source of data (e.g., rocket) craps out
  114 + synchronize(SynchronizationOutputCollector) //collector only has add(id, tuple) method
  115 +}
  116 +
  117 +//task startup (in prepare method) [this is automatic]
  118 +for(int taskId: statespoutids) {
  119 + emitDirect(SYNC_STREAM, tuple())
  120 +}
  121 +
  122 +statespout synchronization():
  123 + id = uuid()
  124 + //getAllStateTuplesFromSource calls synchronize on the spout to get the tuples
  125 + for(Tuple t: getAllStateTuplesFromSource()) {
  126 + List tasks = emit(cons(id, t));
  127 + .. keep track of id -> tasks -> count
  128 + for(task: all output tasks) {
  129 + emitDirect(task, id, count)
  130 + }
  131 + }
  132 +
  133 +for synchronization to work, task needs to keep track of which tasks sent it tuples, and compare against only that set on synchronization
  134 +
  135 +Need a way to propagate information back up the topology - "subscriptions"
  136 +e.g. browser -> rocket -> bolt -> bolt -> bolt.
  137 +
  138 +example: #retweets for a subscribed set of tweet ids
  139 +
  140 +storm topology
  141 +
  142 + -> tweet spout (A) -> group on original id -> count (B) -> rocket
  143 +
  144 +subscriptions: rocket -> count (B) tweet id (need to group) -> spout (need to go to all)
  145 +
  146 +-- how does it work when stuff dies downstream or upstream? do people ask what the subscriptions are? or do you push your subscriptions up? a combination?
  147 +
  148 +-- maybe subscriptions are a "constant" spout? e.g., continuously emits and refreshes to make sure every task has the tuple. this seems amorphous and hard to implement... nimbus would need to refire all constant spouts whenever there's a reassignment that affects the flow of data. subscriptions seem more natural
  149 +
  150 +-- subscriptions are a special kind of stream that is driven by being asked to send it. e.g., rocket is a spout that emits subscription/unsubscription tuples. they only send it when they get something new, or are asked what all the subscriptions are
  151 +
  152 +-- maybe you just need a system stream to know when tasks are created. when you see that a downstream task is created, you know to fire subscriptions to it if it's subscribed to your subscriptions stream? - how does this interplay with all the grouping types... you almost want to do a grouping and only send to the tasks that would have received it. spouts would need to be able to subscribe to streams as well
  153 +
  154 +(use 'backtype.storm.testing)
  155 +;;(start-simulating-time!)
  156 +(def cluster (mk-local-storm-cluster))
  157 +(use 'backtype.storm.bootstrap) (bootstrap)
  158 +(import '[backtype.storm.testing TestWordCounter TestWordSpout TestGlobalCount TestAggregatesCounter])
  159 +(def spout (feeder-spout ["word"]))
  160 +(def topology (thrift/mk-topology
  161 + {1 (thrift/mk-spout-spec spout :parallelism-hint 3)}
  162 + {2 (thrift/mk-bolt-spec {1 ["word"]} (TestWordCounter.) :parallelism-hint 4)
  163 + 3 (thrift/mk-bolt-spec {1 :global} (TestGlobalCount.))
  164 + 4 (thrift/mk-bolt-spec {2 :global} (TestAggregatesCounter.))
  165 + }))
  166 +(submit-local-topology (:nimbus cluster) "test" {TOPOLOGY-WORKERS 4 TOPOLOGY-DEBUG true} topology)
  167 +
  168 +
  169 +* clean up project
  170 + - remove log4j dir and instead generate it in the deploy (it's only used in bin/storm -> create a console one and put into bin/)
  171 + - include system component / stream information in the topologycontext and clean up system specific code all over the place
  172 +
  173 +* Very rare errors
  174 +
  175 +weird nullptr exceptions:
  176 +(tasks i) on send-fn
  177 +no virtual port socket for outbound task (in worker)
  178 +
41 bin/build_release.sh
... ... @@ -0,0 +1,41 @@
  1 +#!/bin/bash
  2 +
  3 +RELEASE=`head -1 project.clj | awk '{print $3}' | sed -e 's/\"//' | sed -e 's/\"//'`
  4 +
  5 +echo Making release $RELEASE
  6 +
  7 +DIR=_release/storm-$RELEASE
  8 +
  9 +rm -rf _release
  10 +export LEIN_ROOT=1
  11 +rm *jar
  12 +lein clean
  13 +lein deps
  14 +lein compile
  15 +mv conf/log4j.properties conf/storm.log.properties
  16 +lein jar
  17 +mv conf/storm.log.properties conf/log4j.properties
  18 +mkdir -p $DIR
  19 +mkdir $DIR/lib
  20 +cp storm*jar $DIR/
  21 +cp lib/*.jar $DIR/lib
  22 +
  23 +cp -R log4j $DIR/
  24 +mkdir $DIR/logs
  25 +
  26 +mkdir $DIR/conf
  27 +cp conf/storm.yaml.example $DIR/conf/storm.yaml
  28 +
  29 +cp -R src/ui/public $DIR/
  30 +
  31 +cp -R bin $DIR/
  32 +
  33 +cp README.markdown $DIR/
  34 +cp LICENSE.html $DIR/
  35 +
  36 +cd _release
  37 +zip -r storm-$RELEASE.zip *
  38 +cd ..
  39 +mv _release/storm-*.zip .
  40 +rm -rf _release
  41 +
29 bin/install_zmq.sh
... ... @@ -0,0 +1,29 @@
  1 +#!/bin/bash
  2 +export JAVA_HOME=$(/usr/libexec/java_home)
  3 +
  4 +if [ ! -d "$JAVA_HOME/include" ]; then
  5 + echo "
  6 +Looks like you're missing your 'include' directory. If you're using Mac OS X, you'll need to install the Java dev package.
  7 +
  8 +- Navigate to http://goo.gl/D8lI
  9 +- Click the Java tab on the right
  10 +- Install the appropriate version and try again.
  11 +"
  12 + exit -1;
  13 +fi
  14 +
  15 +#install zeromq
  16 +wget http://download.zeromq.org/historic/zeromq-2.1.7.tar.gz
  17 +tar -xzf zeromq-2.1.7.tar.gz
  18 +cd zeromq-2.1.7
  19 +./configure
  20 +make
  21 +sudo make install
  22 +
  23 +#install jzmq (both native and into local maven cache)
  24 +git clone https://github.com/nathanmarz/jzmq.git
  25 +cd jzmq
  26 +./autogen.sh
  27 +./configure
  28 +make
  29 +sudo make install
2  bin/javadoc.sh
... ... @@ -0,0 +1,2 @@
  1 +mkdir -p doc
  2 +javadoc -d doc/ `find src -name "*.java" | grep -v generated`
80 bin/storm
... ... @@ -0,0 +1,80 @@
  1 +#!/usr/bin/python
  2 +
  3 +import os
  4 +import sys
  5 +import random
  6 +import subprocess as sub
  7 +
  8 +CONF_DIR = os.path.expanduser("~/.storm")
  9 +STORM_DIR = "/".join(os.path.abspath( __file__ ).split("/")[:-2])
  10 +
  11 +def get_jars_full(adir):
  12 + files = os.listdir(adir)
  13 + ret = []
  14 + for f in files:
  15 + if f.endswith(".jar"):
  16 + ret.append(adir + "/" + f)
  17 + return ret
  18 +
  19 +def get_classpath(extrajars):
  20 + ret = get_jars_full(STORM_DIR)
  21 + ret.extend(get_jars_full(STORM_DIR + "/lib"))
  22 + ret.extend(extrajars)
  23 + return ":".join(ret)
  24 +
  25 +def confvalue(name):
  26 + cp = get_classpath([])
  27 + command = ["java", "-client", "-cp", cp, "backtype.storm.command.config_value", name]
  28 + p = sub.Popen(command,stdout=sub.PIPE)
  29 + output, errors = p.communicate()
  30 + lines = output.split("\n")
  31 + for line in lines:
  32 + tokens = line.split(" ")
  33 + if tokens[0] == "VALUE:":
  34 + return tokens[1]
  35 +
  36 +def exec_storm_class(klass, jvmtype="-server", childopts="", extrajars=[], args=[], prefix=""):
  37 + nativepath = confvalue("java.library.path")
  38 + command = prefix + " java " + jvmtype + " -Djava.library.path=" + nativepath + " " + childopts + " -cp " + get_classpath(extrajars) + " " + klass + " " + " ".join(args)
  39 + print "Running: " + command
  40 + os.system(command)
  41 +
  42 +COMMAND = sys.argv[1]
  43 +ARGS = sys.argv[2:]
  44 +
  45 +def jar(jarfile, klass, *args):
  46 + exec_storm_class(
  47 + klass,
  48 + childopts="-Dlog4j.configuration=storm.log.properties",
  49 + jvmtype="-client",
  50 + extrajars=[jarfile, CONF_DIR, STORM_DIR + "/bin"],
  51 + args=args,
  52 + prefix="export STORM_JAR=" + jarfile + ";")
  53 +
  54 +def kill(name):
  55 + exec_storm_class("backtype.storm.command.kill_topology", args=[name], jvmtype="-client", extrajars=[CONF_DIR, STORM_DIR + "/bin"], childopts="-Dlog4j.configuration=storm.log.properties")
  56 +
  57 +def shell(resourcesdir, command, *args):
  58 + tmpjarpath = "stormshell" + str(random.randint(0, 10000000)) + ".jar"
  59 + os.system("jar cf %s %s" % (tmpjarpath, resourcesdir))
  60 + runnerargs = [tmpjarpath, command]
  61 + runnerargs.extend(args)
  62 + exec_storm_class("backtype.storm.command.shell_submission", args=runnerargs, jvmtype="-client", extrajars=[CONF_DIR], childopts="-Dlog4j.configuration=storm.log.properties")
  63 + os.system("rm " + tmpjarpath)
  64 +
  65 +def nimbus():
  66 + childopts = confvalue("nimbus.childopts") + " -Dlogfile.name=nimbus.log"
  67 + exec_storm_class("backtype.storm.daemon.nimbus", jvmtype="-server", extrajars=[STORM_DIR + "/log4j", STORM_DIR + "/conf"], childopts=childopts)
  68 +
  69 +def supervisor():
  70 + childopts = confvalue("nimbus.childopts") + " -Dlogfile.name=supervisor.log"
  71 + exec_storm_class("backtype.storm.daemon.supervisor", jvmtype="-server", extrajars=[STORM_DIR + "/log4j", STORM_DIR + "/conf"], childopts=childopts)
  72 +
  73 +def ui():
  74 + childopts = "-Xmx768m -Dlogfile.name=ui.log"
  75 + exec_storm_class("backtype.storm.ui.core", jvmtype="-server", childopts=childopts, extrajars=[STORM_DIR + "/log4j", STORM_DIR, STORM_DIR + "/conf"])
  76 +
  77 +
  78 +COMMANDS = {"jar": jar, "kill": kill, "shell": shell, "nimbus": nimbus, "ui": ui, "supervisor": supervisor}
  79 +
  80 +COMMANDS[COMMAND](*ARGS)
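
The jar command above exports STORM_JAR and then runs the user-supplied main class on Storm's classpath; that class is expected to build a topology and submit it to the cluster. As a rough illustration (not part of this commit), such a class might look like the sketch below; the class name is hypothetical, and the StormSubmitter.submitTopology(name, conf, topology) signature is an assumption inferred from StormSubmitter.java and the Nimbus thrift interface in the file list above.

    import java.util.HashMap;
    import java.util.Map;

    import backtype.storm.StormSubmitter;
    import backtype.storm.generated.StormTopology;

    // Hypothetical entry point, run via: storm jar my-topology.jar MyTopologyMain
    public class MyTopologyMain {
        public static void main(String[] args) throws Exception {
            // Per-topology overrides of keys from conf/defaults.yaml (below).
            Map<String, Object> conf = new HashMap<String, Object>();
            conf.put("topology.workers", 4);

            // Assembling the StormTopology is elided; see TopologyBuilder.java in the file list.
            StormTopology topology = buildTopology();
            StormSubmitter.submitTopology("my-topology", conf, topology);
        }

        private static StormTopology buildTopology() {
            throw new UnsupportedOperationException("sketch only -- build a real topology here");
        }
    }
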
64 conf/defaults.yaml
... ... @@ -0,0 +1,64 @@
  1 +########### These all have default values as shown
  2 +########### Additional configuration goes into storm.yaml
  3 +
  4 +java.library.path: "/usr/local/lib:/opt/local/lib:/usr/lib"
  5 +
  6 +### storm.* configs are general configurations
  7 +# the local dir is where jars are kept
  8 +storm.local.dir: "/mnt/storm"
  9 +storm.zookeeper.port: 2181
  10 +storm.zookeeper.root: "/storm"
  11 +storm.zookeeper.session.timeout: 10000
  12 +storm.cluster.mode: "distributed" # can be distributed or local
  13 +
  14 +### nimbus.* configs are for the master
  15 +nimbus.thrift.port: 6627
  16 +nimbus.childopts: "-Xmx1024m"
  17 +nimbus.task.timeout.secs: 30
  18 +nimbus.supervisor.timeout.secs: 60
  19 +nimbus.monitor.freq.secs: 10
  20 +nimbus.task.launch.secs: 90
  21 +nimbus.reassign: true
  22 +nimbus.file.copy.expiration.secs: 600
  23 +
  24 +### supervisor.* configs are for node supervisors
  25 +# Define the number of workers that can be run on this machine. Each worker is assigned a port to use for communication
  26 +supervisor.slots.ports:
  27 + - 6700
  28 + - 6701
  29 + - 6702
  30 + - 6703
  31 +supervisor.childopts: "-Xmx1024m"
  32 +#how long supervisor will wait to ensure that a worker process is started
  33 +supervisor.worker.start.timeout.secs: 20
  34 +#how long between heartbeats until supervisor considers that worker dead and tries to restart it
  35 +supervisor.worker.timeout.secs: 25
  36 +#how frequently the supervisor checks on the status of the processes it's monitoring and restarts if necessary
  37 +supervisor.monitor.frequency.secs: 3
  38 +#how frequently the supervisor heartbeats to the cluster state (for nimbus)
  39 +supervisor.heartbeat.frequency.secs: 5
  40 +supervisor.enable: true
  41 +
  42 +### worker.* configs are for task workers
  43 +worker.childopts: "-Xmx768m"
  44 +worker.heartbeat.frequency.secs: 1
  45 +
  46 +task.heartbeat.frequency.secs: 3
  47 +task.refresh.poll.secs: 10
  48 +
  49 +zmq.threads: 1
  50 +zmq.linger.millis: 5000
  51 +
  52 +### topology.* configs are for specific executing storms
  53 +topology.debug: false
  54 +topology.optimize: true
  55 +topology.workers: 1
  56 +topology.ackers: 1
  57 +# maximum amount of time a message has to complete before it's considered failed
  58 +topology.message.timeout.secs: 30
  59 +topology.skip.missing.serializations: false
  60 +topology.max.task.parallelism: null
  61 +topology.max.spout.pending: null
  62 +topology.state.synchronization.timeout.secs: 60
  63 +topology.stats.sample.rate: 0.05
  64 +
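
The topology.* entries above are per-topology defaults; as the TODO's submit-local-topology example shows, a topology can override them through the conf map it is submitted with, keyed by the same strings. A rough Java sketch, not part of this commit (the keys and default values come from defaults.yaml above; treating conf as a plain java.util.Map follows the submitTopology signatures elsewhere in this commit):

    import java.util.HashMap;
    import java.util.Map;

    // Sketch: build a conf map that overrides some topology.* defaults at submission time.
    public class TopologyConfSketch {
        public static Map<String, Object> buildConf() {
            Map<String, Object> conf = new HashMap<String, Object>();
            conf.put("topology.debug", true);               // default above: false
            conf.put("topology.message.timeout.secs", 60);  // default above: 30
            conf.put("topology.max.spout.pending", 1000);   // default above: null
            return conf;
        }
    }
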
10 conf/log4j.properties
... ... @@ -0,0 +1,10 @@
  1 +#This file should be deleted when deployed to server (workaround to leiningen classpath putting dev resources on path)
  2 +#This file is needed for tests
  3 +
  4 +log4j.rootLogger=INFO, A1
  5 +
  6 +log4j.appender.A1=org.apache.log4j.ConsoleAppender
  7 +
  8 +log4j.appender.A1.layout=org.apache.log4j.PatternLayout
  9 +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
  10 +log4j.category.org.apache.zookeeper=warn
16 conf/storm.yaml.example
... ... @@ -0,0 +1,16 @@
  1 +########### These MUST be filled in for a storm configuration
  2 +# storm.zookeeper.servers:
  3 +# - "server1"
  4 +# - "server2"
  5 +#
  6 +# nimbus.host: "nimbus"
  7 +#
  8 +#
  9 +# ##### These may optionally be filled in:
  10 +#
  11 +## Map of tokens to a serialization class. Tokens less than 32 are reserved by Storm.
  12 +## Tokens are written on the wire to identify the field.
  13 +# topology.serializations:
  14 +# 33: "org.mycompany.MyObjectSerialization"
  15 +# 34: "org.mycompany.MyOtherObjectSerialization"
  16 +
9 log4j/log4j.properties
... ... @@ -0,0 +1,9 @@
  1 +log4j.rootLogger=INFO, A1
  2 +
  3 +
  4 +log4j.appender.A1 = org.apache.log4j.DailyRollingFileAppender
  5 +log4j.appender.A1.File = logs/${logfile.name}
  6 +log4j.appender.A1.Append = true
  7 +log4j.appender.A1.DatePattern = '.'yyy-MM-dd
  8 +log4j.appender.A1.layout = org.apache.log4j.PatternLayout
  9 +log4j.appender.A1.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %c{1} [%p] %m%n
32 project.clj
... ... @@ -0,0 +1,32 @@
  1 +(defproject storm "0.5.0"
  2 + :source-path "src/clj"
  3 + :test-path "test/clj"
  4 + :java-source-path "src/jvm"
  5 + :javac-options {:debug "true" :fork "true"}
  6 + :resources-path "conf"
  7 + :dev-resources-path "src/dev"
  8 + :dependencies [[org.clojure/clojure "1.2.0"]
  9 + [org.clojure/clojure-contrib "1.2.0"]
  10 + [commons-io "1.4"]
  11 + [org.apache.commons/commons-exec "1.1"]
  12 + [jvyaml "1.0.0"]
  13 + [backtype/thriftjava "1.0.0"]
  14 + [clj-time "0.3.0"]
  15 + [log4j/log4j "1.2.16"]
  16 + [org.apache.zookeeper/zookeeper "3.3.2"]
  17 + [backtype/jzmq "2.1.0"]
  18 + [com.googlecode.json-simple/json-simple "1.1"]
  19 + [compojure "0.6.4"]
  20 + [hiccup "0.3.6"]
  21 + [ring/ring-jetty-adapter "0.3.11"]
  22 + ]
  23 + :uberjar-exclusions [#"META-INF.*"]
  24 + :dev-dependencies [
  25 + [swank-clojure "1.2.1"]
  26 + [lein-ring "0.4.5"]
  27 + ]
  28 + :jvm-opts ["-Djava.library.path=/usr/local/lib:/opt/local/lib:/usr/lib"]
  29 + :ring {:handler backtype.storm.ui.core/app}
  30 + :extra-classpath-dirs ["src/ui"]
  31 + :aot :all
  32 +)
26 src/clj/backtype/storm/LocalCluster.clj
... ... @@ -0,0 +1,26 @@
  1 +(ns backtype.storm.LocalCluster
  2 + (:use [backtype.storm testing])
  3 + (:gen-class
  4 + :init init
  5 + :implements [backtype.storm.ILocalCluster]
  6 + :constructors {[] []}
  7 + :state state ))
  8 +
  9 +(defn -init []
  10 + (let [ret (mk-local-storm-cluster)]
  11 + [[] ret]
  12 + ))
  13 +
  14 +(defn -submitTopology [this name conf topology]
  15 + (submit-local-topology (:nimbus (. this state))
  16 + name
  17 + conf
  18 + topology))
  19 +
  20 +(defn -shutdown [this]
  21 + (kill-local-storm-cluster (. this state))
  22 + )
  23 +
  24 +(defn -killTopology [this name]
  25 + (.killTopology (:nimbus (. this state)) name)
  26 + )
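
LocalCluster.clj backs the backtype.storm.ILocalCluster interface with an in-process cluster for testing: submitTopology and killTopology go through the local nimbus created in -init, and shutdown tears the cluster down. A rough sketch of driving it from Java, not part of this commit (the zero-argument constructor and method argument order are read off the gen-class definition above; treating conf as a plain Map is an assumption):

    import java.util.HashMap;
    import java.util.Map;

    import backtype.storm.ILocalCluster;
    import backtype.storm.LocalCluster;
    import backtype.storm.generated.StormTopology;

    // Sketch: run a topology on the in-process cluster, then tear it down.
    public class LocalClusterSketch {
        public static void run(StormTopology topology) throws Exception {
            ILocalCluster cluster = new LocalCluster();  // zero-arg constructor per :constructors {[] []}
            Map<String, Object> conf = new HashMap<String, Object>();
            conf.put("topology.debug", true);
            try {
                cluster.submitTopology("test", conf, topology);
                Thread.sleep(10000);                     // let the topology run briefly
                cluster.killTopology("test");
            } finally {
                cluster.shutdown();
            }
        }
    }
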
38 src/clj/backtype/storm/bootstrap.clj
... ... @@ -0,0 +1,38 @@
  1 +(ns backtype.storm.bootstrap)
  2 +
  3 +(defmacro bootstrap []
  4 + '(do
  5 + (import (quote [backtype.storm Constants]))
  6 + (import (quote [backtype.storm.testing FeederSpout TestPlannerBolt TestPlannerSpout AckFailDelegate AckTracker]))
  7 + (import (quote [backtype.storm.utils Utils LocalState Time TimeCacheMap
  8 + TimeCacheMap$ExpiredCallback BufferFileInputStream]))
  9 + (import (quote [backtype.storm.serialization TupleSerializer TupleDeserializer]))
  10 + (import (quote [backtype.storm.spout ISpout SpoutOutputCollector ISpoutOutputCollector ShellSpout]))
  11 + (import (quote [backtype.storm.tuple Tuple Fields MessageId]))
  12 + (import (quote [backtype.storm.task IBolt IOutputCollector
  13 + OutputCollector OutputCollectorImpl IInternalOutputCollector
  14 + TopologyContext ShellBolt
  15 + CoordinatedBolt CoordinatedBolt$SourceArgs KeyedFairBolt]))
  16 + (import (quote [backtype.storm.daemon Shutdownable]))
  17 + (use (quote [backtype.storm config util log clojure]))
  18 + (use (quote [clojure.contrib.seq :only [find-first]]))
  19 + (require (quote [backtype.storm [thrift :as thrift] [cluster :as cluster]
  20 + [event :as event] [process-simulator :as psim]]))
  21 + (require (quote [clojure.set :as set]))
  22 + (require (quote [zilch [mq :as mq]]))
  23 + (require (quote [zilch [virtual-port :as mqvp]]))
  24 + (require (quote [backtype.storm [stats :as stats]]))
  25 + (import (quote [org.apache.log4j PropertyConfigurator Logger]))
  26 +
  27 + (import (quote [backtype.storm.generated Nimbus Nimbus$Processor Nimbus$Iface StormTopology ShellComponent
  28 + NotAliveException AlreadyAliveException InvalidTopologyException
  29 + ClusterSummary TopologyInfo TopologySummary TaskSummary TaskStats TaskSpecificStats
  30 + SpoutStats BoltStats ErrorInfo SupervisorSummary]))
  31 + (import (quote [backtype.storm.daemon.common StormBase Assignment
  32 + TaskInfo SupervisorInfo WorkerHeartbeat TaskHeartbeat]))
  33 + (import (quote [java.io File FileOutputStream FileInputStream]))
  34 + (import (quote [java.util List Random Map HashMap]))
  35 + (import (quote [org.apache.commons.io FileUtils]))
  36 + (import (quote [java.util ArrayList]))
  37 + (mq/zeromq-imports)
  38 + ))
71 src/clj/backtype/storm/clojure.clj
... ... @@ -0,0 +1,71 @@
  1 +(ns backtype.storm.clojure
  2 + (:use backtype.storm.bootstrap)
  3 + (:import [backtype.storm.generated StreamInfo])
  4 + (:import [backtype.storm.tuple Tuple])
  5 + (:import [backtype.storm.task OutputCollector])
  6 + (:import backtype.storm.clojure.ClojureBolt)
  7 + (:require [backtype.storm [thrift :as thrift]]))
  8 +
  9 +(defn direct-stream [fields]
  10 + (StreamInfo. fields true))
  11 +
  12 +(defn clojure-bolt* [output-spec fn-var & args]
  13 + (let [m (meta fn-var)]
  14 + (ClojureBolt. (str (:ns m)) (str (:name m)) args (thrift/mk-output-spec output-spec))
  15 + ))
  16 +
  17 +(defmacro clojure-bolt [output-spec fn-sym & args]
  18 + `(clojure-bolt* ~output-spec (var ~fn-sym) ~@args))
  19 +
  20 +(defmacro defbolt [name output-spec [tuple-sym collector-sym] & body]
  21 + (let [worker-name (symbol (str name "__"))]
  22 + `(do
  23 + (defn ~worker-name []
  24 + (fn [^Tuple ~tuple-sym ^OutputCollector ~collector-sym]
  25 + ~@body
  26 + ))
  27 + (def ~name (clojure-bolt ~output-spec ~worker-name))
  28 + )))
  29 +
  30 +(defn hint [sym class-sym]
  31 + (with-meta sym {:tag class-sym})
  32 + )
  33 +
  34 +(defmulti hinted-args (fn [kw args] kw))
  35 +
  36 +(defmethod hinted-args :prepare [_ [conf context collector]]
  37 + [(hint conf 'java.util.Map)
  38 + (hint context 'backtype.storm.task.TopologyContext)
  39 + (hint collector 'backtype.storm.task.OutputCollector)]
  40 + )
  41 +
  42 +(defmethod hinted-args :execute [_ [tuple collector]]
  43 + [(hint tuple 'backtype.storm.tuple.Tuple)
  44 + (hint collector 'backtype.storm.task.OutputCollector)]
  45 + )
  46 +
  47 +(defmethod hinted-args :cleanup [_ [collector]]
  48 + [(hint collector 'backtype.storm.task.OutputCollector)]
  49 + )
  50 +
  51 +(defmacro defboltfull [name output-spec & kwargs]
  52 + (let [opts (apply hash-map kwargs)
  53 + worker-name (symbol (str name "__"))
  54 + let-bindings (:let opts)
  55 + hof-args (:params opts)
  56 + definer (if hof-args
  57 + `(defn ~name [& args#]
  58 + (apply clojure-bolt* ~output-spec (var ~worker-name) args#))
  59 + `(def ~name (clojure-bolt ~output-spec ~worker-name)))
  60 + fns (select-keys opts [:prepare :execute :cleanup])
  61 + fns (into {}
  62 + (for [[fn-kw [args & impl]] fns]
  63 + [fn-kw `(fn ~(hinted-args fn-kw args) ~@impl)]
  64 + ))]
  65 + `(do
  66 + (defn ~worker-name [~@hof-args]
  67 + (let [~@let-bindings]
  68 + ~fns
  69 + ))
  70 + ~definer
  71 + )))
422 src/clj/backtype/storm/cluster.clj
... ... @@ -0,0 +1,422 @@
  1 +(ns backtype.storm.cluster
  2 + (:import [org.apache.zookeeper.data Stat])
  3 + (:import [backtype.storm.utils Utils])
  4 + (:use [backtype.storm util log config])
  5 + (:use [clojure.contrib.core :only [dissoc-in]])
  6 + (:require [backtype.storm [zookeeper :as zk]])
  7 + )
  8 +
  9 +(defprotocol ClusterState
  10 + (set-ephemeral-node [this path data])
  11 + (delete-node [this path])
  12 + (set-data [this path data]) ;; if node does not exist, create persistent with this data
  13 + (get-data [this path watch?])
  14 + (get-children [this path watch?])
  15 + (mkdirs [this path])
  16 + (close [this])
  17 + (register [this callback])
  18 + (unregister [this id])
  19 + )
  20 +
  21 +(defn mk-distributed-cluster-state [conf]
  22 + (let [zk (zk/mk-client (mk-zk-connect-string (assoc conf STORM-ZOOKEEPER-ROOT "/")))]
  23 + (zk/mkdirs zk (conf STORM-ZOOKEEPER-ROOT))
  24 + (.close zk)
  25 + )
  26 + (let [callbacks (atom {})
  27 + active (atom true)
  28 + mk-zk #(zk/mk-client (mk-zk-connect-string conf)
  29 + (conf STORM-ZOOKEEPER-SESSION-TIMEOUT)
  30 + %)
  31 + zk (atom nil)
  32 + watcher (fn this [state type path]
  33 + (when @active
  34 + (when-not (= :connected state)
  35 + (log-message "Zookeeper disconnected. Attempting to reconnect")
  36 + (reset! zk (mk-zk this))
  37 + )
  38 + (when-not (= :none type)
  39 + (doseq [callback (vals @callbacks)]
  40 + (callback type path))))
  41 + )]
  42 + (reset! zk (mk-zk watcher))
  43 + (reify
  44 + ClusterState
  45 + (register [this callback]
  46 + (let [id (uuid)]
  47 + (swap! callbacks assoc id callback)
  48 + id
  49 + ))
  50 + (unregister [this id]
  51 + (swap! callbacks dissoc id))
  52 + (set-ephemeral-node [this path data]
  53 + (zk/mkdirs @zk (parent-path path))
  54 + (if (zk/exists @zk path false)
  55 + (zk/set-data @zk path data) ; should verify that it's ephemeral
  56 + (zk/create-node @zk path data :ephemeral)
  57 + ))
  58 +
  59 + (set-data [this path data]
  60 + ;; note: this does not turn off any existing watches
  61 + (if (zk/exists @zk path false)
  62 + (zk/set-data @zk path data)
  63 + (do
  64 + (zk/mkdirs @zk (parent-path path))
  65 + (zk/create-node @zk path data :persistent)
  66 + )))
  67 +
  68 + (delete-node [this path]
  69 + (zk/delete-recursive @zk path)
  70 + )
  71 +
  72 + (get-data [this path watch?]
  73 + (zk/get-data @zk path watch?)
  74 + )
  75 +
  76 + (get-children [this path watch?]
  77 + (zk/get-children @zk path watch?))
  78 +
  79 + (mkdirs [this path]
  80 + (zk/mkdirs @zk path))
  81 +
  82 + (close [this]
  83 + (reset! active false)
  84 + (.close @zk))
  85 + )))
  86 +
  87 +(defprotocol StormClusterState
  88 + (assignments [this callback])
  89 + (assignment-info [this storm-id callback])
  90 + (active-storms [this])
  91 + (storm-base [this storm-id callback])
  92 +
  93 + (task-storms [this])
  94 + (task-ids [this storm-id])
  95 + (task-info [this storm-id task-id])
  96 + (task-heartbeat [this storm-id task-id]) ;; returns nil if doesn't exist
  97 + (supervisors [this callback])
  98 + (supervisor-info [this supervisor-id]) ;; returns nil if doesn't exist
  99 +
  100 + (setup-heartbeats! [this storm-id])
  101 + (teardown-heartbeats! [this storm-id])
  102 + (teardown-task-errors! [this storm-id])
  103 + (heartbeat-storms [this])
  104 + (task-error-storms [this])
  105 + (heartbeat-tasks [this storm-id])
  106 +
  107 + (set-task! [this storm-id task-id info])
  108 + (task-heartbeat! [this storm-id task-id info])
  109 + (remove-task-heartbeat! [this storm-id task-id])
  110 + (supervisor-heartbeat! [this supervisor-id info])
  111 + (activate-storm! [this storm-id storm-base])
  112 + (deactivate-storm! [this storm-id])
  113 + (set-assignment! [this storm-id info])
  114 + (remove-storm! [this storm-id])
  115 + (report-task-error [this storm-id task-id error])
  116 + (task-errors [this storm-id task-id])
  117 +
  118 + (disconnect [this])
  119 + )
  120 +
  121 +
  122 +(def ASSIGNMENTS-ROOT "assignments")
  123 +(def TASKS-ROOT "tasks")
  124 +(def CODE-ROOT "code")
  125 +(def STORMS-ROOT "storms")
  126 +(def SUPERVISORS-ROOT "supervisors")
  127 +(def TASKBEATS-ROOT "taskbeats")
  128 +(def TASKERRORS-ROOT "taskerrors")
  129 +
  130 +(def ASSIGNMENTS-SUBTREE (str "/" ASSIGNMENTS-ROOT))
  131 +(def TASKS-SUBTREE (str "/" TASKS-ROOT))
  132 +(def STORMS-SUBTREE (str "/" STORMS-ROOT))
  133 +(def SUPERVISORS-SUBTREE (str "/" SUPERVISORS-ROOT))
  134 +(def TASKBEATS-SUBTREE (str "/" TASKBEATS-ROOT))
  135 +(def TASKERRORS-SUBTREE (str "/" TASKERRORS-ROOT))
  136 +
  137 +(defn supervisor-path [id]
  138 + (str SUPERVISORS-SUBTREE "/" id))
  139 +
  140 +(defn assignment-path [id]
  141 + (str ASSIGNMENTS-SUBTREE "/" id))
  142 +
  143 +(defn storm-path [id]
  144 + (str STORMS-SUBTREE "/" id))
  145 +
  146 +(defn storm-task-root [storm-id]
  147 + (str TASKS-SUBTREE "/" storm-id))
  148 +
  149 +(defn task-path [storm-id task-id]
  150 + (str (storm-task-root storm-id) "/" task-id))
  151 +
  152 +(defn taskbeat-storm-root [storm-id]
  153 + (str TASKBEATS-SUBTREE "/" storm-id))
  154 +
  155 +(defn taskbeat-path [storm-id task-id]
  156 + (str (taskbeat-storm-root storm-id) "/" task-id))