Browse files

PIG-692 When running a job from a script, use that script name as the…

… default job name.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/pig/trunk@750271 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
0 parents commit 98f01f5d875b6c4f83c0d5469456457865b653c8 Alan Gates committed Mar 5, 2009
Showing with 19,700 additions and 0 deletions.
  1. +451 −0 CHANGES.txt
  2. +29 −0 KEYS
  3. +202 −0 LICENSE.txt
  4. +15 −0 NOTICE.txt
  5. +50 −0 README.txt
  6. +46 −0 autocomplete
  7. +175 −0 bin/pig
  8. +451 −0 build.xml
  9. +8 −0 conf/log4j.properties
  10. +62 −0 conf/pig.properties
  11. +3 −0 contrib/CHANGES.txt
  12. +122 −0 contrib/piggybank/java/build.xml
  13. +186 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java
  14. +103 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/ABS.java
  15. +56 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/ACOS.java
  16. +54 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/ASIN.java
  17. +54 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/ATAN.java
  18. +54 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/ATAN2.java
  19. +32 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/Base.java
  20. +55 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/CBRT.java
  21. +55 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/CEIL.java
  22. +53 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/COS.java
  23. +54 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/COSH.java
  24. +82 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/DoubleAbs.java
  25. +67 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/DoubleBase.java
  26. +84 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/DoubleCopySign.java
  27. +71 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/DoubleDoubleBase.java
  28. +81 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/DoubleGetExponent.java
  29. +80 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/DoubleMax.java
  30. +80 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/DoubleMin.java
  31. +87 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/DoubleNextAfter.java
  32. +81 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/DoubleNextup.java
  33. +81 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/DoubleRound.java
  34. +82 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/DoubleSignum.java
  35. +85 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/DoubleUlp.java
  36. +53 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/EXP.java
  37. +53 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/EXPM1.java
  38. +55 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/FLOOR.java
  39. +81 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/FloatAbs.java
  40. +84 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/FloatCopySign.java
  41. +81 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/FloatGetExponent.java
  42. +80 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/FloatMax.java
  43. +80 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/FloatMin.java
  44. +87 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/FloatNextAfter.java
  45. +81 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/FloatNextup.java
  46. +81 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/FloatRound.java
  47. +82 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/FloatSignum.java
  48. +85 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/FloatUlp.java
  49. +53 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/HYPOT.java
  50. +55 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/IEEEremainder.java
  51. +81 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/IntAbs.java
  52. +80 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/IntMax.java
  53. +80 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/IntMin.java
  54. +54 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/LOG.java
  55. +53 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/LOG10.java
  56. +54 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/LOG1P.java
  57. +82 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/LongAbs.java
  58. +80 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/LongMax.java
  59. +80 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/LongMin.java
  60. +107 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/MAX.java
  61. +108 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/MIN.java
  62. +100 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/NEXTUP.java
  63. +55 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/POW.java
  64. +38 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/RANDOM.java
  65. +55 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/RINT.java
  66. +100 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/ROUND.java
  67. +107 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/SCALB.java
  68. +102 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/SIGNUM.java
  69. +54 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/SIN.java
  70. +53 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/SINH.java
  71. +52 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/SQRT.java
  72. +53 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/TAN.java
  73. +54 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/TANH.java
  74. +102 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/ULP.java
  75. +38 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/Util.java
  76. +103 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/copySign.java
  77. +101 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/getExponent.java
  78. +107 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/nextAfter.java
  79. +55 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/toDegrees.java
  80. +54 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/toRadians.java
  81. +367 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/stats/COR.java
  82. +345 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/stats/COV.java
  83. +83 −0 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/UPPER.java
  84. +59 −0 contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestEvalString.java
  85. +375 −0 contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMathUDF.java
  86. +135 −0 contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestStat.java
  87. +45 −0 docs/overview.html
  88. +153 −0 lib-src/bzip2/org/apache/tools/bzip2r/BZip2Constants.java
  89. +956 −0 lib-src/bzip2/org/apache/tools/bzip2r/CBZip2InputStream.java
  90. +1,705 −0 lib-src/bzip2/org/apache/tools/bzip2r/CBZip2OutputStream.java
  91. +184 −0 lib-src/bzip2/org/apache/tools/bzip2r/CRC.java
  92. +527 −0 lib-src/shock/org/apache/pig/shock/SSHSocketImplFactory.java
  93. +18 −0 lib/hadoop-LICENSE.txt
  94. BIN lib/hadoop17.jar
  95. BIN lib/hadoop18.jar
  96. BIN lib/hbase-0.18.1-test.jar
  97. BIN lib/hbase-0.18.1.jar
  98. +24 −0 lib/javacc-LICENSE.txt
  99. BIN lib/javacc.jar
  100. BIN lib/jline-0.9.94.jar
  101. +149 −0 lib/jline-LICENSE.txt
  102. BIN lib/jsch-0.1.33.jar
  103. +30 −0 lib/jsch-LICENSE.txt
  104. BIN lib/junit-4.1.jar
  105. +149 −0 lib/junit-LICENSE.txt
  106. +57 −0 src/org/apache/pig/Algebraic.java
  107. +63 −0 src/org/apache/pig/ComparisonFunc.java
  108. +199 −0 src/org/apache/pig/EvalFunc.java
  109. +37 −0 src/org/apache/pig/ExecType.java
  110. +33 −0 src/org/apache/pig/FilterFunc.java
  111. +221 −0 src/org/apache/pig/FuncSpec.java
  112. +168 −0 src/org/apache/pig/LoadFunc.java
  113. +585 −0 src/org/apache/pig/Main.java
  114. +338 −0 src/org/apache/pig/PigException.java
  115. +788 −0 src/org/apache/pig/PigServer.java
  116. +62 −0 src/org/apache/pig/PigWarning.java
  117. +31 −0 src/org/apache/pig/ReversibleLoadStoreFunc.java
  118. +106 −0 src/org/apache/pig/Slice.java
  119. +50 −0 src/org/apache/pig/Slicer.java
  120. +84 −0 src/org/apache/pig/StandAloneParser.java
  121. +64 −0 src/org/apache/pig/StoreFunc.java
  122. +157 −0 src/org/apache/pig/backend/BackendException.java
  123. +24 −0 src/org/apache/pig/backend/datastorage/ContainerDescriptor.java
  124. +142 −0 src/org/apache/pig/backend/datastorage/DataStorage.java
  125. +159 −0 src/org/apache/pig/backend/datastorage/DataStorageException.java
  126. +163 −0 src/org/apache/pig/backend/datastorage/ElementDescriptor.java
  127. +36 −0 src/org/apache/pig/backend/datastorage/ImmutableOutputStream.java
  128. +55 −0 src/org/apache/pig/backend/datastorage/SeekableInputStream.java
  129. +158 −0 src/org/apache/pig/backend/executionengine/ExecException.java
  130. +103 −0 src/org/apache/pig/backend/executionengine/ExecJob.java
  131. +26 −0 src/org/apache/pig/backend/executionengine/ExecPhysicalOperator.java
  132. +73 −0 src/org/apache/pig/backend/executionengine/ExecPhysicalPlan.java
  133. +32 −0 src/org/apache/pig/backend/executionengine/ExecScopedLogicalOperator.java
  134. +152 −0 src/org/apache/pig/backend/executionengine/ExecutionEngine.java
  135. +178 −0 src/org/apache/pig/backend/executionengine/PigSlice.java
  136. +127 −0 src/org/apache/pig/backend/executionengine/PigSlicer.java
  137. +70 −0 src/org/apache/pig/backend/executionengine/util/ExecTools.java
  138. +98 −0 src/org/apache/pig/backend/hadoop/DoubleWritable.java
  139. +207 −0 src/org/apache/pig/backend/hadoop/HDataType.java
  140. +52 −0 src/org/apache/pig/backend/hadoop/datastorage/ConfigurationUtil.java
  141. +72 −0 src/org/apache/pig/backend/hadoop/datastorage/HConfiguration.java
  142. +243 −0 src/org/apache/pig/backend/hadoop/datastorage/HDataStorage.java
  143. +180 −0 src/org/apache/pig/backend/hadoop/datastorage/HDirectory.java
  144. +80 −0 src/org/apache/pig/backend/hadoop/datastorage/HFile.java
  145. +204 −0 src/org/apache/pig/backend/hadoop/datastorage/HPath.java
  146. +116 −0 src/org/apache/pig/backend/hadoop/datastorage/HSeekableInputStream.java
  147. +756 −0 src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
  148. +151 −0 src/org/apache/pig/backend/hadoop/executionengine/HJob.java
  149. +33 −0 src/org/apache/pig/backend/hadoop/executionengine/MapRedResult.java
  150. +924 −0 src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/CombinerOptimizer.java
  151. +75 −0 src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/DistinctCombiner.java
Sorry, we could not display the entire diff because too many files (732) changed.
451 CHANGES.txt
@@ -0,0 +1,451 @@
+Pig Change Log
+
+Trunk (unreleased changes)
+
+ INCOMPATIBLE CHANGES
+
+ NEW FEATURES
+
+ PIG-554 Added fragment replicate map side join (shravanmn via pkamath and gates)
+
+ PIG-620: Added MaxTupleBy1stField UDF to piggybank (vzaliva via gates)
+
+ PIG-692: When running a job from a script, use the name of that script as
+ the default name for the job (vzaliva via gates)
+
+ OPTIMIZATIONS
+
+ BUG FIXES
+ PIG-24 Files that were incorrectly placed under test/reports have been
+ removed. ant clean now cleans test/reports. (milindb via gates)
+
+ PIG-25 com.yahoo.pig dir left under pig/test by mistake. removed it (olgan@)
+
+ PIG-23 Made pig work with java 1.5. (milindb via gates)
+
+ PIG-8 added binary comparator (olgan)
+
+ PIG-17 integrated with Hadoop 0.15 (olgan@)
+
+ PIG-11 Add capability to search for jar file to register. (antmagna via
+ olgan)
+
+ PIG-20 Added custom comparator functions for order by (phunt via gates)
+
+ PIG-33 Help was commented out - uncommented (olgan)
+
+ PIG-31: second half of concurrent mode problem addressed (olgan)
+
+ PIG-14: added heartbeat functionality (olgan)
+
+ PIG-17: updated hadoop15.jar to match hadoop 0.15.1 release
+
+ PIG-7: Added use of combiner in some restricted cases. (gates)
+
+ PIG-29: fixed bag factory to be properly initialized (utkarsh)
+
+ PIG-43: fixed problem where using the combiner prevented a pig alias
+ from being evaluated more than once. (gates)
+
+ PIG-45: Fixed pig.pl to not assume hodrc file is named the same as
+ cluster name (gates).
+
+ PIG-7 (more): Fixed bug in PigCombiner where it was writing IndexedTuples
+ instead of Tuples, causing Reducer to crash in some cases.
+
+ PIG-47: Added methods to DataMap to provide access to its content
+
+ PIG-12: Added time stamps to log4j messages (phunt via gates).
+
+ PIG-44: Added adaptive decision of the number of records to hold in memory
+ before spilling (utkarsh)
+ PIG-39: created more efficient version of read (spullara via olgan)
+
+ PIG-41: Added patterns to svn:ignore
+
+ PIG-51: Fixed combiner in the presence of flattening
+
+ PIG-30: Rewrote DataBags to better handle decisions of when to spill to
+ disk and to spill more intelligently. (gates)
+
+ PIG-61: Fixed MapreducePlanCompiler to use PigContext to load up the
+ comparator function instead of Class.forName. (gates)
+
+ PIG-56: Made DataBag implement Iterable. (groves via gates)
+
+ PIG-63: Fix for non-ascii UTF-8 data (breed@ and olgan@)
+
+ PIG-77: Added eclipse specific files to svn:ignore
+
+ PIG-57: Fixed NPE in PigContext.fixUpDomain (francisoud via gates)
+
+ PIG-69: NPE in PigContext.setJobtrackerLocation (francisoud via gates)
+
+ PIG-78: src/org/apache/pig/builtin/PigStorage.java doesn't compile (arun
+ via olgan)
+
+ PIG-32: Abstraction layer (olgan)
+
+ PIG-87: Fix pig.pl to find java via JAVA_HOME instead of hardcoded default
+ path. Also fix it to not die if pigclient.conf is missing. (craigm via
+ gates).
+
+ PIG-89: Fix DefaultDataBag, DistinctDataBag, SortedDataBag to close spill
+ files when they are done spilling (contributions by craigm, breed, and
+ gates, committed by gates).
+
+ PIG-95: Remove System.exit() statements from inside pig (joa23 via gates).
+
+ PIG-65: convert tabs to spaces (groves via olgan)
+
+ PIG-97: Turn off combiner in the case of Cogroup, as it doesn't work when
+ more than one bag is involved (gates).
+
+ PIG-92: Fix NullPointerException in PigContext due to uninitialized conf
+ reference. (francisoud via gates)
+
+ PIG-83: Change everything except grunt and Main (PigServer on down) to use
+ common logging abstraction instead of log4j. By default in grunt, log4j
+ still used as logging layer. Also converted all System.out/err.println
+ statements to use logging instead. (francisoud via gates)
+
+ PIG-80: In a number of places stack trace information was being lost by an
+ exception being caught, and a different exception then thrown. All those
+ locations have been changed so that the new exception now wraps the old.
+ (francisoud via gates).
+
+ PIG-84: Converted printStackTrace calls to calls to the logger.
+ (francisoud via gates).
+
+ PIG-88: Remove unused HadoopExe import from Main. (pi_song via gates).
+
+ PIG-99: Fix to make unit tests not run out of memory. (francisoud via
+ gates).
+
+ PIG-107: enabled several tests. (francisoud via olgan)
+
+ PIG-46: abort processing on error for non-interactive mode (olston via
+ olgan)
+
+ PIG-109: improved exception handling (oae via olgan)
+
+ PIG-72: Move unit tests to use MiniDFS and MiniMR so that unit tests can
+ be run w/o access to a hadoop cluster. (xuzh via gates)
+
+ PIG-68: improvements to build.xml (joa23 via olgan)
+
+ PIG-110: Replaced code accidentally merged out in PIG-32 fix that handled
+ flattening the combiner case. (gates and oae)
+
+ PIG-213: Remove non-static references to logger from data bags and tuples,
+ as it causes significant overhead (vgeschel via gates).
+
+ PIG-284: target for building source jar (oae via olgan)
+
+ PIG-294: string comparator unit tests (sms via pi_song)
+
+ PIG-258: cleaning up directories on failure (daijy via olgan)
+
+ PIG-139: command line editing (daijy via olgan)
+
+ PIG-270: proper line number for parse errors (daijy via olgan)
+
+ PIG-363: fix for describe to produce schema name
+
+ PIG-367: convenience function for UDFs to name schema
+
+ PIG-368: making JobConf available to Load/Store UDFs
+
+ PIG-311: cross is broken
+
+ PIG-369: support for filter UDFs
+
+ PIG-375: support for implicit split
+
+ PIG-301: fix for order by descending
+
+ PIG-378: fix for GENERATE + LIMIT
+
+ PIG-362: don't push limit above generate with flatten
+
+ PIG-381: bincond does not handle null data
+
+ PIG-382: bincond throws typecast exception
+
+ PIG-352: java.lang.ClassCastException when invalid field is accessed
+
+ PIG-329: TestStoreOld, 2 unit tests were broken
+
+ PIG-353: parsing of complex types
+
+ PIG-392: error handling with multiple MRjobs
+
+ PIG-397: code defaults to single reducer
+
+ PIG-373: unconnected load causes problem
+
+ PIG-413: problem with float sum
+
+ PIG-398: Expressions not allowed inside foreach (sms via olgan)
+
+ PIG-418: divide by 0 problem
+
+ PIG-402: order by with user comparator (shravanmn via olgan)
+
+ PIG-415: problem with comparators (shravanmn via olgan)
+
+ PIG-422: cross is broken (shravanmn via olgan)
+
+ PIG-407: need to clone operators (pradeepkth via olgan)
+
+ PIG-428: TypeCastInserter does not replace projects in inner plans
+ correctly (pradeepkth via olgan)
+
+ PIG-421: error with complex nested plan (sms via olgan)
+
+ PIG-429: Self join with implicit split has the join output in wrong order
+ (pradeepkth via olgan)
+
+ PIG-434: short-circuit AND and OR (pradeepkth via olgan)
+
+ PIG-333: allowing no parentheses with single column alias with flatten (sms
+ via olgan)
+
+ PIG-426: Adding result of two UDFs gives a syntax error
+
+ PIG-426: Adding result of two UDFs gives a syntax error (sms via olgan)
+
+ PIG-436: alias is lost when single column is flattened (pradeepkth via
+ olgan)
+
+ PIG-364: Limit returns incorrect records when we use multiple reducers
+ (daijy via olgan)
+
+ PIG-439: disallow alias renaming (pradeepkth via olgan)
+
+ PIG-440: Exceptions from UDFs inside a foreach are not captured (pradeepkth
+ via olgan)
+
+ PIG-442: Disambiguated alias after a foreach flatten is not accessible a
+ couple of statements after the foreach (sms via olgan)
+
+ PIG-424: nested foreach with flatten and agg gives an error (sms via
+ olgan)
+
+ PIG-411: Pig leaves HOD processes behind if Ctrl-C is used before HOD
+ connection is fully established (olgan)
+
+ PIG-430: Projections in nested filter and inside foreach do not work (sms
+ via olgan)
+
+ PIG-445: Null Pointer Exceptions in the mappers leading to lot of retries
+ (shravanmn via olgan)
+
+ PIG-444: job.jar is left behind (pradeepkth via olgan)
+
+ PIG-447: improved error messages (pradeepkth via olgan)
+
+ PIG-448: explain broken after load with types (pradeepkth via olgan)
+
+ PIG-380: invalid schema for databag constant (sms via olgan)
+
+ PIG-451: If a field is part of group followed by flatten, then referring
+ to it causes a parse error (pradeepkth via olgan)
+
+ PIG-455: "group" alias is lost after a flatten(group) (pradeepkth via olgan)
+
+ PIG-458: integration with Hadoop 18 (olgan)
+
+ PIG-459: increased sleep time before checking for job progress
+
+ PIG-462: LIMIT N should create one output file with N rows (shravanmn via
+ olgan)
+
+ PIG-443: Illustrate for the Types branch (shubham via olgan)
+
+ PIG-376: set job name (olgan)
+
+ PIG-463: POCast changes (pradeepkth via olgan)
+
+ PIG-427: casting input to UDFs
+
+ PIG-437: as in alias names causing problems (sms via olgan)
+
+ PIG-54: MIN/MAX don't deal with invalid data (pradeepkth via olgan)
+
+ PIG-470: TextLoader should produce bytearrays (sms via olgan)
+
+ PIG-335: lineage (sms via olgan)
+
+ PIG-464: bag schema definition (pradeepkth via olgan)
+
+ PIG-457: report 100% on successful jobs only (shravanmn via olgan)
+
+ PIG-471: ignoring status errors from hadoop (pradeepkth via olgan)
+
+ PIG-465: performance improvement - removing keys from the value (pradeepkth
+ via olgan)
+
+ PIG-489: (*) processing (sms via olgan)
+
+ PIG-475: missing heartbeats (shravanmn via olgan)
+
+ PIG-468: make determine Schema work for BinStorage (pradeepkth via olgan)
+
+ PIG-494: invalid handling of UTF-8 data in PigStorage (pradeepkth via olgan)
+
+ PIG-501: Make branches/types work under cygwin (daijy via olgan)
+
+ PIG-504: cleanup illustrate not to produce cn= (shubham via olgan)
+
+ PIG-469: make sure that describe says "int" not "integer" (sms via olgan)
+
+ PIG-495: projecting of bags only give 1 field (olgan)
+
+ PIG-500: Load Func for POCast is not being set in some cases (sms via
+ olgan)
+
+ PIG-499: parser issue with as (sms via olgan)
+
+ PIG-507: permission error not reported (pradeepkth via olgan)
+
+ PIG-508: problem with double joins (pradeepkth via olgan)
+
+ PIG-497: problems with UTF8 handling in BinStorage (pradeepkth via olgan)
+
+ PIG-505: working with map elements (sms via olgan)
+
+ PIG-517: load function with parameters does not work with cast (pradeepkth
+ via olgan)
+
+ PIG-525: make sure cast for udf parameters works (olgan)
+
+ PIG-512: Expressions in foreach lead to errors (sms via olgan)
+
+ PIG-528: use UDF return in schema computation (sms via olgan)
+
+ PIG-527: allow PigStorage to write out complex output (sms via olgan)
+
+ PIG-537: Failure in Hadoop map collect stage due to type mismatch in the
+ keys used in cogroup (pradeepkth via olgan)
+
+ PIG-538: support for null constants (pradeepkth via olgan)
+
+ PIG-385: more null handling (pradeepkth via olgan)
+
+ PIG-546: FilterFunc calls empty constructor when it should be calling
+ parameterized constructor (sms via olgan)
+
+ PIG-449: Schemas for bags should contain tuples all the time (pradeepkth via
+ olgan)
+
+ PIG-501: make unit tests run under windows (daijy via olgan)
+
+ PIG-543: Restore local mode to truly run locally instead of use map
+ reduce. (shubhamc via gates)
+
+ PIG-556: Changed FindQuantiles to report progress. Fixed issue with null
+ reporter being passed to EvalFuncs. (gates)
+
+ PIG-6: Add load support from hbase (hustlmsp via gates).
+
+ PIG-522: make negation work (pradeepkth via olgan)
+
+ PIG-563: support for multiple combiner invocations (pradeepkth via olgan)
+
+ PIG-580: using combiner to compute distinct aggs (pradeepkth via olgan)
+
+ PIG-558: Distinct followed by a Join results in Invalid size 0 for a tuple
+ error (pradeepkth via olgan)
+
+ PIG-572 A PigServer.registerScript() method, which lets a client
+ programmatically register a Pig Script. (shubhamc via gates)
+
+ PIG-570: problems with handling bzip data (breed via olgan)
+
+ PIG-599: Added buffering to BufferedPositionedInputStream (gates)
+
+ PIG-597: Fix for how * is treated by UDFs (shravanmn via olgan)
+
+ PIG-629: performance improvement: getting rid of targeted tuple (pradeepkth
+ via olgan)
+
+ PIG-623: Fix spelling errors in output messages (tomwhite via sms)
+
+ PIG-622: Include pig executable in distribution (tomwhite via sms)
+
+ PIG-628: misc performance improvements (pradeepkth via olgan)
+
+ PIG-589: error handling, phase 1-2 (sms via olgan)
+
+ PIG-615: Wrong number of jobs with limit (shravanmn via sms)
+
+ PIG-635: POCast.java has incorrect formatting (sms)
+
+ PIG-634: When POUnion is one of the roots of a map plan, POUnion.getNext()
+ gives a null pointer exception (pradeepkth)
+
+ PIG-632: Improved error message for binary operators (sms)
+
+ PIG-636: Performance improvement: Use lightweight bag implementations which do not
+ register with SpillableMemoryManager with Combiner (pradeepkth)
+
+ PIG-631: 4 Unit test failures on Windows (daijy)
+
+ PIG-645: Streaming is broken with the latest trunk (pradeepkth)
+
+ PIG-646: Distinct UDF should report progress (sms)
+
+ PIG-647: memory size passed on pig command line does not get propagated
+ to JobConf (sms)
+
+ PIG-648: BinStorage fails when it finds markers unexpectedly in the data
+ (pradeepkth)
+
+ PIG-649: RandomSampleLoader does not handle skipping correctly in
+ getNext() (pradeepkth)
+
+ PIG-560: UTFDataFormatException (encoded string too long) is thrown when
+ storing strings > 65536 bytes (in UTF8 form) using BinStorage() (sms)
+
+ PIG-642: Limit after FRJ causes problems (daijy)
+
+ PIG-637: Limit broken after order by in the local mode (shubhamc via
+ olgan)
+
+ PIG-553: EvalFunc.finish() not getting called (shravanmn via sms)
+
+ PIG-654: Optimize build.xml (daijy)
+
+ PIG-574: allowing to run scripts from within grunt shell (hagleitn via
+ olgan)
+
+ PIG-665: Map key type not correctly set (for use when key is null) when
+ map plan does not have localrearrange (pradeepkth)
+
+ PIG-590: error handling on the backend (sms via olgan)
+
+ PIG-590: error handling on the backend (sms)
+
+ PIG-545: PERFORMANCE: Sampler for order bys does not produce a good
+ distribution (pradeepkth)
+
+ PIG-658: Data type long : When 'L' or 'l' is included with data
+ (123L or 123l) load produces null value. Also the case with Float (thejas
+ via sms)
+
+ PIG-591: Error handling phase four (sms via pradeepkth)
+
+ PIG-664: Semantics of * is not consistent (sms)
+
+ PIG-684: outputSchema method in TOKENIZE is broken (thejas via sms)
+
+ PIG-655: Comparison of schemas of bincond operands is flawed (sms via
+ pradeepkth)
+
+ PIG-691: BinStorage skips tuples when ^A is present in data (pradeepkth
+ via sms)
+
+ PIG-577: outer join query loses name information (sms via pradeepkth)
+
+ PIG-690: UNION doesn't work in the latest code (pradeepkth via sms)
29 KEYS
@@ -0,0 +1,29 @@
+pub 1024D/06687D96 2008-07-14 Olga Natkovich <olgan@yahoo-inc.com>
+sig 3 06687D96 2008-07-14 Olga Natkovich <olgan@yahoo-inc.com>
+sub 1024g/B6456039 2008-07-14
+sig 06687D96 2008-07-14 Olga Natkovich <olgan@yahoo-inc.com>
+
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1.2.6 (GNU/Linux)
+
+mQGiBEh7rwMRBACRWYBxkUT3jICHIbgSxCpPkEzr8aXwWlRu0dAKXxj9nklyfqz8
+R5XGaD7W1HfyvmQzO1g0oQ15JFXPIhgNJ5ZbZptq/YaQf0qWAK2RRjFZNyQIg9KF
+nfFX6zdU2e5YIrx/aTQezOw6ThIUHE5hOP/vo6xo4IhjDjRBZxqtmFOtDwCgvklj
+b8issjgf0jwFH9mB3qTCDGMD/3cZTjU3gx+M3e+/7g52BK16zbLRCy3FETV2kxAI
+U0ghF032kENuuHzU9l1sGAeTSg9i+927ohg0zx+WP+HPgGZKY8CJLyswWmPWmAge
+ZJmXVlCad84/X7GD5lu1t3Ueibnw9ZWKH+oCG5mKJpBiklyyzk1OIwwIEsdsIeOF
+MrBFA/sF/cRDNWJWFVgb0jx0pLTkTN8SSePvklElIPfuYPmNj0fG95cQlXmooDbF
+5RHHG8WRuECrrzJTSpiqQ/7AFVnRWdY6Mb1IhVudDEyGSlUYCtXC63nbPWomTKOd
+NII92gS2DCBCXU+CWhsmTDsPyxbpA8sPGxzASJgoXvgVP1b4EbQkT2xnYSBOYXRr
+b3ZpY2ggPG9sZ2FuQHlhaG9vLWluYy5jb20+iF4EExECAB4FAkh7rwMCGwMGCwkI
+BwMCAxUCAwMWAgECHgECF4AACgkQ02eP0wZofZZUfQCgp+nujZ+/7uzlgNYecjfp
+nKyxZ6AAoJm9hjNpTYnzCKSmXRKh2Gdo8N8+uQENBEh7rwQQBACqUm8KbPN/a1Yt
+aG6Hlcj1Zx5jnbCZ+PYzkKN4LAvnmBHCgJUsliEiJ5XGVOVIbMyVT9+Ivwei81Px
+F95HeBarD7cISOlnbGHLME7RCpMFx36I5YR0ZFAYtGS8q1YM6nUN8hqmRVlBYC/7
+D7j9PQ/cM6Z/jS1jJ1q7XIFXzPIplwADBQQAgHMMryaVQvN/7zhrgN5E9tLLeCCF
+b7Al3yfZteuqg3hTU2fuHPuw9SwdL8whbglmKsGylZJ7ULdq1EZQztuGbQ1CwVVb
+QWeLnLktB1Q2F0x2YqdtVi3/+uj5rXYOxcKns0iOS3y6mKccpuEk6CUiFMtLh7NB
++bQz/IDHO3Ql+vGISQQYEQIACQUCSHuvBAIbDAAKCRDTZ4/TBmh9lqZHAJsHomGp
+LMYTpbXVonowdEk4YL45jQCfY2/fXDhD2IwYB+PGmdcsWeEv10Y=
+=WLZG
+-----END PGP PUBLIC KEY BLOCK-----
202 LICENSE.txt
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
15 NOTICE.txt
@@ -0,0 +1,15 @@
+Apache Pig
+Copyright 2008 The Apache Software Foundation
+
+This product includes software developed by The Apache Software
+Foundation (http://www.apache.org/).
+
+This product includes/uses the JavaCC (https://javacc.dev.java.net/),
+Copyright (c) 2003 Sun Microsystems, Inc.
+
+This product includes/uses Jetty (http://jetty.mortbay.org/),
+Copyright (c) Mort Bay Consulting Pty. Ltd. (Australia)
+
+This product includes/uses Jsch (http://www.jcraft.com/jsch/),
+Copyright (c) 2002,2003,2004,2005,2006,2007,2008 Atsuhiko Yamanaka,
+JCraft,Inc. All rights reserved.
50 README.txt
@@ -0,0 +1,50 @@
+Apache Pig
+===========
+Pig is a dataflow programming environment for processing very large files. Pig's
+language is called Pig Latin. A Pig Latin program consists of a directed
+acyclic graph where each node represents an operation that transforms data.
+Operations are of two flavors: (1) relational-algebra style operations such as
+join, filter, project; (2) functional-programming style operators such as map,
+reduce.
+
+Pig compiles these dataflow programs into (sequences of) map-reduce jobs and
+executes them using Hadoop. It is also possible to execute Pig Latin programs
+in a "local" mode (without Hadoop cluster), in which case all processing takes
+place in a single local JVM.
+
+General Info
+===============
+
+For the latest information about Pig, please visit our website at:
+
+ http://incubator.apache.org/pig/
+
+and our wiki, at:
+
+ http://wiki.apache.org/pig/
+
+Getting Started
+===============
+1. To learn about Pig, try http://wiki.apache.org/pig/PigTutorial
+2. To build and run Pig, try http://wiki.apache.org/pig/BuildPig and
+http://wiki.apache.org/pig/RunPig
+3. To check out the function library, try http://wiki.apache.org/pig/PiggyBank
+
+
+Contributing to the Project
+===========================
+
+We welcome all contributions. For the details, please, visit
+http://wiki.apache.org/pig/HowToContribute.
+
+Incubator Disclaimer
+=====================
+
+Apache Pig is an effort undergoing incubation at The Apache Software
+Foundation (ASF). Incubation is required of all newly accepted projects
+until a further review indicates that the infrastructure, communications,
+and decision making process have stabilized in a manner consistent with
+other successful ASF projects. While incubation status is not necessarily
+a reflection of the completeness or stability of the code, it does indicate
+that the project has yet to be fully endorsed by the ASF.
+
46 autocomplete
@@ -0,0 +1,46 @@
+# This is the auto-complete file for grunt. Put this file anywhere in
+# your CLASSPATH or your working directory. An auto-complete entry
+# can be anything, including piggy bank tokens, commonly used paths,
+# etc. The only constraint is that an entry cannot contain whitespace
+# (including tabs).
+#
+UPPER
+getExponent
+CEIL
+RANDOM
+LOG
+toDegrees
+LOG1P
+NEXTUP
+MIN
+MAX
+SIN
+TANH
+ASIN
+COSH
+EXP
+IEEEremainder
+nextAfter
+ULP
+SINH
+SIGNUM
+HYPOT
+ATAN2
+ABS
+POW
+RINT
+FLOOR
+LOG10
+TAN
+ATAN
+toRadians
+COS
+SCALB
+ACOS
+ROUND
+SQRT
+CBRT
+EXPM1
+copySign
+COR
+COV
175 bin/pig
@@ -0,0 +1,175 @@
+#!/usr/bin/env bash
+#
+# The Pig command script
+#
+# Environment Variables
+#
+# JAVA_HOME The java implementation to use. Required; the script exits with an error if it is not set.
+#
+# PIG_CLASSPATH Extra Java CLASSPATH entries.
+#
+# PIG_HEAPSIZE The maximum amount of heap to use, in MB.
+# Default is 1000.
+#
+# PIG_OPTS Extra Java runtime options.
+#
+# PIG_CONF_DIR Alternate conf dir. Default is ${PIG_HOME}/conf.
+#
+# PIG_ROOT_LOGGER The root appender. Default is INFO,console,DRFA
+#
+# PIG_HADOOP_VERSION Version of hadoop to run with. Default is 17 (0.17).
+
+# Detect Cygwin so that paths can be converted to Windows form before
+# they are handed to the JVM (see the cygpath calls further down).
+cygwin=false
+case "`uname`" in
+CYGWIN*) cygwin=true;;
+esac
+# When true, the final java command is printed instead of executed.
+debug=false
+
+# filter command line parameter
+# -secretDebugCmd is consumed here and only switches on the dry run; every
+# other argument is appended to $remaining, which is what java receives.
+# NOTE(review): $@ is unquoted and $remaining is a flat string, so an
+# argument containing whitespace is split into several words.
+for f in $@; do
+ if [[ $f = "-secretDebugCmd" ]]; then
+ debug=true
+ else
+ remaining="${remaining} $f"
+ fi
+done
+
+# resolve links - $0 may be a softlink
+# Follow the chain one hop at a time: the target is taken from the text
+# after "-> " in the `ls -ld` output.
+this="$0"
+while [ -h "$this" ]; do
+ ls=`ls -ld "$this"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ # a target containing a slash is used as-is; a bare name is resolved
+ # against the directory of the link itself
+ if expr "$link" : '.*/.*' > /dev/null; then
+ this="$link"
+ else
+ this=`dirname "$this"`/"$link"
+ fi
+done
+
+# convert relative path to absolute path
+bin=`dirname "$this"`
+script=`basename "$this"`
+# CDPATH is unset inside the subshell so that cd only looks at "$bin" itself
+bin=`unset CDPATH; cd "$bin"; pwd`
+this="$bin/$script"
+
+# the root of the Pig installation
+# (the parent of the directory that holds this script)
+export PIG_HOME=`dirname "$this"`/..
+
+#check to see if the conf dir is given as an optional argument
+if [ $# -gt 1 ]
+then
+ if [ "--config" = "$1" ]
+ then
+ shift
+ confdir=$1
+ shift
+ PIG_CONF_DIR=$confdir
+ fi
+fi
+
+# Allow alternate conf dir location.
+# PIG_HOME/conf is used when PIG_CONF_DIR is unset or empty.
+PIG_CONF_DIR="${PIG_CONF_DIR:-$PIG_HOME/conf}"
+
+# Source the optional pig-env.sh from the conf dir, if present, so that any
+# variables it assigns are in effect for the checks below.
+if [ -f "${PIG_CONF_DIR}/pig-env.sh" ]; then
+ . "${PIG_CONF_DIR}/pig-env.sh"
+fi
+
+# some Java parameters
+# JAVA_HOME is mandatory: without it there is no JVM to launch.
+if [ -z "$JAVA_HOME" ]; then
+    echo "Error: JAVA_HOME is not set."
+    exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+
+# Maximum heap in MB; a non-empty PIG_HEAPSIZE overrides the default of 1000.
+JAVA_HEAP_MAX="-Xmx${PIG_HEAPSIZE:-1000}m"
+
+# CLASSPATH initially contains $PIG_CONF_DIR
+CLASSPATH="${PIG_CONF_DIR}"
+CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
+
+# for developers, add Pig classes to CLASSPATH
+# (only when the build output directories actually exist)
+if [ -d "$PIG_HOME/build/classes" ]; then
+ CLASSPATH=${CLASSPATH}:$PIG_HOME/build/classes
+fi
+if [ -d "$PIG_HOME/build/test/classes" ]; then
+ CLASSPATH=${CLASSPATH}:$PIG_HOME/build/test/classes
+fi
+
+# so that filenames w/ spaces are handled correctly in loops below
+# (an empty IFS turns off word splitting; it is restored with "unset IFS"
+# further down)
+IFS=
+
+# for releases, add core pig to CLASSPATH
+# NOTE: when the glob matches no file, bash leaves the pattern unexpanded and
+# the literal pattern text is what gets appended.
+for f in $PIG_HOME/pig-*-core.jar; do
+ CLASSPATH=${CLASSPATH}:$f;
+done
+
+# during development pig jar might be in build
+for f in $PIG_HOME/build/pig-*-core.jar; do
+ CLASSPATH=${CLASSPATH}:$f;
+done
+
+# Set the version for Hadoop, default to 17
+PIG_HADOOP_VERSION="${PIG_HADOOP_VERSION:-17}"
+# add libs to CLASSPATH. There can be more than one version of the hadoop
+# libraries in the lib dir, so don't blindly add them all. Only add the one
+# that matches PIG_HADOOP_VERSION.
+for f in $PIG_HOME/lib/*.jar; do
+    # Classify by file name only. Matching against the whole path would treat
+    # every jar as a hadoop jar whenever PIG_HOME itself contains "hadoop"
+    # (e.g. /home/hadoop/pig), dropping all other libraries from CLASSPATH.
+    jar_name="${f##*/}"
+    case "$jar_name" in
+        *hadoop${PIG_HADOOP_VERSION}.jar)
+            # the hadoop jar for the requested version
+            CLASSPATH=${CLASSPATH}:$f
+            ;;
+        *hadoop*)
+            # a hadoop jar for some other version: leave it out
+            ;;
+        *)
+            # any non-hadoop library is always added
+            CLASSPATH=${CLASSPATH}:$f
+            ;;
+    esac
+done
+
+# User-supplied entries, if any, go at the very end of CLASSPATH.
+CLASSPATH="${CLASSPATH}${PIG_CLASSPATH:+:${PIG_CLASSPATH}}"
+
+# Fall back to the default log directory and file name when the
+# corresponding variable is unset or empty.
+PIG_LOG_DIR="${PIG_LOG_DIR:-$PIG_HOME/logs}"
+PIG_LOGFILE="${PIG_LOGFILE:-pig.log}"
+
+# cygwin path translation
+# $cygwin holds the word true or false and is run as a command here.
+# CLASSPATH is converted as a path list (-p) to Windows form (-w); the two
+# directories are converted with cygpath -d.
+if $cygwin; then
+ CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+ PIG_HOME=`cygpath -d "$PIG_HOME"`
+ PIG_LOG_DIR=`cygpath -d "$PIG_LOG_DIR"`
+fi
+
+# restore ordinary behaviour
+# (undoes the empty IFS set before the classpath loops)
+unset IFS
+
+# Main class handed to the JVM.
+CLASS=org.apache.pig.Main
+
+# Pass the log location, install root and root logger to Pig as system
+# properties. PIG_ROOT_LOGGER falls back to INFO,console,DRFA when unset
+# or empty.
+PIG_OPTS="$PIG_OPTS -Dpig.log.dir=$PIG_LOG_DIR"
+PIG_OPTS="$PIG_OPTS -Dpig.log.file=$PIG_LOGFILE"
+PIG_OPTS="$PIG_OPTS -Dpig.home.dir=$PIG_HOME"
+PIG_OPTS="$PIG_OPTS -Dpig.root.logger=${PIG_ROOT_LOGGER:-INFO,console,DRFA}"
+
+# run it
+# With -secretDebugCmd the command line is only printed (dry run); otherwise
+# exec replaces this shell with the JVM process.
+if [ "$debug" == "true" ]; then
+ echo "dry run:"
+ echo "$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" $CLASS ${remaining}
+ echo
+else
+ exec "$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" $CLASS ${remaining}
+fi
451 build.xml
@@ -0,0 +1,451 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project name="Pig" default="jar">
+
+ <!-- Load all the default properties, and any the user wants -->
+ <!-- to contribute (without having to type -D or edit this file) -->
+ <property file="${user.home}/build.properties" />
+ <property file="${basedir}/build.properties" />
+
+ <!-- name and version properties -->
+ <property name="name" value="pig" />
+ <property name="Name" value="Pig" />
+ <property name="version" value="0.1.0-dev" />
+ <property name="final.name" value="${name}-${version}" />
+ <condition property="isWindows">
+ <os family="windows"/>
+ </condition>
+
+ <!-- source properties -->
+ <property name="lib.dir" value="${basedir}/lib/" />
+ <property name="src.dir" value="${basedir}/src/" />
+ <property name="src.lib.dir" value="${basedir}/lib-src/" />
+ <property name="src.gen.dir" value="${basedir}/src-gen/" />
+ <property name="src.docs.dir" value="${basedir}/docs/" />
+
+
+ <!-- build properties -->
+ <property name="build.dir" value="${basedir}/build" />
+ <property name="build.classes" value="${build.dir}/classes" />
+ <property name="build.docs" value="${build.dir}/docs" />
+ <property name="build.javadoc" value="${build.docs}/api" />
+ <property name="dist.dir" value="${build.dir}/${final.name}" />
+ <property name="build.encoding" value="ISO-8859-1" />
+ <!-- TODO with only one version of hadoop in the lib folder we do not need that anymore -->
+ <property name="hadoop.jarfile" value="hadoop18.jar" />
+ <property name="hbase.jarfile" value="hbase-0.18.1.jar" />
+ <property name="hbase.test.jarfile" value="hbase-0.18.1-test.jar" />
+
+ <!-- javac properties -->
+ <property name="javac.debug" value="on" />
+ <property name="javac.optimize" value="on" />
+ <property name="javac.deprecation" value="on" />
+ <property name="javac.version" value="1.5" />
+ <property name="javac.args" value="" />
+ <!-- default warnings option -->
+ <property name="javac.args.warnings" value="-Xmaxwarns 1000000" />
+ <!-- warnings option if all.warnings property is set on cmdline -->
+ <property name="javac.args.all.warnings" value="-Xmaxwarns 1000000 -Xlint" />
+
+ <!-- jar names. TODO we might want to use the svn revision number in the name in case it is a dev version -->
+ <property name="output.jarfile" value="${build.dir}/${final.name}.jar" />
+ <property name="output.jarfile.core" value="${build.dir}/${final.name}-core.jar" />
+ <property name="output.jarfile.sources" value="${build.dir}/${final.name}-sources.jar" />
+ <!-- Maintain old pig.jar in top level directory. -->
+ <property name="output.jarfile.backcompat" value="${basedir}/${name}.jar" />
+
+ <!-- test properties -->
+ <property name="test.src.dir" value="${basedir}/test" />
+ <property name="test.build.dir" value="${build.dir}/test" />
+ <property name="test.build.classes" value="${test.build.dir}/classes" />
+ <property name="test.log.dir" value="${test.build.dir}/logs" />
+ <property name="test.timeout" value="900000" />
+ <property name="test.junit.output.format" value="plain" />
+
+ <!-- test configuration, use ${user.home}/build.properties to configure values -->
+ <property name="ssh.gateway" value="" />
+ <property name="hod.server" value="" />
+ <property name="junit.hadoop.conf" value="" />
+ <!-- NOTE(review): Ant properties are immutable once set, so the next two definitions repeat names already defined above (test.log.dir, junit.hadoop.conf) and have no effect; confirm which values are intended. -->
+ <property name="test.log.dir" value="${basedir}/test/logs"/>
+ <property name="junit.hadoop.conf" value="${user.home}/pigtest/conf/"/>
+ <property name="test.output" value="no"/>
+
+ <!-- javacc properties -->
+ <property name="src.gen.query.parser.dir" value="${src.gen.dir}/org/apache/pig/impl/logicalLayer/parser" />
+ <property name="src.gen.script.parser.dir" value="${src.gen.dir}/org/apache/pig/tools/pigscript/parser" />
+ <property name="src.gen.param.parser.dir" value="${src.gen.dir}/org/apache/pig/tools/parameters" />
+ <property name="src.gen.dot.parser.dir" value="${test.src.dir}/org/apache/pig/test/utils/dotGraph/parser" />
+ <property name="src.gen.textdata.parser.dir" value="${src.gen.dir}/org/apache/pig/data/parser" />
+ <property name="javacc.home" value="${basedir}/lib" />
+
+ <!-- javadoc properties -->
+ <property name="javadoc.link.java" value="http://java.sun.com/j2se/1.5.0/docs/api/" />
+
+ <!-- ====================================================== -->
+ <!-- Stuff needed by all targets -->
+ <!-- ====================================================== -->
+ <!-- setup the classpath -->
+ <!-- Compile-time libraries, each referenced as a single file under ${lib.dir}. -->
+ <path id="classpath">
+ <fileset file="${lib.dir}/${hadoop.jarfile}" />
+ <fileset file="${lib.dir}/${hbase.jarfile}" />
+ <fileset file="${lib.dir}/${hbase.test.jarfile}" />
+ <fileset file="${lib.dir}/javacc.jar" />
+ <fileset file="${lib.dir}/jsch-0.1.33.jar" />
+ <fileset file="${lib.dir}/junit-4.1.jar" />
+ <fileset file="${lib.dir}/jline-0.9.94.jar" />
+ <fileset file="${lib.dir}/commons-collections-3.2.jar" />
+ </path>
+
+ <!-- Test classpath: compiled main classes, the test source directory, then everything in "classpath". -->
+ <path id="test.classpath">
+ <pathelement location="${build.classes}"/>
+ <pathelement location="${test.src.dir}"/>
+ <path refid="classpath"/>
+ </path>
+
+
+ <!-- Creates the generated-source and output directories, records the build timestamp and asks svnversion for the working copy revision. -->
+ <target name="init">
+ <mkdir dir="${src.gen.query.parser.dir}" />
+ <mkdir dir="${src.gen.script.parser.dir}" />
+ <mkdir dir="${src.gen.param.parser.dir}" />
+ <mkdir dir="${dist.dir}" />
+ <mkdir dir="${build.classes}" />
+ <mkdir dir="${test.build.classes}" />
+ <mkdir dir="${src.gen.dot.parser.dir}" />
+ <mkdir dir="${src.gen.textdata.parser.dir}" />
+ <!-- "timestamp" is written into the jar manifests by buildJar -->
+ <tstamp>
+ <format property="timestamp" pattern="MMM dd yyyy, HH:mm:ss" />
+ </tstamp>
+ <!-- svnversion is the macro defined below; the jarWithSvn / jarWithOutSvn targets branch on whether svn.revision ended up set -->
+ <svnversion outputproperty="svn.revision"/>
+ </target>
+
+ <!-- Runs the svnversion command line tool on @{wcpath} (default: ${basedir}) and stores its filtered output in the property named by @{outputproperty}. -->
+ <!-- failonerror and failifexecutionfails are both false, so neither a non-zero exit code nor a missing executable fails the build. -->
+ <macrodef name="svnversion">
+ <!-- the path needs to be small content otherwise it will take AGES ! -->
+ <attribute name="wcpath" default="${basedir}" />
+ <attribute name="outputproperty" />
+ <sequential>
+ <exec executable="svnversion" outputproperty="@{outputproperty}" failonerror="false" failifexecutionfails="false" >
+ <arg value="@{wcpath}" />
+ <redirector>
+ <outputfilterchain>
+ <tokenfilter>
+ <!-- version can be xxxx, xxxx:yyyy, xxxxM, xxxxS or xxxx:yyyyMS , ... just get the working copy one -->
+ <!-- group 2 is the first run of digits; everything after it is dropped. Output without any digit is left unchanged. -->
+ <replaceregex pattern="((\d+).*)" replace="\2" />
+ </tokenfilter>
+ </outputfilterchain>
+ </redirector>
+ </exec>
+ </sequential>
+ </macrodef>
+
+
+ <!-- ================================================================== -->
+ <!-- Clean. Delete the build files, and their directories -->
+ <!-- ================================================================== -->
+ <!-- Removes the generated sources, the build directory and the generated DOT parser directory under the test tree. -->
+ <!-- NOTE(review): the copy that buildJar writes to ${output.jarfile.backcompat} lives outside ${build.dir} and is not deleted here; confirm whether that is intended. -->
+ <target name="clean" description="Cleanup build artifacts">
+ <delete dir="${src.gen.dir}" />
+ <delete dir="${build.dir}" />
+ <delete dir="${src.gen.dot.parser.dir}" />
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- Java Compiler Compiler, generate Parsers -->
+ <!-- ================================================================== -->
+ <!-- Runs jjtree on each .jjt grammar and javacc on each .jj grammar, writing the output into the matching src.gen.* directory. -->
+ <!-- Where a jjtree call is followed by a javacc call, the javacc call reads the .jj file from the directory jjtree just wrote to. -->
+ <target name="cc-compile" depends="init" description="Create and Compile Parser">
+ <jjtree target="${src.dir}/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt" outputdirectory="${src.gen.query.parser.dir}" javacchome="${javacc.home}" />
+ <javacc target="${src.gen.query.parser.dir}/QueryParser.jj" outputdirectory="${src.gen.query.parser.dir}" javacchome="${javacc.home}" />
+ <javacc target="${src.dir}/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj" outputdirectory="${src.gen.script.parser.dir}" javacchome="${javacc.home}" />
+ <javacc target="${src.dir}/org/apache/pig/tools/parameters/PigFileParser.jj" outputdirectory="${src.gen.param.parser.dir}" javacchome="${javacc.home}" />
+ <javacc target="${src.dir}/org/apache/pig/tools/parameters/ParamLoader.jj" outputdirectory="${src.gen.param.parser.dir}" javacchome="${javacc.home}" />
+ <jjtree target="${test.src.dir}/org/apache/pig/test/utils/dotGraph/DOTParser.jjt" outputdirectory="${src.gen.dot.parser.dir}" javacchome="${javacc.home}" />
+ <javacc target="${src.gen.dot.parser.dir}/DOTParser.jj" outputdirectory="${src.gen.dot.parser.dir}" javacchome="${javacc.home}" />
+ <jjtree target="${src.dir}/org/apache/pig/data/parser/TextDataParser.jjt" outputdirectory="${src.gen.textdata.parser.dir}/" javacchome="${javacc.home}" />
+ <javacc target="${src.gen.textdata.parser.dir}/TextDataParser.jj" outputdirectory="${src.gen.textdata.parser.dir}" javacchome="${javacc.home}" />
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- Build sources -->
+ <!-- ================================================================== -->
+ <!-- Compiles the main, generated and bundled library sources into ${build.classes}. -->
+ <!-- Both antcalls below are always issued; compile-sources only runs when all.warnings is unset and compile-sources-all-warnings only when it is set, so exactly one of them compiles. -->
+ <target name="compile" depends="init, cc-compile" description="Compile all artifacts">
+ <echo>*** Building Main Sources ***</echo>
+ <echo>*** To compile with all warnings enabled, supply -Dall.warnings=1 on command line ***</echo>
+ <echo>*** If all.warnings property is supplied, compile-sources-all-warnings target will be executed ***</echo>
+ <echo>*** Else, compile-sources (which only warns about deprecations) target will be executed ***</echo>
+
+ <antcall target="compile-sources">
+ <param name="sources" value="${src.dir};${src.gen.dir};${src.lib.dir}/shock;${src.lib.dir}/bzip2" />
+ <param name="dist" value="${build.classes}" />
+ <param name="cp" value="classpath" />
+ </antcall>
+
+ <antcall target="compile-sources-all-warnings">
+ <param name="sources" value="${src.dir};${src.gen.dir};${src.lib.dir}/shock;${src.lib.dir}/bzip2" />
+ <param name="dist" value="${build.classes}" />
+ <param name="cp" value="classpath" />
+ </antcall>
+
+ </target>
+
+ <!-- Compiles the test sources into ${test.build.classes} against test.classpath. -->
+ <!-- As in "compile", both antcalls are issued and the if/unless on the called targets decides which one actually compiles. -->
+ <target name="compile-test" depends="compile">
+ <echo>*** Building Test Sources ***</echo>
+ <echo>*** To compile with all warnings enabled, supply -Dall.warnings=1 on command line ***</echo>
+ <echo>*** If all.warnings property is supplied, compile-sources-all-warnings target will be executed ***</echo>
+ <echo>*** Else, compile-sources (which only warns about deprecations) target will be executed ***</echo>
+
+ <antcall target="compile-sources">
+ <param name="sources" value="${test.src.dir}" />
+ <param name="dist" value="${test.build.classes}" />
+ <param name="cp" value="test.classpath" />
+ </antcall>
+
+ <antcall target="compile-sources-all-warnings">
+ <param name="sources" value="${test.src.dir}" />
+ <param name="dist" value="${test.build.classes}" />
+ <param name="cp" value="test.classpath" />
+ </antcall>
+
+ </target>
+
+ <!-- This target is for default compilation -->
+ <!-- Expects the caller (antcall) to pass: sources = source path, dist = output directory, cp = id of the classpath to use. Skipped when all.warnings is set. -->
+ <target name="compile-sources" unless="all.warnings">
+ <javac encoding="${build.encoding}" srcdir="${sources}"
+ includes="**/*.java" destdir="${dist}" debug="${javac.debug}"
+ optimize="${javac.optimize}" target="${javac.version}"
+ source="${javac.version}" deprecation="${javac.deprecation}">
+ <compilerarg line="${javac.args} ${javac.args.warnings}"/>
+ <classpath refid="${cp}" />
+ </javac>
+ <!-- The grunt autocomplete resource always goes to ${build.classes}, whatever "dist" was passed, so this copy also runs when the target is called for the test sources. -->
+ <copy file="${src.dir}/org/apache/pig/tools/grunt/autocomplete" todir="${build.classes}/org/apache/pig/tools/grunt"/>
+ </target>
+
+ <!-- this target is for compilation with all warnings enabled -->
+ <!-- Expects the caller (antcall) to pass: sources = source path, dist = output directory, cp = id of the classpath to use. Only runs when all.warnings is set. -->
+ <target name="compile-sources-all-warnings" if="all.warnings">
+     <javac encoding="${build.encoding}" srcdir="${sources}"
+            includes="**/*.java" destdir="${dist}" debug="${javac.debug}"
+            optimize="${javac.optimize}" target="${javac.version}"
+            source="${javac.version}" deprecation="${javac.deprecation}">
+         <compilerarg line="${javac.args} ${javac.args.all.warnings} "/>
+         <classpath refid="${cp}" />
+     </javac>
+     <!-- Keep in step with compile-sources: without this copy a build run with -Dall.warnings=1 produces classes (and jars) that lack the grunt autocomplete resource. -->
+     <copy file="${src.dir}/org/apache/pig/tools/grunt/autocomplete" todir="${build.classes}/org/apache/pig/tools/grunt"/>
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- Documentation -->
+ <!-- ================================================================== -->
+ <!-- Generates API documentation for the org.apache.pig packages under ${src.dir} into ${build.javadoc}. -->
+ <!-- NOTE(review): ${year} in the footer is not defined in the part of this file shown before this target; if it is not set elsewhere Ant leaves the literal text in place. Confirm. -->
+ <target name="javadoc" depends="jar" description="Create documentation">
+ <mkdir dir="${build.javadoc}" />
+ <javadoc overview="${src.docs.dir}/overview.html" packagenames="org.apache.pig.*" destdir="${build.javadoc}" author="true" version="true" use="true" windowtitle="${Name} ${version} API" doctitle="${Name} ${version} API" bottom="Copyright &amp;copy; ${year} The Apache Software Foundation">
+ <packageset dir="${src.dir}" />
+ <link href="${javadoc.link.java}" />
+ <classpath>
+ <path refid="classpath" />
+ <pathelement path="${java.class.path}" />
+ <pathelement path="${output.jarfile}" />
+ </classpath>
+ <group title="pig" packages="org.apache.*" />
+ </javadoc>
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- @deprecated, Documentation -->
+ <!-- ================================================================== -->
+ <!-- Empty alias: does nothing itself, it only triggers the javadoc target. -->
+ <target name="doc" depends="javadoc">
+ </target>
+
+ <!-- Builds the sources jar from ${src.dir} plus the generated sources and the bundled shock and bzip2 sources. -->
+ <!-- duplicate="preserve" keeps the entry that was added first when the same path appears in more than one fileset. -->
+ <target name="source-jar" depends="cc-compile">
+ <jar duplicate="preserve" jarfile="${output.jarfile.sources}" basedir="${src.dir}">
+ <manifest>
+ <section name="org/apache/pig">
+ <attribute name="Implementation-Vendor" value="Apache" />
+ <attribute name="Implementation-Title" value="Pig" />
+ <attribute name="Implementation-Version" value="${version}" />
+ </section>
+ </manifest>
+ <fileset dir="${src.gen.dir}"/>
+ <fileset dir="${src.lib.dir}/shock"/>
+ <fileset dir="${src.lib.dir}/bzip2"/>
+ </jar>
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- Make pig.jar -->
+ <!-- ================================================================== -->
+ <!-- TODO we should also exclude test here... -->
+ <!-- ================================================================== -->
+ <!-- Entry point for building the Pig jars after compilation. -->
+ <!-- Both helper targets are invoked, but only one does any work: jarWithSvn runs when svn.revision is set, jarWithOutSvn when it is not. -->
+ <target name="jar" depends="compile" description="Create pig jar">
+ <antcall target="jarWithSvn"/>
+ <antcall target="jarWithOutSvn"/>
+ </target>
+
+ <!-- Runs only when svn.revision is set: calls buildJar with svnString set to that revision. -->
+ <target name="jarWithSvn" if="svn.revision">
+ <antcall target="buildJar">
+ <param name="svnString" value="${svn.revision}" />
+ </antcall>
+ </target>
+
+ <!-- Runs only when svn.revision is NOT set: calls buildJar with the placeholder svnString ": unknown". -->
+ <target name="jarWithOutSvn" unless="svn.revision">
+ <antcall target="buildJar">
+ <param name="svnString" value=": unknown" />
+ </antcall>
+ </target>
+
+ <!-- Builds the jars from ${build.classes}. Expects the svnString parameter to be supplied by the caller (jarWithSvn or jarWithOutSvn). -->
+ <!-- Produces: ${output.jarfile.core} (Pig classes only), ${output.jarfile} (Pig classes plus bundled libraries) and a copy of the latter at ${output.jarfile.backcompat}. -->
+ <target name="buildJar">
+ <echo>svnString ${svnString}</echo>
+ <!-- Core jar: compiled Pig classes only, runnable through org.apache.pig.Main. -->
+ <jar jarfile="${output.jarfile.core}" basedir="${build.classes}">
+ <manifest>
+ <attribute name="Main-Class" value="org.apache.pig.Main" />
+ <section name="org/apache/pig">
+ <attribute name="Implementation-Vendor" value="Apache" />
+ <attribute name="Implementation-Title" value="Pig" />
+ <attribute name="Implementation-Version" value="${version}" />
+ <attribute name="Build-TimeStamp" value="${timestamp}" />
+ <attribute name="Svn-Revision" value="${svnString}" />
+ </section>
+ </manifest>
+ </jar>
+ <!-- @deprecated: legacy jar with the same manifest that additionally unpacks junit, hadoop, jsch and jline into the archive. -->
+ <jar jarfile="${output.jarfile}" basedir="${build.classes}">
+ <manifest>
+ <attribute name="Main-Class" value="org.apache.pig.Main" />
+ <section name="org/apache/pig">
+ <attribute name="Implementation-Vendor" value="Apache" />
+ <attribute name="Implementation-Title" value="Pig" />
+ <attribute name="Implementation-Version" value="${version}" />
+ <attribute name="Build-TimeStamp" value="${timestamp}" />
+ <attribute name="Svn-Revision" value="${svnString}" />
+ </section>
+ </manifest>
+ <zipfileset src="${lib.dir}/junit-4.1.jar" />
+ <zipfileset src="${lib.dir}/${hadoop.jarfile}" />
+ <zipfileset src="${lib.dir}/jsch-0.1.33.jar" />
+ <zipfileset src="${lib.dir}/jline-0.9.94.jar" />
+ </jar>
+ <!-- Keeps a copy of the bundled jar under its backward-compatible file name. -->
+ <copy file="${output.jarfile}" tofile="${output.jarfile.backcompat}"/>
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- Run unit tests -->
+ <!-- ================================================================== -->
+ <!-- Runs the JUnit tests in forked JVMs and writes the results to ${test.log.dir}, which is recreated on every run. -->
+ <!-- Pass -Dtestcase=Name to run only test/**/Name.java; otherwise every **/*Test*.java minus the excludes below is run. -->
+ <target name="test" depends="compile-test,jar">
+ <delete dir="${test.log.dir}"/>
+ <mkdir dir="${test.log.dir}"/>
+ <!-- haltonfailure="no": all tests run to completion; errors and failures are recorded in tests.failed and reported by the fail task at the end. -->
+ <junit showoutput="${test.output}" printsummary="yes" haltonfailure="no" fork="yes" maxmemory="256m" dir="${basedir}" timeout="${test.timeout}" errorProperty="tests.failed" failureProperty="tests.failed">
+ <sysproperty key="ssh.gateway" value="${ssh.gateway}" />
+ <sysproperty key="hod.server" value="${hod.server}" />
+ <!-- <sysproperty key="hod.command" value="${hod.command}"/>
+ <sysproperty key="hod.param" value="${hod.param}"/> -->
+ <sysproperty key="hadoop.log.dir" value="${test.log.dir}"/>
+ <classpath>
+ <pathelement location="${output.jarfile}" />
+ <pathelement location="${test.build.classes}" />
+ <pathelement location="${junit.hadoop.conf}" />
+ <path refid="classpath"/>
+ </classpath>
+ <formatter type="${test.junit.output.format}" />
+
+
+ <!-- Default run: used when the testcase property is not set. -->
+ <batchtest fork="yes" todir="${test.log.dir}" unless="testcase">
+ <fileset dir="test">
+ <include name="**/*Test*.java" />
+ <!-- Excluded because they are end-to-end, don't work yet. -->
+ <!--
+ <exclude name="**/TestFilterOpNumeric.java" />
+ <exclude name="**/TestPigFile.java" />
+ <exclude name="**/TestStoreOld.java" />
+ -->
+ <!-- Excluded under Windows.-->
+ <exclude name="**/TestHBaseStorage.java" if="isWindows" />
+ <!-- Excluded because we don't want to run them -->
+ <exclude name="**/PigExecTestCase.java" />
+ <exclude name="**/TypeCheckingTestUtil.java" />
+ <exclude name="**/TypeGraphPrinter.java" />
+ <exclude name="**/LogicalPlanTester.java" />
+ <exclude name="**/TestHelper.java" />
+ <exclude name="**/TestLargeFile.java" />
+ <exclude name="**/TestOrderBy.java" />
+ <exclude name="**/TestOrderBy2.java" />
+ <exclude name="**/TestPi.java" />
+ <exclude name="**/nightly/**" />
+ </fileset>
+ </batchtest>
+ <!-- Single-test run: used when the testcase property is set. -->
+ <batchtest fork="yes" todir="${test.log.dir}" if="testcase">
+ <fileset dir="test" includes="**/${testcase}.java"/>
+ </batchtest>
+ </junit>
+ <!-- Fails the build only after the whole suite has run. -->
+ <fail if="tests.failed">Tests failed!</fail>
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- D I S T R I B U T I O N -->
+ <!-- ================================================================== -->
+ <!-- Assembles a release tree under ${dist.dir}: libraries, the core jar, bin scripts, docs, sources, build.xml and the top-level *.txt files. -->
+ <target name="package" depends="jar, javadoc" description="Create a Pig release">
+ <mkdir dir="${dist.dir}" />
+ <mkdir dir="${dist.dir}/lib" />
+ <mkdir dir="${dist.dir}/scripts" />
+ <mkdir dir="${dist.dir}/docs" />
+ <mkdir dir="${dist.dir}/docs/api" />
+
+ <copy todir="${dist.dir}/lib" includeEmptyDirs="false">
+ <fileset dir="${lib.dir}">
+ </fileset>
+ </copy>
+
+ <!-- Only the core jar is shipped at the top level of the release tree. -->
+ <copy file="${output.jarfile.core}" todir="${dist.dir}" />
+
+ <copy todir="${dist.dir}/bin">
+ <fileset dir="bin" />
+ </copy>
+
+ <!-- NOTE(review): docs/api is created above but never targeted directly; presumably the Javadoc arrives as ${build.docs}/api through this copy (confirm against the build.javadoc property). -->
+ <copy todir="${dist.dir}/docs">
+ <fileset dir="${build.docs}" />
+ </copy>
+
+ <!-- Hand-written and generated sources are merged into a single src directory. -->
+ <copy todir="${dist.dir}/src" includeEmptyDirs="true">
+ <fileset dir="${src.dir}" />
+ <fileset dir="${src.gen.dir}" />
+ </copy>
+
+ <copy todir="${dist.dir}/" file="build.xml" />
+
+ <copy todir="${dist.dir}">
+ <fileset dir=".">
+ <include name="*.txt" />
+ </fileset>
+ </copy>
+
+ <!-- Marks every file copied into bin as executable. -->
+ <chmod perm="ugo+x" type="file">
+ <fileset dir="${dist.dir}/bin" />
+ </chmod>
+
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- Make release tarball -->
+ <!-- ================================================================== -->
+ <!-- Creates ${build.dir}/${final.name}.tar.gz from the ${final.name} directory under ${build.dir}. -->
+ <!-- NOTE(review): assumes the package target's ${dist.dir} is ${build.dir}/${final.name}; confirm against the property definitions. -->
+ <target name="tar" depends="package" description="Create release tarball">
+ <tar compression="gzip" longfile="gnu" destfile="${build.dir}/${final.name}.tar.gz">
+ <!-- Everything except the files directly in bin is stored with mode 664. -->
+ <tarfileset dir="${build.dir}" mode="664">
+ <exclude name="${final.name}/bin/*" />
+ <include name="${final.name}/**" />
+ </tarfileset>
+ <!-- The files directly in bin are stored with mode 755 so they stay executable after extraction. -->
+ <tarfileset dir="${build.dir}" mode="755">
+ <include name="${final.name}/bin/*" />
+ </tarfileset>
+ </tar>
+ </target>
+
+</project>
8 conf/log4j.properties
@@ -0,0 +1,8 @@
+# ***** Set root logger level to INFO and its only appender to A.
+log4j.rootLogger=info, A
+
+# ***** A is set to be a ConsoleAppender.
+log4j.appender.A=org.apache.log4j.ConsoleAppender
+# ***** A uses PatternLayout.
+log4j.appender.A.layout=org.apache.log4j.PatternLayout
+log4j.appender.A.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
62 conf/pig.properties
@@ -0,0 +1,62 @@
+# Pig configuration file. All values can be overridden by command line arguments.
+# see bin/pig -help
+
+# log4jconf log4j configuration file
+# log4jconf=./conf/log4j.properties
+
+# brief logging (no timestamps)
+brief=false
+
+# clustername, name of the hadoop jobtracker. If no port is defined port 50020 will be used.
+#cluster
+
+#debug level, INFO is default
+debug=INFO
+
+# a file that contains pig script
+#file=
+
+# load jarfile, colon separated
+#jar=
+
+#verbose print all log messages to screen (default to print only INFO and above to screen)
+verbose=false
+
+#exectype local|mapreduce, mapreduce is default
+#exectype=mapreduce
+# hod related properties
+#ssh.gateway
+#hod.expect.root
+#hod.expect.uselatest
+#hod.command
+#hod.config.dir
+#hod.param
+
+
+#Do not spill temp files smaller than this size (bytes)
+pig.spill.size.threshold=5000000
+#EXPERIMENT: Activate garbage collection when spilling a file bigger than this size (bytes)
+#This should help reduce the number of files being spilled.
+pig.spill.gc.activation.size=40000000
+
+
+######################
+# Everything below this line is Yahoo specific. Note that I've made
+# (almost) no changes to the lines above to make merging in from Apache
+# easier. Any values I don't want from above I override below.
+#
+# This file is configured for use with HOD on the production clusters. If you
+# want to run pig with a static cluster you will need to remove everything
+# below this line and set the cluster value (above) to the
+# hostname and port of your job tracker.
+
+exectype=mapreduce
+
+hod.config.dir=/export/crawlspace/kryptonite/hod/current/conf
+hod.server=local
+
+cluster.domain=inktomisearch.com
+
+log.file=
+
+yinst.cluster=kryptonite
3 contrib/CHANGES.txt
@@ -0,0 +1,3 @@
+PIG-246: created UDF repository (olgan)
+PIG-245: UDF wrappers for Java Math functions (ajaygarg via olgan)
+PIG-277: UDF for computing correlation and covariance between data sets (ajaygarg via olgan)
122 contrib/piggybank/java/build.xml
@@ -0,0 +1,122 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project basedir="." default="jar" name="pigudf">
+ <!-- javac properties -->
+ <property name="javac.debug" value="on" />
+ <!-- Debug information kinds; the compile target passes this to javac as debuglevel. -->
+ <property name="javac.level" value="source,lines,vars"/>
+ <property name="javac.optimize" value="on" />
+ <property name="javac.deprecation" value="off" />
+ <!-- Used for both the source and the target level of javac. -->
+ <property name="javac.version" value="1.5" />
+ <property name="javac.args" value="" />
+ <!-- TODO we should use warning... <property name="javac.args.warnings" value="-Xlint:unchecked" /> -->
+ <property name="javac.args.warnings" value="" />
+
+ <!-- build properties -->
+ <property name="build.dir" value="${basedir}/build" />
+ <property name="build.classes" value="${build.dir}/classes" />
+ <property name="build.docs" value="${build.dir}/docs" />
+ <property name="build.javadoc" value="${build.docs}/api" />
+ <!-- Location of the main Pig jar, relative to this build file; it must be built before the UDFs can compile. -->
+ <property name="pigjar" value="../../../pig.jar" />
+ <property name="udfjar" value="piggybank.jar" />
+ <property name="src.dir" value="src/main/java/org/apache/pig/piggybank" />
+
+ <!-- jar properties -->
+ <!-- NOTE(review): the property name ".javadoc" is not referenced anywhere in this build file and duplicates build.javadoc; it looks like a truncated name. Confirm before removing. -->
+ <property name=".javadoc" value="${build.docs}/api" />
+
+ <!-- test properties -->
+ <property name="test.build.dir" value="${build.dir}/test" />
+ <property name="test.classes" value="${test.build.dir}/classes" />
+ <property name="test.logs" value="${test.build.dir}/logs" />
+ <!-- Per-test timeout handed to the junit task (milliseconds). -->
+ <property name="test.timeout" value="900000" />
+ <property name="test.junit.output.format" value="plain" />
+ <property name="test.src.dir" value="src/test/java" />
+
+ <!-- Compile classpath: the compiled UDF classes plus the Pig jar. -->
+ <path id="pigudf.classpath">
+ <pathelement location="${build.classes}"/>
+ <pathelement location="${pigjar}"/>
+ </path>
+
+ <!-- Runtime classpath for the tests: compiled classes, compiled tests, the test source directory and everything on the compile classpath. -->
+ <path id="test.classpath">
+ <pathelement location="${build.classes}"/>
+ <pathelement location="${test.classes}"/>
+ <pathelement location="${test.src.dir}"/>
+ <path refid="pigudf.classpath"/>
+ </path>
+
+ <!-- Creates the output directories used by the compile, test and documentation targets. -->
+ <target name="init">
+ <mkdir dir="${build.dir}"/>
+ <mkdir dir="${build.classes}"/>
+ <mkdir dir="${test.build.dir}"/>
+ <mkdir dir="${test.classes}"/>
+ <mkdir dir="${build.javadoc}"/>
+ </target>
+ <!-- Removes all build output by deleting ${build.dir}, the same directory the init target creates. -->
+ <!-- NOTE(review): the jar written to ${udfjar} lives outside ${build.dir} and is not removed here. -->
+ <target name="clean">
+ <delete dir="${build.dir}"/>
+ </target>
+ <!-- Compiles the UDF sources under ${src.dir} into ${build.classes} against the Pig jar (pigudf.classpath). -->
+ <target depends="init" name="compile" description="compile all of the class files">
+ <echo> *** Compiling Pig UDFs ***</echo>
+ <javac srcdir="${src.dir}" debug="${javac.debug}" debuglevel="${javac.level}" destdir="${build.classes}" source="${javac.version}"
+ target="${javac.version}" optimize="${javac.optimize}" deprecation="${javac.deprecation}">
+ <!-- Extra compiler flags; both properties are empty by default. -->
+ <compilerarg line="${javac.args} ${javac.args.warnings}" />
+ <classpath refid="pigudf.classpath"/>
+ </javac>
+ </target>
+ <!-- Packages the compiled UDF classes from ${build.classes} into ${udfjar}. -->
+ <!-- The echo names the file actually written, and the fileset uses the build.classes property so it follows any override of build.dir. -->
+ <target depends="init,compile" name="jar" description="create the jar files">
+ <echo> *** Creating ${udfjar} ***</echo>
+ <jar destfile="${udfjar}">
+ <fileset dir="${build.classes}"/>
+ </jar>
+ </target>
+ <!-- Compiles the UDF unit tests under ${test.src.dir} into ${test.classes}. -->
+ <target depends="compile" name="compile-test">
+ <echo> *** Compiling UDF tests ***</echo>
+ <!-- debuglevel was never defined in this file, so javac received the unexpanded text. -->
+ <!-- Default it to javac.level; Ant properties are immutable, so a value given with -Ddebuglevel still wins. -->
+ <property name="debuglevel" value="${javac.level}" />
+ <javac srcdir="${test.src.dir}" debug="true" debuglevel="${debuglevel}" destdir="${test.classes}" source="${javac.version}" target="${javac.version}">
+ <classpath refid="pigudf.classpath"/>
+ </javac>
+ </target>
+ <!-- Runs the UDF unit tests in forked JVMs and writes the results to ${test.logs}, which is recreated on every run. -->
+ <!-- Pass -Dtestcase=Name to run only **/Name.java; otherwise every **/*Test*.java under ${test.src.dir} is run. -->
+ <target depends="compile-test,jar" name="test">
+ <echo> *** Running UDF tests ***</echo>
+ <delete dir="${test.logs}"/>
+ <mkdir dir="${test.logs}"/>
+ <!-- haltonfailure="no": all tests run to completion; errors and failures are recorded in tests.failed and reported by the fail task at the end. -->
+ <junit printsummary="yes" haltonfailure="no" fork="yes" maxmemory="256m" dir="${basedir}" timeout="${test.timeout}" errorProperty="tests.failed" failureProperty="tests.failed">
+ <classpath refid="test.classpath"/>
+ <formatter type="${test.junit.output.format}" />
+ <!-- Default run: used when the testcase property is not set. -->
+ <batchtest fork="yes" todir="${test.logs}" unless="testcase">
+ <fileset dir="${test.src.dir}">
+ <include name="**/*Test*.java" />
+ </fileset>
+ </batchtest>
+ <!-- Single-test run: used when the testcase property is set. -->
+ <batchtest fork="yes" todir="${test.logs}" if="testcase">
+ <fileset dir="${test.src.dir}" includes="**/${testcase}.java"/>
+ </batchtest>
+ </junit>
+ <fail if="tests.failed">Tests failed!</fail>
+ </target>
+ <!-- Generates Javadoc for the evaluation, storage, filtering, grouping and comparison source directories under ${src.dir}. -->
+ <!-- NOTE(review): output goes to the literal path build/javadoc, while the init target creates ${build.javadoc} (build/docs/api), which this target never fills. Confirm which location is intended. -->
+ <target depends="init" name="javadoc"
+ description="build javadoc for all of the packages">
+ <echo> *** Creating Javadocs ***</echo>
+ <javadoc destdir="build/javadoc"
+ author="true">
+ <fileset dir="${src.dir}/evaluation" includes="**/*.java"/>
+ <fileset dir="${src.dir}/storage" includes="**/*.java"/>
+ <fileset dir="${src.dir}/filtering" includes="**/*.java"/>
+ <fileset dir="${src.dir}/grouping" includes="**/*.java"/>
+ <fileset dir="${src.dir}/comparison" includes="**/*.java"/>
+ <classpath refid="pigudf.classpath"/>
+ </javadoc>
+ </target>
+</project>
186 .../piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.piggybank.evaluation;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.pig.Algebraic;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigProgressable;
+import org.apache.pig.data.*;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+
+/**
+ * MaxTupleBy1stField UDF returns a tuple with max value of the first field in a
+ * given bag.
+ *
+ * Caveat: first field assumed to have type 'long'. You may need to enforece this
+ * via schema when loading data, as sown in sample usage below.
+ *
+ * Sample usage:
+ *
+ * A = load 'test.tsv' as (first: long, second, third);
+ * B = GROUP A by second;
+ * C = FOREACH B GENERATE group, MaxTupleBy1stField(A);
+ *
+ * @author Vadim Zaliva <lord@codemindes.com>
+ */
+public class MaxTupleBy1stField extends EvalFunc<Tuple> implements Algebraic
+{
+ /**
+ * Indicates once for how many items progress hartbeat should be sent.
+ */
+ private static final int PROGRESS_FREQUENCY = 10;
+
+ static public class Initial extends EvalFunc<Tuple>
+ {
+ //TODO: private static TupleFactory tfact = TupleFactory.getInstance();
+
+ @Override
+ public Tuple exec(Tuple input) throws IOException
+ {
+ try
+ {
+ // input is a bag with one tuple containing
+ // the column we are trying to max on
+ DataBag bg = (DataBag) input.get(0);
+ Tuple tp = bg.iterator().next();
+ return tp; //TODO: copy?
+ } catch(ExecException ee)
+ {
+ IOException oughtToBeEE = new IOException();
+ oughtToBeEE.initCause(ee);
+ throw oughtToBeEE;
+ }
+ }
+ }
+
+ public Schema outputSchema(Schema input)
+ {
+ return input;
+ }
+
+ static public class Intermediate extends EvalFunc<Tuple>
+ {
+ //TODO: private static TupleFactory tfact = TupleFactory.getInstance();
+
+ @Override
+ public Tuple exec(Tuple input) throws IOException
+ {
+ try
+ {
+ return max(input, reporter);
+ } catch(ExecException ee)
+ {
+ IOException oughtToBeEE = new IOException();
+ oughtToBeEE.initCause(ee);
+ throw oughtToBeEE;
+ }
+ }
+ }
+
+ static public class Final extends EvalFunc<Tuple>
+ {
+ @Override
+ public Tuple exec(Tuple input) throws IOException
+ {
+ try
+ {
+ return max(input, reporter);
+ } catch(ExecException ee)
+ {
+ IOException oughtToBeEE = new IOException();
+ oughtToBeEE.initCause(ee);
+ throw oughtToBeEE;
+ }
+ }
+ }
+
+ @Override
+ public Tuple exec(Tuple input) throws IOException
+ {
+ try
+ {
+ return max(input, reporter);
+ } catch(ExecException ee)
+ {
+ IOException oughtToBeEE = new IOException();
+ oughtToBeEE.initCause(ee);
+ throw oughtToBeEE;
+ }
+ }
+
+ protected static Tuple max(Tuple input, PigProgressable reporter) throws ExecException
+ {
+ DataBag values = (DataBag) input.get(0);
+
+ // if we were handed an empty bag, return NULL
+ // this is in compliance with SQL standard
+ if(values.size() == 0)
+ return null;
+
+ long curMax = 0;
+ Tuple curMaxTuple = null;
+ int n=0;
+ for(Iterator<Tuple> it = values.iterator(); it.hasNext();)
+ {
+ if(reporter!=null && ++n%PROGRESS_FREQUENCY==0)
+ reporter.progress();
+ Tuple t = it.next();
+ try
+ {
+ long d = (Long) t.get(0);
+ if(curMaxTuple == null || d > curMax)
+ {
+ curMax = d;
+ curMaxTuple = t;
+ }
+
+ } catch(RuntimeException exp)
+ {
+ ExecException newE = new ExecException("Error processing: " + t.toString() + exp.getMessage());
+ newE.initCause(exp);
+ throw newE;
+ }
+ }
+
+ return curMaxTuple;
+ }
+
+ @Override
+ public String getInitial()
+ {
+ return Initial.class.getName();
+ }
+
+ @Override
+ public String getIntermed()
+ {
+ return Intermediate.class.getName();
+ }
+
+ @Override
+ public String getFinal()
+ {
+ return Final.class.getName();
+ }
+
+}
103 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/ABS.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.piggybank.evaluation.math;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.FuncSpec;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.data.DataType;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.util.WrappedIOException;
+/**
+ * math.ABS implements a binding to the Java function
+ * {@link java.lang.Math#abs(double) Math.abs(double)} for computing the
+ * absolute value of the argument. The returned value will be a double which is
+ * absolute value of the input.
+ *
+ * <dl>
+ * <dt><b>Parameters:</b></dt>
+ * <dd><code>value</code> - <code>numeric</code>.</dd>
+ *
+ * <dt><b>Return Value:</b></dt>
+ * <dd><code>numeric</code> absolute value of input</dd>
+ *
+ * <dt><b>Return Schema:</b></dt>
+ * <dd>abs_inputSchema</dd>
+ *
+ * <dt><b>Example:</b></dt>
+ * <dd><code>
+ * register math.jar;<br/>
+ * A = load 'mydata' using PigStorage() as ( float1 );<br/>
+ * B = foreach A generate float1, math.ABS(float1);
+ * </code></dd>
+ * </dl>
+ *
+ * @see Math#abs(double)
+ * @author ajay garg
+ *
+ */
+public class ABS extends EvalFunc<Double>{
+    /**
+     * java level API
+     * @param input expects a single numeric value
+     * @return the absolute value of the argument as a double, or null when the
+     *         input is null or empty, its first field converts to null, or the
+     *         value cannot be parsed as a number
+     * @throws IOException if the first field cannot be read or converted for
+     *         any reason other than a number format problem
+     */
+    public Double exec(Tuple input) throws IOException {
+        if (input == null || input.size() == 0)
+            return null;
+
+        Double d;
+        try{
+            d = DataType.toDouble(input.get(0));
+        } catch (NumberFormatException nfe){
+            // Malformed numbers are reported and treated as null output.
+            System.err.println("Failed to process input; error - " + nfe.getMessage());
+            return null;
+        } catch (Exception e){
+            throw WrappedIOException.wrap("Caught exception processing input row ", e);
+        }
+
+        // Math.abs(d) unboxes d; guard so that a null conversion result
+        // (presumably what a null field yields) becomes a null output rather
+        // than a NullPointerException.
+        if (d == null)
+            return null;
+
+        return Math.abs(d);
+    }
+
+    /**
+     * Declares a single DOUBLE output field whose name is derived from the
+     * lower-cased class name and the input schema via getSchemaName.
+     */
+    @Override
+    public Schema outputSchema(Schema input) {
+        return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), DataType.DOUBLE));
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.pig.EvalFunc#getArgToFuncMapping()
+     */
+    @Override
+    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
+        // One entry per supported argument type: bytearray input is handled by
+        // this class, the typed inputs by the type-specific Abs classes.
+        List<FuncSpec> funcList = new ArrayList<FuncSpec>();
+        funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.BYTEARRAY))));
+        funcList.add(new FuncSpec(DoubleAbs.class.getName(), new Schema(new Schema.FieldSchema(null, DataType.DOUBLE))));
+        funcList.add(new FuncSpec(FloatAbs.class.getName(), new Schema(new Schema.FieldSchema(null, DataType.FLOAT))));
+        funcList.add(new FuncSpec(IntAbs.class.getName(), new Schema(new Schema.FieldSchema(null, DataType.INTEGER))));
+        funcList.add(new FuncSpec(LongAbs.class.getName(), new Schema(new Schema.FieldSchema(null, DataType.LONG))));
+        return funcList;
+    }
+
+}
56 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/ACOS.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.piggybank.evaluation.math;
+
+/**
+ * math.ACOS binds the Java function
+ * {@link java.lang.Math#acos(double) Math.acos(double)}: it computes the arc
+ * cosine of its argument and returns it as a double.
+ *
+ * <dl>
+ * <dt><b>Parameters:</b></dt>
+ * <dd><code>value</code> - <code>Double</code>.</dd>
+ *
+ * <dt><b>Return Value:</b></dt>
+ * <dd><code>Double</code> arc cosine of the value of input</dd>
+ *
+ * <dt><b>Return Schema:</b></dt>
+ * <dd>acos_inputSchema</dd>
+ *
+ * <dt><b>Example:</b></dt>
+ * <dd><code>
+ * register math.jar;<br/>
+ * A = load 'mydata' using PigStorage() as ( float1 );<br/>
+ * B = foreach A generate float1, math.ACOS(float1);
+ * </code></dd>
+ * </dl>
+ *
+ * @see Math#acos(double)
+ * @author ajay garg
+ *
+ */
+public class ACOS extends DoubleBase {
+
+    /**
+     * Applies {@link Math#acos(double)} to the given value.
+     *
+     * @param input value whose arc cosine is wanted
+     * @return the arc cosine of the input
+     */
+    Double compute(Double input) {
+        final double arcCosine = Math.acos(input);
+        return arcCosine;
+    }
+}
54 contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/ASIN.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.piggybank.evaluation.math;
+
+/**
+ * math.ASIN implements a binding to the Java function
+* {@link java.lang.Math#asin(double) Math.asin(double)} for computing the
+* arc sine of value of the argument. The returned value will be a double which is
+* the arc sine of the value of input.
+*
+* <dl>
+* <dt><b>Parameters:</b></dt>
+* <dd><code>value</code> - <code>Double</code>.</dd>
+*
+* <dt><b>Return Value:</b></dt>
+* <dd><code>Double</code> arc sine of the value of input</dd>
+*
+* <dt><b>Return Schema:</b></dt>
+* <dd>asin_inputSchema</dd>
+*
+* <dt><b>Example:</b></dt>
+* <dd><code>
+* register math.jar;<br/>
+* A = load 'mydata' using PigStorage() as ( float1 );<br/>
+* B = foreach A generate float1, math.ASIN(float1);
+* </code></dd>
+* </dl>
+*
+* @see Math#asin(double)
+* @see
+* @author