Permalink
Browse files

filter clause

  • Loading branch information...
1 parent bef2360 commit 0e0aae69a80083b3979f43751dccd0be51ee7489 @elisehuard committed Dec 17, 2013
Showing with 12 additions and 1 deletion.
  1. +1 −0 src/Language/Pig/Parser/AST.hs
  2. +9 −1 src/Language/Pig/Parser/Parser.hs
  3. +2 −0 test/Language/Pig/Parser/Test.hs
@@ -28,6 +28,7 @@ data OpClause = LoadClause Path (Maybe Function) (Maybe TupleDef)
| InnerJoinClause [Join]
| StreamClause Alias Alias TupleDef
| DistinctClause Alias
+ | FilterClause Alias BooleanExpression
DERIVE
data GenBlock = GenBlock [Transform]
@@ -39,6 +39,7 @@ pigLanguageDef = emptyDef {
"STORE", "INTO", "USING",
"REGISTER",
"DISTINCT",
+ "FILTER",
"int", "long", "float", "double", "chararray", "bytearray", "*"]
, Token.reservedOpNames = ["=", "+", "-", "*", "/", "%", "?", ":", "and", "or", "not"]
, Token.caseSensitive = False
@@ -142,6 +143,7 @@ opClause = loadClause
<|> groupClause
<|> streamClause
<|> distinctClause
+ <|> filterClause
loadClause :: Parser OpClause
loadClause = LoadClause <$>
@@ -152,7 +154,6 @@ loadClause = LoadClause <$>
(optionMaybe (reserved "AS" *>
pigTupleDef))
-
-- foreach: only the block (outer bag) version
foreachClause :: Parser OpClause
foreachClause = ForeachClause <$>
@@ -187,6 +188,13 @@ distinctClause = DistinctClause <$>
(reserved "DISTINCT" *>
pigVar)
-- filter: parses `FILTER <alias> BY <boolean expression>` into a FilterClause.
-- The reserved word FILTER is consumed first, then the alias (via pigVar),
-- then the reserved word BY, and finally the condition (via booleanExpression).
-- e.g. `FILTER users BY registration_date >= '$users_date'`
+filterClause :: Parser OpClause
+filterClause = FilterClause <$>
+ (reserved "FILTER" *>
+ pigVar) <*>
+ (reserved "BY" *>
+ booleanExpression)
+
joinTable :: Parser Join
joinTable = Join <$>
pigIdentifier <*>
@@ -79,6 +79,8 @@ desktop_client_dates = FOREACH desktop_client GENERATE server_date AS server_dat
, testCase "register stmt" (testStmt "REGISTER 'lib/datafu-0.0.10.jar';"
"Seq [Register (Library \"lib/datafu-0.0.10.jar\")]")
+ , testCase "filter stmt" (testStmt "users = FILTER users BY registration_date >= '$users_date';"
+ "Seq [Assignment (Identifier \"users\") (FilterClause (Identifier \"users\") (BooleanExpression GreaterEqual (AliasTerm (Identifier \"registration_date\")) (ScalarTerm (String \"$users_date\"))))]")
, testCase "several statements" (testStmt "active_users = LOAD 'warehouse/active_users/daily/point/{$visit_dates}*' USING ColumnStorage(' ') AS (date:chararray, user_id:long);\nactive_users = JOIN users BY user_id, active_users BY user_id;"
"Seq [Assignment (Identifier \"active_users\") (LoadClause (Filename \"warehouse/active_users/daily/point/{$visit_dates}*\") (Just (Function \"ColumnStorage\" [ScalarTerm (String \" \")])) (Just (TupleDef [Field (Identifier \"date\") CharArray,Field (Identifier \"user_id\") Long]))),Assignment (Identifier \"active_users\") (InnerJoinClause [Join \"users\" \"user_id\",Join \"active_users\" \"user_id\"])]")
, testCase "case insensitivity of keywords" (testStmt "store report into '$output' using ColumnStorage(',');"

0 comments on commit 0e0aae6

Please sign in to comment.