Browse files

Type not mandatory in field definitions

  • Loading branch information...
elisehuard committed Dec 17, 2013
1 parent 0e0aae6 commit 5d9e2098430aa9d4b78476496ab2a3ed0c121639
@@ -82,7 +82,7 @@ data TupleDef = TupleDef [Field]
data Tuple = Tuple [Alias]
-data Field = Field Alias SimpleType
+data Field = Field Alias (Maybe SimpleType)
data Expression = Unary Operator Expression
@@ -245,9 +245,10 @@ tupleDef = sepBy field comma
field :: Parser Field
field = Field <$>
pigVar <*>
- ( char ':' *>
- whiteSpace *>
- pigType )
+ optionMaybe
+ ( char ':' *>
+ whiteSpace *>
+ pigType )
pigType :: Parser SimpleType
pigType = pigSimpleType "int" Int <|>
@@ -89,7 +89,8 @@ instance Treeable Tuple where
toTree (Tuple t) = Node "tuple" (map toTree t)
instance Treeable Field where
- toTree (Field (Identifier s) t) = Node ("field: " ++ s ++ " of type " ++ show t) []
+ toTree (Field (Identifier s) (Just t)) = Node ("field: " ++ s ++ " of type " ++ show t) []
+ toTree (Field (Identifier s) Nothing) = Node ("field: " ++ s) []
instance Treeable Expression where
toTree (Unary o e) = Node "unary expression" [toTree o, toTree e]
@@ -14,13 +14,15 @@ import Language.Pig.Parser.AST
parserSuite :: Test
parserSuite = testGroup "Parser"
[testCase "load statement 1" (testStmt "users = LOAD 'sorted_log/user_registration/$date/*' USING LogStorage() AS (date:chararray, time:chararray, user_id:long);"
- "Seq [Assignment (Identifier \"users\") (LoadClause (Filename \"sorted_log/user_registration/$date/*\") (Just (Function \"LogStorage\" [])) (Just (TupleDef [Field (Identifier \"date\") CharArray,Field (Identifier \"time\") CharArray,Field (Identifier \"user_id\") Long])))]")
+ "Seq [Assignment (Identifier \"users\") (LoadClause (Filename \"sorted_log/user_registration/$date/*\") (Just (Function \"LogStorage\" [])) (Just (TupleDef [Field (Identifier \"date\") (Just CharArray),Field (Identifier \"time\") (Just CharArray),Field (Identifier \"user_id\") (Just Long)])))]")
, testCase "load statement 2" (testStmt "active_users = LOAD 'warehouse/active_users/daily/point/{$visit_dates}*' USING ColumnStorage(' ') AS (date:chararray, user_id:long);"
- "Seq [Assignment (Identifier \"active_users\") (LoadClause (Filename \"warehouse/active_users/daily/point/{$visit_dates}*\") (Just (Function \"ColumnStorage\" [ScalarTerm (String \" \")])) (Just (TupleDef [Field (Identifier \"date\") CharArray,Field (Identifier \"user_id\") Long])))]")
+ "Seq [Assignment (Identifier \"active_users\") (LoadClause (Filename \"warehouse/active_users/daily/point/{$visit_dates}*\") (Just (Function \"ColumnStorage\" [ScalarTerm (String \" \")])) (Just (TupleDef [Field (Identifier \"date\") (Just CharArray),Field (Identifier \"user_id\") (Just Long)])))]")
, testCase "bare load statement" (testStmt "users = LOAD '$users_input' USING ColumnStorage(' ');"
"Seq [Assignment (Identifier \"users\") (LoadClause (Filename \"$users_input\") (Just (Function \"ColumnStorage\" [ScalarTerm (String \" \")])) Nothing)]")
+ , testCase "load statement - type inference" (testStmt "i18n = LOAD '$i18n_input' USING LogStorage(' ') AS (date:chararray, time, machine, log_type, log_status, function, event:chararray, user_id:bytearray, language_code, ip:chararray, uuid, accept_language);"
+ "Seq [Assignment (Identifier \"i18n\") (LoadClause (Filename \"$i18n_input\") (Just (Function \"LogStorage\" [ScalarTerm (String \" \")])) (Just (TupleDef [Field (Identifier \"date\") (Just CharArray),Field (Identifier \"time\") Nothing,Field (Identifier \"machine\") Nothing,Field (Identifier \"log_type\") Nothing,Field (Identifier \"log_status\") Nothing,Field (Identifier \"function\") Nothing,Field (Identifier \"event\") (Just CharArray),Field (Identifier \"user_id\") (Just ByteArray),Field (Identifier \"language_code\") Nothing,Field (Identifier \"ip\") (Just CharArray),Field (Identifier \"uuid\") Nothing,Field (Identifier \"accept_language\") Nothing])))]")
, testCase "foreach stmt with flatten" (testStmt "users = FOREACH users GENERATE FLATTEN(group) AS (date, herd);"
"Seq [Assignment (Identifier \"users\") (ForeachClause (Identifier \"users\") (GenBlock [Flatten \"group\" (Tuple [Identifier \"date\",Identifier \"herd\"])]))]")
@@ -69,7 +71,7 @@ desktop_client_dates = FOREACH desktop_client GENERATE server_date AS server_dat
"Seq [DefineUDF (Identifier \"RESOLVE\") (AliasCommand (Exec \"python -f 4 -d \\\"\\t\\\"\")) [Ship [Filename \"\",Filename \"GeoLiteCity.dat\"]]]")
, testCase "stream stmt" (testStmt "report = STREAM report THROUGH RESOLVE AS (day:chararray, herd:chararray, day_visits:int, visits:int);"
- "Seq [Assignment (Identifier \"report\") (StreamClause (Identifier \"report\") (Identifier \"RESOLVE\") (TupleDef [Field (Identifier \"day\") CharArray,Field (Identifier \"herd\") CharArray,Field (Identifier \"day_visits\") Int,Field (Identifier \"visits\") Int]))]")
+ "Seq [Assignment (Identifier \"report\") (StreamClause (Identifier \"report\") (Identifier \"RESOLVE\") (TupleDef [Field (Identifier \"day\") (Just CharArray),Field (Identifier \"herd\") (Just CharArray),Field (Identifier \"day_visits\") (Just Int),Field (Identifier \"visits\") (Just Int)]))]")
, testCase "distinct stmt" (testStmt "desktop_client_dates3 = DISTINCT desktop_client_dates2;"
"Seq [Assignment (Identifier \"desktop_client_dates3\") (DistinctClause (Identifier \"desktop_client_dates2\"))]")
@@ -82,7 +84,7 @@ desktop_client_dates = FOREACH desktop_client GENERATE server_date AS server_dat
, testCase "filter stmt" (testStmt "users = FILTER users BY registration_date >= '$users_date';"
"Seq [Assignment (Identifier \"users\") (FilterClause (Identifier \"users\") (BooleanExpression GreaterEqual (AliasTerm (Identifier \"registration_date\")) (ScalarTerm (String \"$users_date\"))))]")
, testCase "several statements" (testStmt "active_users = LOAD 'warehouse/active_users/daily/point/{$visit_dates}*' USING ColumnStorage(' ') AS (date:chararray, user_id:long);\nactive_users = JOIN users BY user_id, active_users BY user_id;"
- "Seq [Assignment (Identifier \"active_users\") (LoadClause (Filename \"warehouse/active_users/daily/point/{$visit_dates}*\") (Just (Function \"ColumnStorage\" [ScalarTerm (String \" \")])) (Just (TupleDef [Field (Identifier \"date\") CharArray,Field (Identifier \"user_id\") Long]))),Assignment (Identifier \"active_users\") (InnerJoinClause [Join \"users\" \"user_id\",Join \"active_users\" \"user_id\"])]")
+ "Seq [Assignment (Identifier \"active_users\") (LoadClause (Filename \"warehouse/active_users/daily/point/{$visit_dates}*\") (Just (Function \"ColumnStorage\" [ScalarTerm (String \" \")])) (Just (TupleDef [Field (Identifier \"date\") (Just CharArray),Field (Identifier \"user_id\") (Just Long)]))),Assignment (Identifier \"active_users\") (InnerJoinClause [Join \"users\" \"user_id\",Join \"active_users\" \"user_id\"])]")
, testCase "case insensitivity of keywords" (testStmt "store report into '$output' using ColumnStorage(',');"
"Seq [Store (Identifier \"report\") (Directory \"$output\") (Function \"ColumnStorage\" [ScalarTerm (String \",\")])]")
, testCase "input file path" (testFilePath "example.pig" "example.pig")
@@ -17,7 +17,7 @@ prettyPrintSuite :: Test
prettyPrintSuite = testGroup "pretty print"
testCase "no statements" (testPrint (Seq []) "no statements\n")
- , testCase "load statement" (testPrint (Seq [Assignment (Identifier "active_users") (LoadClause (Filename "warehouse/active_users/daily/point/{$visit_dates}*") (Just (Function "ColumnStorage" [ScalarTerm (String " ")])) (Just (TupleDef [Field (Identifier "date") CharArray,Field (Identifier "user_id") Long])))])
+ , testCase "load statement" (testPrint (Seq [Assignment (Identifier "active_users") (LoadClause (Filename "warehouse/active_users/daily/point/{$visit_dates}*") (Just (Function "ColumnStorage" [ScalarTerm (String " ")])) (Just (TupleDef [Field (Identifier "date") (Just CharArray),Field (Identifier "user_id") (Just Long)])))])
"sequence of statements:\n assignment \n | \n -------------------------------------------------------------------------------------- \n / \\ \nidentifier: active_users LOAD clause \n | \n ------------------------------------------------------------------------------------- \n / | \\ \n filename: \"warehouse/active_users/daily/point/{$visit_dates}*\" function ColumnStorage tuple def \n | | \n scalar: string ----------------------------- \n / \\ \n field: date of type CharArray field: user_id of type Long\n")
, testCase "expression" (testPrint (Seq [Assignment (Identifier "users") (ForeachClause (Identifier "users") (GenBlock [TupleFieldGlob,ExpressionTransform (Binary Divide (Binary Modulo (AliasTerm (Identifier "user_id")) (ScalarTerm (Number (Left 100)))) (ScalarTerm (Number (Left 10)))) (Identifier "cohort")]))])
"sequence of statements:\n assignment \n | \n ----------------------------------------------------------- \n / \\ \nidentifier: users FOREACH clause \n | \n -------------------------------------------------- \n / \\ \n identifier: users transformation block \n | \n ---------------------------------------- \n / \\ \n * calculate \n | \n --------------------------------------- \n / \\ \n binary expression identifier: cohort\n | \n -------------------------------------------------- \n / | \\ \n Divide binary expression double:10 \n | \n ------------------------------ \n / | \\ \n Modulo identifier: user_id double:100 \n")

0 comments on commit 5d9e209

Please sign in to comment.