Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Change Oozie workflow to add partitions

  • Loading branch information...
commit 04fd5f33ff1305e4d83149a621cd3ac22c516f45 1 parent dc8cac6
Jon Natkins authored
View
2  oozie-workflows/add_partition.q
@@ -0,0 +1,2 @@
+ADD JAR ${JSON_SERDE};
+ALTER TABLE tweets ADD IF NOT EXISTS PARTITION (datehour = ${DATEHOUR}) LOCATION '${WFINPUT}';
View
12 oozie-workflows/coord-app.xml
@@ -16,13 +16,13 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<coordinator-app name="load-tweets-coord" frequency="${coord:hours(1)}"
+<coordinator-app name="add-partition-coord" frequency="${coord:hours(1)}"
start="${jobStart}" end="${jobEnd}"
timezone="UTC"
xmlns="uri:oozie:coordinator:0.1">
<datasets>
<dataset name="tweets" frequency="${coord:hours(1)}"
- initial-instance="${initialDataset}" timezone="UTC">
+ initial-instance="${initialDataset}" timezone="America/Los_Angeles">
<uri-template>hdfs://hadoop1:8020/user/flume/tweets/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template>
<done-flag></done-flag>
</dataset>
@@ -31,7 +31,7 @@
<data-in name="input" dataset="tweets">
<!-- The instance offset is the dataset time zone's offset from the coordinator's time zone (UTC),
in hours. It is computed as coord:tzOffset() / 60, so for PT (standard time) it evaluates to -8 -->
- <instance>${coord:current(-8)}</instance>
+ <instance>${coord:current(coord:tzOffset() / 60)}</instance>
</data-in>
<data-in name="readyIndicator" dataset="tweets">
<!-- I've done something here that is a little bit of a hack. Since Flume
@@ -39,7 +39,7 @@
rolled to a new directory, we can just use the next directory as an
input event, which instructs Oozie not to kick off a coordinator
action until the next dataset starts being available. -->
- <instance>${coord:current(-7)}</instance>
+ <instance>${coord:current(1 + (coord:tzOffset() / 60))}</instance>
</data-in>
</input-events>
<action>
@@ -50,6 +50,10 @@
<name>wfInput</name>
<value>${coord:dataIn('input')}</value>
</property>
+ <property> <!-- Hive partition key (yyyyMMddHH): nominal time shifted by the same coord:tzOffset() hour offset used for the 'input' instance, so the partition matches the directory being added -->
+ <name>dateHour</name>
+ <value>${coord:formatTime(coord:dateOffset(coord:nominalTime(), coord:tzOffset() / 60, 'HOUR'), 'yyyyMMddHH')}</value>
+ </property>
</configuration>
</workflow>
</action>
View
9 oozie-workflows/hive-action.xml
@@ -16,10 +16,10 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<workflow-app xmlns="uri:oozie:workflow:0.2" name="hive-load-tweets-wf">
- <start to="hive-load-tweets"/>
+<workflow-app xmlns="uri:oozie:workflow:0.2" name="hive-add-partition-wf">
+ <start to="hive-add-partition"/>
- <action name="hive-load-tweets">
+ <action name="hive-add-partition">
<hive xmlns="uri:oozie:hive-action:0.2">
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
@@ -34,9 +34,10 @@
<value>${workflowRoot}/hive-site.xml</value>
</property>
</configuration>
- <script>load_tweets.q</script>
+ <script>add_partition.q</script>
<param>JSON_SERDE=${workflowRoot}/lib/hive-serdes-1.0-SNAPSHOT.jar</param>
<param>WFINPUT=${wfInput}</param>
+ <param>DATEHOUR=${dateHour}</param>
</hive>
<ok to="end"/>
<error to="fail"/>
View
2  oozie-workflows/load_tweets.q
@@ -1,2 +0,0 @@
-ADD JAR ${JSON_SERDE};
-LOAD DATA INPATH '${WFINPUT}/*' INTO TABLE tweets;
Please sign in to comment.
Something went wrong with that request. Please try again.