This repository has been archived by the owner on May 12, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 35
/
PgeConfig_RatAggregator.xml
49 lines (41 loc) · 2.16 KB
/
PgeConfig_RatAggregator.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
<?xml version="1.0" encoding="UTF-8"?>
<pgeConfig>
<!-- How to run the PGE.
     Commands are written for the /bin/bash shell with [JobDir] as the exe dir.
     * The PATH prefix below is a developer-specific location.
       NOTE(review): confirm it exists on the deployment host.
     * mkdir -p is used so a missing parent directory or an already existing
       directory does not make the step fail.
     * [RatAggregatorScript] is not defined in this file; presumably it is supplied
       by the caller's metadata (TODO confirm).
     * The python one-liner turns the comma-separated [InputFiles] value into
       space-separated arguments; print(...) with parentheses runs under both
       Python 2 and Python 3. -->
<exe dir="[JobDir]" shell="/bin/bash">
  <cmd>export PATH=/Users/mattmann/bin/:${PATH}</cmd>
  <cmd>shopt -s expand_aliases</cmd>
  <cmd>echo "Creating working dirs"</cmd>
  <cmd>mkdir -p [JobInputDir] ; mkdir -p [JobOutputDir]; mkdir -p [JobLogDir]</cmd>
  <cmd>echo "Running RAT aggregator"</cmd>
  <cmd>[RatAggregatorScript] `python -c "print(' '.join('[InputFiles]'.split(',')))"` > [JobOutputDir]/rat_aggregate_stats_[DateMilis].csv</cmd>
</exe>
<!-- Output products to ingest -->
<output>
  <!-- One dir element per directory to scan; further dir elements may be added.
       Files whose names match regExp get a metadata file written by
       metFileWriterClass, which is passed the value of args. -->
  <dir createBeforeExe="false"
       path="[JobOutputDir]">
    <files args="/usr/local/xdata-code-audit/deploy/pge/config/metout/rat_aggregate_log_metout.xml"
           metFileWriterClass="org.apache.oodt.cas.pge.writers.metlist.MetadataListPcsMetFileWriter"
           regExp=".*\.csv"/>
  </dir>
</output>
<!-- Custom metadata to add to output files -->
<customMetadata>
  <!-- Helper keys for shell operators.
       The characters < > and & are written as entity references so that the
       document is well-formed; the parsed attribute values are still the
       literal characters. -->
  <metadata key="LessThan" val="&lt;"/>
  <metadata key="LessThanOrEqualTo" val="[LessThan]="/>
  <metadata key="GreaterThan" val="&gt;"/>
  <metadata key="GreaterThanOrEqualTo" val="[GreaterThan]="/>
  <metadata key="Exclamation" val="!"/>
  <metadata key="Ampersand" val="&amp;"/>
  <!-- Built from [Exclamation] so that it expands to "!=". -->
  <metadata key="NotEqualTo" val="[Exclamation]="/>
  <metadata key="LogicalAnd" val="[Ampersand][Ampersand]"/>
  <metadata key="CshPipeToStdOutAndError" val="[GreaterThan][Ampersand][Exclamation]"/>

  <!-- Job-specific keys.
       [DateMilis] is derived from [ProductionDateTime] and is used both in the
       job directory path and in the output CSV file name.
       NOTE(review): [DATE.UTC] presumably resolves to the current UTC time when
       the configuration is evaluated; confirm against the CAS-PGE documentation. -->
  <metadata key="ProductionDateTime" val="[DATE.UTC]"/>
  <metadata key="DateMilis" val="[DATE_TO_MILLIS([ProductionDateTime],UTC_FORMAT,1970-01-01)]"/>
  <metadata key="JobDir" val="/data/jobs/rataggregate/[DateMilis]"/>
  <metadata key="JobInputDir" val="[JobDir]/input"/>
  <metadata key="JobOutputDir" val="[JobDir]/output"/>
  <metadata key="JobLogDir" val="[JobDir]/logs"/>
  <metadata key="ProductType" val="RatAggregateLog"/>
  <!-- Query over RatLog products, sorted by CAS.ProductReceivedTime, with each
       result formatted as FileLocation/Filename. -->
  <metadata key="InputFiles" val="SQL(FORMAT='$FileLocation/$Filename',SORT_BY='CAS.ProductReceivedTime'){SELECT FileLocation,Filename,CAS.ProductReceivedTime FROM RatLog}"/>
</customMetadata>
</pgeConfig>