Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
A mirroring program that produces flat output directly.
- Loading branch information
Showing
4 changed files
with
154 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/usr/bin/env perl | ||
use v5.14; | ||
use strict; | ||
use warnings; | ||
use autodie; | ||
|
||
use FindBin; | ||
use JSON; | ||
use Try::Tiny; | ||
use HTTP::Tiny; | ||
use Path::Tiny qw(path); | ||
use Getopt::Std; | ||
|
||
# Download a URL with HTTP::Tiny and return the response body.
#
# Parameters:
#   $url - the address to GET.
#
# Returns the response content when HTTP::Tiny reports success.  On failure
# a warning naming the URL, the status and the reason is emitted and a bare
# return (undef in scalar context) is given, so the caller can skip the
# dataset instead of aborting.
sub fetch {
    my $url = shift;
    my $ua = HTTP::Tiny->new;
    my $response = $ua->get($url);

    return $response->{content} if $response->{success};

    my $status = $response->{status} // 'unknown';
    my $reason = $response->{reason} // '';
    my $detail = "status = $status $reason";

    # HTTP::Tiny signals its own errors (bad scheme, connect/DNS/TLS
    # failure, timeout) with pseudo-status 599 and puts the error text in
    # {content}; without it the log cannot tell those failures apart.
    if ($status eq '599' && defined $response->{content}) {
        (my $message = $response->{content}) =~ s/\s+\z//;
        $detail .= ": $message" if length $message;
    }

    warn "fetch failed: url = $url ($detail)";
    return;
}
|
||
# Run one processor plugin over the fetched content.
#
# Parameters:
#   $processor - file name of the plugin, looked up in the "processor"
#                directory that sits next to the script's own directory.
#   $content   - the data handed to the plugin.
#
# The plugin file is evaluated with `do` and must evaluate to a code
# reference; that sub is called with $content and its result is returned.
#
# Dies when the plugin cannot be read, fails to compile/run, or does not
# evaluate to a sub.
sub process {
    my ($processor, $content) = @_;
    my $plugin = "$FindBin::Bin/../processor/${processor}";

    # `do FILE` returns undef both when the file cannot be read (error in
    # $!) and when it fails to compile or run (error in $@).  Capture both
    # right away, before anything else can overwrite them.
    my $proc = do $plugin;
    my ($eval_error, $os_error) = ($@, "$!");

    unless ('CODE' eq ref($proc)) {
        die "$plugin load fail: $eval_error" if $eval_error;

        # Only trust the saved $! when the file really is unreadable; a
        # readable plugin that merely evaluates to undef leaves $! stale.
        die "$plugin load fail: $os_error" if !defined($proc) && !-r $plugin;

        die "$plugin needs to return a sub.";
    }

    return $proc->($content);
}
|
||
# Describe the current local time as two strings.
#
# Returns a two-element list:
#   1. the date as YYYY-MM-DD
#   2. the time of day as HH-MM-SS, where the seconds field is always 00
sub current_time {
    my @now = localtime;

    my ($minute, $hour, $day) = @now[1, 2, 3];
    my $month     = $now[4] + 1;       # localtime months are 0-based
    my $full_year = $now[5] + 1900;    # localtime years count from 1900

    my $date_part = sprintf '%04d-%02d-%02d', $full_year, $month, $day;
    my $time_part = sprintf '%02d-%02d-%02d', $hour, $minute, 0;

    return ($date_part, $time_part);
}
|
||
# Build the destination path for one saved result and create its directory.
#
# Parameters:
#   $dir        - root output directory
#   $collection - collection name (first path component below $dir)
#   $dataset    - dataset name; used as a directory and as the file's base name
#   $format     - file extension, without the dot
#
# The layout is $dir/$collection/$dataset/<date>/<time>/$dataset.$format,
# with <date> and <time> taken from current_time().
#
# Returns the Path::Tiny object for the file; its parent directory exists
# when this sub returns.
sub output_file {
    my ($dir, $collection, $dataset, $format) = @_;

    my ($ymd, $hms) = current_time();
    my $filename    = "${dataset}.${format}";

    my $target = path($dir)->child($collection, $dataset, $ymd, $hms, $filename);
    $target->parent->mkpath;

    return $target;
}
|
||
# Mirror one dataset: fetch its URL, run every workflow step over the
# downloaded content, and save each step's output to its own file.
#
# Parameters:
#   $output_directory - root directory under which results are stored
#   $dataset          - hash ref read from the config; this sub uses its
#                       url, workflow, collection and name entries.  Each
#                       workflow entry supplies a processor and a format.
#
# Progress is printed to STDOUT ("FETCH: ..." / "SAVED: ...").  Nothing is
# saved when the fetch fails or yields no data; a step whose processor
# yields no data is skipped.
sub work {
    my ($output_directory, $dataset) = @_;

    say "FETCH: $dataset->{url}";
    my $content = fetch($dataset->{url});

    # Test for "defined and non-empty" rather than plain truth: a payload
    # consisting of the single character "0" is false in Perl but is still
    # real data that must be mirrored.
    return unless defined $content && length $content;

    for my $step (@{ $dataset->{workflow} }) {
        my $output = process( $step->{processor}, $content );
        next unless defined $output && length $output;

        my $p = output_file(
            $output_directory,
            $dataset->{collection},
            $dataset->{name},
            $step->{format},
        );

        $p->spew($output);
        say "SAVED: $p";
    }

    return;
}
|
||
# Abort the program with a one-line usage example.
# The message ends in a newline, so die does not append file/line details.
sub HELP_MESSAGE {
    my $usage = sprintf "%s -c etc/data.json -o /data\n", $0;
    die $usage;
}
|
||
# --- main program -----------------------------------------------------
# Options: -c <config.json> (required), -o <output directory> (required),
# -h (show usage).  The remaining letters in the spec are accepted but not
# used in this part of the script.
my %opts;
getopts("vgho:c:j:", \%opts);

# -h is declared in the option spec, so honour it: previously it was parsed
# and then silently ignored whenever -c and -o were also given.
if ($opts{h} || !($opts{c} && $opts{o})) {
    HELP_MESSAGE();
}

# The config file is a JSON array of dataset descriptions.
my $datasets = JSON->new->utf8->decode( scalar path($opts{c})->slurp );

for my $dataset (@$datasets) {
    say "START: $dataset->{collection} / $dataset->{name}";

    # An exception in one dataset is reported and must not stop the others.
    try {
        work($opts{o}, $dataset);
    } catch {
        warn "ERROR: $_";
    };

    say "DONE: $dataset->{collection} / $dataset->{name}";
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
[ | ||
{ | ||
"collection": "tisv", | ||
"name": "roadlevel_value", | ||
"description": "TISV 路段一分鐘動態資訊", | ||
"comment": "Data source http://tisvcloud.freeway.gov.tw/", | ||
"refresh": "minutely", | ||
"url": "http://tisvcloud.freeway.gov.tw/roadlevel_value.xml.gz", | ||
"workflow": [ | ||
{ | ||
"processor": "gzip-decompress", | ||
"format": "xml" | ||
} | ||
] | ||
}, | ||
{ | ||
"collection": "aec", | ||
"name": "gammamonitor", | ||
"description": "全國環境輻射偵測", | ||
"refresh": "minutely", | ||
"url": "https://www.aec.gov.tw/open/gammamonitor.csv", | ||
"workflow": [ | ||
{ | ||
"processor": "save-as-is", | ||
"format": "csv" | ||
}, | ||
{ | ||
"processor": "gammamonitor-csv-to-json", | ||
"format": "json" | ||
} | ||
] | ||
}, | ||
{ | ||
"collection": "aec", | ||
"name": "spds", | ||
"description": "核能電廠運轉資訊", | ||
"url": "https://www.aec.gov.tw/open/spds.csv", | ||
"refresh": "minutely", | ||
"workflow": [ | ||
{ | ||
"processor": "save-as-is", | ||
"format": "csv" | ||
}, | ||
{ | ||
"processor": "spds-csv-to-json", | ||
"format": "json" | ||
} | ||
] | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Processor that returns its first argument unchanged.
sub {
    my ($content) = @_;
    return $content;
}