Skip to content

Commit

Permalink
A mirroring program that directly produces flat output.
Browse files Browse the repository at this point in the history
  • Loading branch information
gugod committed Feb 29, 2020
1 parent 0763d5c commit 7d9c952
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 0 deletions.
99 changes: 99 additions & 0 deletions bin/mirror.pl
@@ -0,0 +1,99 @@
#!/usr/bin/env perl
use v5.14;
use strict;
use warnings;
use autodie;

use FindBin;
use JSON;
use Try::Tiny;
use HTTP::Tiny;
use Path::Tiny qw(path);
use Getopt::Std;

# Download $url with HTTP::Tiny and return the response body.
#
# Returns the body when the response is flagged successful. On failure a
# warning naming the URL, the status code and the reason phrase is emitted
# and nothing is returned (undef in scalar context), so callers can write
# `fetch($url) or return`.
sub fetch {
    my ($url) = @_;

    my $response = HTTP::Tiny->new->get($url);
    return $response->{content} if $response->{success};

    my $status = $response->{status} // 'unknown';
    my $reason = $response->{reason} // 'unknown';
    my $detail = "$status $reason";

    # HTTP::Tiny reports connection-level and internal failures as the
    # pseudo-status 599 and stores the exception text in the content field;
    # include it, since the bare reason phrase says nothing about the cause.
    if ($status eq '599') {
        my $error = $response->{content} // '';
        $error =~ s/\s+\z//;
        $detail .= ": $error" if length $error;
    }

    warn "fetch failed: url = $url ($detail)";
    return;
}

# Run $content through the processor plugin named $processor.
#
# The plugin is the file "$FindBin::Bin/../processor/$processor". It is
# evaluated with `do` on every call and must evaluate to a code reference,
# which is then called with $content; its return value is passed through.
#
# Dies when the plugin cannot be read, fails to compile or run, or does not
# evaluate to a code reference.
sub process {
    my ($processor, $content) = @_;
    my $plugin = "$FindBin::Bin/../processor/${processor}";

    my $proc = do $plugin;

    # Snapshot both error variables right away: `do FILE` reports compile
    # and runtime errors in $@ but an unreadable file in $!, and either one
    # can be clobbered by later statements.
    my ($eval_error, $os_error) = ($@, "$!");

    unless ('CODE' eq ref($proc)) {
        die "$plugin load fail: $eval_error" if $eval_error;

        # A missing or unreadable plugin would otherwise be misreported as
        # "needs to return a sub", hiding a simple typo in the config.
        die "$plugin load fail: $os_error" unless -f $plugin && -r _;

        die "$plugin needs to return a sub.";
    }

    return $proc->($content);
}

# Return the current local time as two strings: a date stamp "YYYY-MM-DD"
# and a time stamp "HH-MM-00". The seconds field is always written as 00,
# so every call within the same minute yields the same pair.
sub current_time {
    my ($minute, $hour, $day, $month, $year) = (localtime)[1 .. 5];

    # localtime counts years from 1900 and months from 0.
    my $date_stamp = sprintf '%04d-%02d-%02d', $year + 1900, $month + 1, $day;
    my $time_stamp = sprintf '%02d-%02d-%02d', $hour, $minute, 0;

    return ($date_stamp, $time_stamp);
}

# Build the destination path for one output file and make sure its
# directory exists.
#
# The layout is
#   $dir/$collection/$dataset/<date stamp>/<time stamp>/$dataset.$format
# where the two stamps come from current_time(). Returns the path object
# produced by path(); the file itself is not created here.
sub output_file {
    my ($dir, $collection, $dataset, $format) = @_;

    my ($date_stamp, $time_stamp) = current_time();
    my $filename = "$dataset.$format";

    my $target = path($dir)->child(
        $collection, $dataset, $date_stamp, $time_stamp, $filename,
    );
    $target->parent->mkpath;

    return $target;
}

# Mirror a single dataset: fetch its URL, then run every workflow step on
# the fetched content and save each step's output under $output_directory.
#
# $dataset is a hash reference; this sub reads its url, collection, name
# and workflow keys, and the processor and format keys of each workflow
# step. A failed or empty fetch abandons the dataset; a step whose
# processor yields a false value is skipped without writing a file.
sub work {
    my ($output_directory, $dataset) = @_;

    say "FETCH: $dataset->{url}";
    my $content = fetch($dataset->{url});
    return unless $content;

    STEP:
    for my $step (@{ $dataset->{workflow} }) {
        my $output = process($step->{processor}, $content);
        next STEP unless $output;

        my $destination = output_file(
            $output_directory,
            @{$dataset}{qw(collection name)},
            $step->{format},
        );

        $destination->spew($output);
        say "SAVED: $destination";
    }
}

# Abort with a one-line usage example. The trailing newline keeps die from
# appending the file name and line number to the message.
sub HELP_MESSAGE {
    my $usage = "$0 -c etc/data.json -o /data\n";
    die $usage;
}

# Command-line entry point.
#
#   -c FILE   JSON file describing the datasets to mirror (required)
#   -o DIR    directory under which the output tree is written (required)
#   -h        print the usage line and stop
#
# The -v, -g and -j switches are accepted by the parser, but nothing in
# this part of the script acts on them.
my %opts;
my $parsed_ok = getopts("vgho:c:j:", \%opts);

# getopts returns false (after warning) on an unknown switch. Treat that,
# an explicit -h, and a missing -c/-o alike: show the usage line and stop.
if (!$parsed_ok || $opts{h} || !($opts{c} && $opts{o})) {
    HELP_MESSAGE();
}

# The decoder is in utf8 mode and therefore expects undecoded bytes, so the
# config is read raw rather than through whatever I/O layers are in effect.
my $datasets = JSON->new->utf8->decode( scalar path($opts{c})->slurp_raw );

die "$opts{c}: expected a JSON array of datasets\n"
    unless 'ARRAY' eq ref($datasets);

for my $dataset (@$datasets) {
    say "START: $dataset->{collection} / $dataset->{name}";

    # One failing dataset must not stop the rest of the run: report the
    # error and carry on with the next dataset.
    try {
        work($opts{o}, $dataset);
    } catch {
        warn "ERROR: $_";
    };

    say "DONE: $dataset->{collection} / $dataset->{name}";
}
2 changes: 2 additions & 0 deletions cpanfile
Expand Up @@ -9,3 +9,5 @@ requires "Text::CSV" => 0;
requires "URI" => 0;
requires "XML::Loy" => 0;
requires "YAML" => 0;
requires "Path::Tiny" => 0;
requires "Try::Tiny" => 0;
50 changes: 50 additions & 0 deletions etc/data-minutely.json
@@ -0,0 +1,50 @@
[
{
"collection": "tisv",
"name": "roadlevel_value",
"description": "TISV 路段一分鐘動態資訊",
"comment": "Data source http://tisvcloud.freeway.gov.tw/",
"refresh": "minutely",
"url": "http://tisvcloud.freeway.gov.tw/roadlevel_value.xml.gz",
"workflow": [
{
"processor": "gzip-decompress",
"format": "xml"
}
]
},
{
"collection": "aec",
"name": "gammamonitor",
"description": "全國環境輻射偵測",
"refresh": "minutely",
"url": "https://www.aec.gov.tw/open/gammamonitor.csv",
"workflow": [
{
"processor": "save-as-is",
"format": "csv"
},
{
"processor": "gammamonitor-csv-to-json",
"format": "json"
}
]
},
{
"collection": "aec",
"name": "spds",
"description": "核能電廠運轉資訊",
"url": "https://www.aec.gov.tw/open/spds.csv",
"refresh": "minutely",
"workflow": [
{
"processor": "save-as-is",
"format": "csv"
},
{
"processor": "spds-csv-to-json",
"format": "json"
}
]
}
]
3 changes: 3 additions & 0 deletions processor/save-as-is
@@ -0,0 +1,3 @@
# Processor plugin: hand the content back unchanged. The anonymous sub is
# the value this file evaluates to.
sub {
    my $content = shift;
    return $content;
}

0 comments on commit 7d9c952

Please sign in to comment.