Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 481 lines (417 sloc) 12.3 KB
#!/usr/bin/env perl
use strict;
use warnings;
package IOReader {
my $READ_SIZE = 2**20;
sub new {
my ($class, $fh) = @_;
return bless {
fh => $fh,
buf => "",
buf_cursor => 0,
}, $class;
}
my $__purge_mark = int $READ_SIZE * 0.9;
sub purge {
my ($self) = @_;
return unless $self->{buf_cursor} > $__purge_mark;
my $buf2 = substr($self->{buf}, $self->{buf_cursor});
$self->{buf} = $buf2;
$self->{buf_cursor} = 0;
$self->readmore;
}
sub gist {
my ($self) = @_;
return substr($self->{buf}, $self->{buf_cursor}-15, 32);
}
sub readmore {
my ($self) = @_;
return 0 if $self->{eof};
my $buf2 = '';
my $nread = read($self->{fh}, $buf2, $READ_SIZE);
if ($nread) {
$self->{buf} .= $buf2;
return $nread;
}
$self->{eof} = 1;
return 0;
}
sub lookc {
my ($self) = @_;
if (!$self->{eof} && length($self->{buf}) - $self->{buf_cursor} < 100) {
$self->readmore();
}
return substr($self->{buf}, $self->{buf_cursor}, 1);
}
sub lookc_eq {
my ($self, $c) = @_;
if (!$self->{eof} && length($self->{buf}) - $self->{buf_cursor} < 100) {
$self->readmore();
}
return substr($self->{buf}, $self->{buf_cursor}, 1) eq $c;
}
sub inc_cursor {
my ($self) = @_;
$self->{buf_cursor}++;
}
sub getc {
my ($self) = @_;
my $c = $self->lookc();
$self->inc_cursor;
return $c;
}
}
package JSONScanner {
my $digits_onenine = [qw(1 2 3 4 5 6 7 8 9)];
my $digits_zeronine = ['0', @$digits_onenine];
my $hex_digits = [@$digits_zeronine, qw(A B C D E F a b c d e f)];
sub eq_any {
my ($item, $ref_array) = @_;
my $i;
for ($i = 0; $i < @$ref_array; $i++) {
last if $item eq $ref_array->[$i];
}
return $i < @$ref_array;
}
sub new {
my ($class, $fh) = @_;
return bless {
io => IOReader->new($fh),
json_cursor => 0,
event_handler => undef,
}, $class;
}
sub emit {
my ($self, $event_name, $tok) = @_;
my $cb = $self->{event_handler} or return;
$cb->($event_name, $tok);
}
sub scan_ws {
my ($self) = @_;
my $tok = '';
my $io = $self->{io};
pos($io->{buf}) = $io->{buf_cursor};
if ($io->{buf} =~ m/\G([\x{0020}\x{000a}\x{000d}\x{0009}]+)/) {
$tok = $1;
$io->{buf_cursor} += length($tok);
}
}
sub scan_elements {
my ($self) = @_;
my $io = $self->{io};
$self->scan_element;
my $c = $io->lookc();
while ($c eq ',') {
$io->inc_cursor;
$self->emit(elements_delimiter => $c);
$self->scan_element;
$c = $io->lookc();
}
}
sub scan_json {
my ($self) = @_;
$self->{io}->readmore();
$self->emit('begin_json');
$self->scan_element;
$self->emit('end_json');
}
sub scan_element {
my ($self) = @_;
$self->emit('begin_element');
$self->scan_ws;
$self->scan_value;
$self->scan_ws;
$self->emit('end_element');
}
sub scan_value {
my ($self) = @_;
my $io = $self->{io};
my $c = $io->lookc;
$self->emit('begin_value');
if ($c eq 't') {
$io->inc_cursor;
my $expected = 'true';
my $tok = $c;
for (1,2,3) {
$c = $io->getc;
$c eq substr($expected, $_, 1) or die "Expecting: true";
$tok .= $c;
}
$self->emit(value_true => $tok);
} elsif ($c eq 'f') {
$io->inc_cursor;
my $expected = 'false';
my $tok = $c;
for (1,2,3,4) {
$c = $io->getc;
$c eq substr($expected, $_, 1) or die "Expecting: false";
$tok .= $c;
}
$self->emit(value_false => $tok);
} elsif ($c eq 'n') {
$io->inc_cursor;
my $expected = 'null';
my $tok = $c;
for (1,2,3) {
$c = $io->getc;
$c eq substr($expected, $_, 1) or die "Expecting: null";
$tok .= $c;
}
$self->emit(value_null => $tok);
} elsif ($c eq '[') {
$self->scan_array;
} elsif ($c eq '{') {
$self->scan_object;
} elsif ($c eq '"') {
$self->scan_string;
} else {
$self->scan_number;
}
$self->emit('end_value');
}
sub scan_number {
my ($self) = @_;
my $io = $self->{io};
my $tok = '';
my $c = $io->lookc;
if ($c eq '-') {
$io->inc_cursor;
$tok .= $c;
}
$c = $io->lookc;
if ($c =~ m/\A[123456789]\z/) {
$io->inc_cursor;
$tok .= $c;
$c = $io->lookc;
if ($c =~ m/\A[0123456789]\z/) {
$tok .= $self->scan_digits;
}
} elsif ($c eq '0') {
$io->inc_cursor;
$tok .= $c;
} else {
die "Expect a digit: $c";
}
$c = $io->lookc;
if ($c eq '.') {
$io->inc_cursor;
$tok .= $c;
$tok .= $self->scan_digits;
$c = $io->lookc;
}
if ($c eq 'e' || $c eq 'E') {
$io->inc_cursor;
$tok .= $c;
$c = $io->getc;
if ($c eq '+' || $c eq '-') {
$tok .= $c;
}
$tok .= $self->scan_digits;
}
$self->emit(number => $tok);
}
sub scan_digits {
my ($self) = @_;
my $tok = '';
my $io = $self->{io};
pos($io->{buf}) = $io->{buf_cursor};
if ($io->{buf} =~ /\G([0123456789]+)/) {
$tok .= $1;
$io->{buf_cursor} += length($1);
} else {
die "Expecting digits";
}
return $tok;
}
sub scan_string {
my ($self) = @_;
my $tok = '';
my $io = $self->{io};
## valid utf8 sequance -- https://stackoverflow.com/questions/13116685/regexp-to-check-if-code-contains-non-utf-8-characters
## /^([\x00-\x7F]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|[\xEE-\xEF][\x80-\xBF]{2}|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$/
###
### scan_characters;
## \x22 " , \x5c \
pos($io->{buf}) = $io->{buf_cursor};
if ($io->{buf} =~ m#\G
(\"
(
(?: [^\x00-\x19\x22\x5c] ) |
(?: \\ (?: u[0123456789ABCDEFabcdef]{4} | [\\\"\/bnrt] ))
)*
\")
#x) {
$tok .= $1;
$io->{buf_cursor} += length($1);
}
### /scan_characters
die(q<Expecting: string. Got: [[> . $io->gist . q']]') unless $tok ne '';
$self->emit(string => $tok);
return $tok;
}
sub scan_array {
my ($self) = @_;
my $io = $self->{io};
my $c = $io->getc;
die q<Expecting '['> unless $c eq '[';
$self->emit('begin_array', $c);
$self->scan_ws;
$c = $io->lookc;
if ($c eq ']') {
$io->inc_cursor;
$self->emit('end_array', $c);
} else {
$self->scan_elements;
$c = $io->getc;
die q<Expecting ']'> unless $c eq ']';
$self->emit('end_array', $c);
}
}
sub scan_members {
my ($self) = @_;
my $io = $self->{io};
$self->scan_member;
while ($io->lookc_eq(',')) {
$io->inc_cursor;
$self->emit(members_delimiter => ',');
$self->scan_member;
}
}
sub scan_member {
my ($self) = @_;
my $io = $self->{io};
$self->emit('begin_member');
$self->scan_ws;
$self->scan_string;
$self->scan_ws;
my $c = $io->getc();
die q<Expecting: ':'> unless $c eq ':';
$self->emit(member_delimiter => $c);
$self->scan_element;
$self->emit('end_member');
}
sub scan_object {
my ($self) = @_;
my $io = $self->{io};
my $c = $io->getc;
die q<Expecting '{'> unless $c eq '{';
$self->emit('begin_object', $c);
$self->scan_ws;
$c = $io->lookc;
if ($c eq '}') {
$io->inc_cursor;
$self->emit('end_object', $c);
} else {
### scan_members
# $self->scan_members;
$self->scan_member;
while ($io->lookc_eq(',')) {
$io->inc_cursor;
$self->emit(members_delimiter => ',');
$self->scan_member;
}
### /scan_members
$c = $io->getc;
die q<Expecting '}'> unless $c eq '}';
$self->emit('end_object', $c);
}
}
sub purge {
my ($self) = @_;
$self->{io}->purge;
}
}
package JSONPrinter {
sub new {
my ($class, $fh) = @_;
my $scanner = JSONScanner->new($fh);
return bless { scanner => $scanner }, $class;
}
sub prettify {
my ($self) = @_;
my $indent = 0;
my $tok_print = '';
my $after_member_delimiter = 0;
my $previous_event = '';
my $handlers = +{
before => {
end_array => sub {
$indent--;
print "\n" if $previous_event eq 'begin_array';
print "\n" . ' ' x $indent;
},
end_object => sub {
$indent--;
print "\n" if $previous_event eq 'begin_object';
print "\n" . ' ' x $indent;
},
end_json => sub {
print "\n";
},
begin_member => sub {
print "\n" . ' 'x $indent if $previous_event ne 'member_delimiter';
},
begin_element => sub {
if ($previous_event eq 'begin_array') {
print "\n";
}
if ($previous_event eq 'member_delimiter') {
print ' ';
}
else {
print ' 'x$indent;
}
}
},
after => {
begin_object => sub {
$indent++;
},
begin_array => sub {
$indent++;
},
elements_delimiter => sub {
print "\n";
},
}
};
$self->{scanner}->{event_handler} = sub {
my ($event_name, $tok) = @_;
$tok_print = $tok;
my $cb = $handlers->{before}{$event_name};
$cb->($tok) if $cb;
if (defined $tok) {
print $tok_print;
}
$cb = $handlers->{after}{$event_name};
$cb->($tok) if $cb;
$previous_event = $event_name;
$self->{scanner}->purge if ($event_name eq "end_object");
};
$self->{scanner}->scan_json();
}
sub minify {
my ($self) = @_;
$self->{scanner}{event_handler} = sub {
my ($event_name, $tok) = @_;
return unless defined($tok);
print $tok;
$self->{scanner}->purge if ($event_name eq "end_object");
};
$self->{scanner}->scan_json;
}
}
my $json_printer = JSONPrinter->new(\*STDIN);
if (grep { $_ =~ /\A(-m|--minify)\z/ } @ARGV) {
$json_printer->minify;
} else {
$json_printer->prettify;
}
__END__
=head1 Usage
json-print [--minify]
Prettify:
cat data.json | json-print
Minify (remove whitespaces):
cat data.json | json-print --minify
=cut