Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
397 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,350 @@ | ||
.finos.dep.include"util/util.q" | ||
|
||
// Carve a run of fixed-width fields out of a vector. | ||
// Reads (sum of all widths) items of data beginning at offset, then cuts | ||
// that run where each field starts. | ||
// @param x fields: dictionary of field names and widths | ||
// @param y offset: index in data of the first field | ||
// @param z data: vector to read from | ||
// @return dictionary of field names and the matching slices of data | ||
.finos.unzip.priv.split:{[x;y;z] | ||
    names:key x; | ||
    widths:value x; | ||
    / each field starts where the widths before it end | ||
    starts:(sums widths)-widths; | ||
    chunk:z y+til sum widths; | ||
    names!starts cut chunk} | ||
|
||
// Parse a range of data made of consecutive records, each of which starts | ||
// with a fixed-width header. | ||
// parser is a function of three arguments: | ||
// its first argument will be (data;extra); extra is passed as :: if not | ||
// included | ||
// its second argument will be the index of the first item after the | ||
// record's fixed-width header (record start plus the sum of the widths) | ||
// its third argument will be the raw headers of the record, split and | ||
// labeled according to fields | ||
// it should return (record;next index) | ||
// parser will be called until it returns next index equal to length | ||
// @param x (parser;fields;extra), or (parser;fields) | ||
// @param y data | ||
// @param z length: the index at which parsing stops | ||
// @return parsed records, in the order they were read | ||
// @see .finos.unzip.priv.split | ||
.finos.unzip.priv.parse:{ | ||
/ default extra to (::) when only (parser;fields) was given | ||
if[2=count x; | ||
x,:(::); | ||
]; | ||
|
||
/ one step: x is (parser;fields;extra), y is data, and z is the state | ||
/ (records so far;current index;length) | ||
f:{ | ||
$[ | ||
/ reached length: hand the state back unchanged, which ends the iteration | ||
(z 1)=z 2; | ||
z; | ||
[ | ||
/ split the header that starts at the current index | ||
h:.finos.unzip.priv.split[x 1;z 1]y; | ||
/ call the parser with the index just past that header | ||
a:x[0][(y;x 2);(z 1)+sum x 1]h; | ||
/ append the record and move to the index the parser returned | ||
(raze(first z;enlist a 0);a 1;z 2)]]}; | ||
|
||
/ iterate from index 0, seeded with a placeholder record that 1_ drops | ||
1_first f[x][y]over(enlist(enlist`)!enlist(::);0;z)} | ||
|
||
// field names and widths (in bytes) for end-of-central-directory | ||
// cmt has width 0 here: .finos.unzip.priv.pecd fills it in from cln | ||
.finos.unzip.priv.wecd:`sig`dnu`dcd`den`ten`csz`cof`cln`cmt!4 2 2 2 2 4 4 2 0 | ||
|
||
// Parse end-of-central-directory record. | ||
// @param x bytes, beginning at the first byte of the record | ||
// @return end-of-central-directory record: dictionary keyed as in | ||
// .finos.unzip.priv.wecd | ||
.finos.unzip.priv.pecd:{ | ||
/ split the fixed-width fields off the front of x | ||
r:.finos.unzip.priv.split[.finos.unzip.priv.wecd;0]x; | ||
/ functional update: turn every field except sig and cmt from | ||
/ byte-reversed raw bytes into a number | ||
r:![r;();0b;{y!x y}[{({0x0 sv reverse x};x)}'](key r)except`sig`cmt]; | ||
/ the comment is taken as the last cln bytes of x, as characters | ||
r:update cmt:"c"$(neg cln)#x from r; | ||
r} | ||
|
||
// field names and widths (in bytes) for the fixed-width header of a | ||
// central directory record; decoded by .finos.unzip.priv.pcd | ||
.finos.unzip.priv.wcd:`sig`ver`vrr`flg`cmp`mtm`mdt`crc`csz`usz`nln`xln`cln`dnu`iat`xat`lof!4 2 2 2 2 2 2 4 4 4 2 2 2 2 2 4 4 | ||
|
||
// Parse a central directory record. | ||
// @param x (bytes;extra); extra is read into e but not used | ||
// @param y index; .finos.unzip.priv.parse passes the index of the first | ||
// byte after the fixed-width header | ||
// @param z header: raw fields keyed as in .finos.unzip.priv.wcd | ||
// @return (record;next index) | ||
// @see .finos.unzip.priv.parse | ||
.finos.unzip.priv.pcd:{ | ||
e:x 1; | ||
x:x 0; | ||
|
||
/ decode the raw header bytes: "0x0 sv reverse" turns byte-reversed | ||
/ bytes into a number, and "0b vs" then expands it into bits | ||
r:update | ||
/ first byte as an integer, divided by 10 | ||
{("i"$first x)%10}ver, | ||
{("i"$first x)%10}vrr, | ||
0b vs 0x0 sv reverse flg, | ||
0x0 sv reverse cmp, | ||
/ 16 bits cut 5/6/5 into hours, minutes and seconds-divided-by-2 | ||
{"v"$24 60 60 sv 1 1 2*2 sv'0 5 11 cut 0b vs 0x0 sv reverse x}mtm, | ||
/ 16 bits cut 7/4/5 into years-since-1980, month and day | ||
{.finos.util.ymd . 1980 0 0+2 sv'0 7 11 cut 0b vs 0x0 sv reverse x}mdt, | ||
0x0 sv reverse csz, | ||
0x0 sv reverse usz, | ||
0x0 sv reverse nln, | ||
0x0 sv reverse xln, | ||
0x0 sv reverse cln, | ||
0x0 sv reverse dnu, | ||
0b vs 0x0 sv reverse iat, | ||
0b vs 0x0 sv reverse xat, | ||
0x0 sv reverse lof | ||
from z; | ||
|
||
/ three variable-length fields follow, starting at y: name (nln bytes), | ||
/ extra field (xln bytes) and comment (cln bytes) | ||
r:update | ||
fnm:`$"c"$x y+til nln, | ||
xfd:x y+nln+til xln, | ||
cmt:"c"$x y+nln+xln+til cln | ||
from r; | ||
|
||
/ the next record starts right after the comment | ||
(r;exec y+nln+xln+cln from r)} | ||
|
||
// field names and widths (in bytes) for the header of an extra field | ||
// record: id, then sz, the number of payload bytes that follow | ||
.finos.unzip.priv.wxfd:`id`sz!2 2 | ||
|
||
// Parse an extra field record. | ||
// Ids 0x0001, 0x5455 and 0x7875 have their payload decoded and joined onto | ||
// the record; any other id is logged as a warning and left undecoded. | ||
// @param x (bytes;extra); extra is read into e but not used | ||
// @param y index; .finos.unzip.priv.parse passes the index of the first | ||
// payload byte (just past the id/sz header) | ||
// @param z header: raw id and sz bytes | ||
// @return (record;next index) | ||
// @throws parse if a payload is inconsistent with its declared size | ||
// @throws nyi if a 0x7875 payload has a version byte other than 1 | ||
// @see .finos.unzip.priv.parse | ||
.finos.unzip.priv.pxfd:{ | ||
e:x 1; | ||
x:x 0; | ||
|
||
/ id is byte-reversed so it can be compared with the literals below; | ||
/ sz becomes a number | ||
r:update | ||
reverse id, | ||
0x0 sv reverse sz | ||
from z; | ||
|
||
r:$[ | ||
/ ZIP64 | ||
0x0001~r`id; | ||
/ a variable number of fields in fixed order: parse according to size | ||
/ (keeps the leading widths of 8 8 8 4 whose running total reaches sz, | ||
/ cuts the payload at those widths and names the pieces in order) | ||
r,:0x0 sv'reverse each{((count y)#x)!y}[`usz`csz`lof`dnu](sums prev{(1+((type y)$sums y)?x)#y}[r`sz]8 8 8 4h)cut y _x; | ||
|
||
/ Extended Timestamp | ||
0x5455~r`id; | ||
[ | ||
/ check field size matches flag byte | ||
/ (one flag byte, plus 4 bytes for every bit set in it) | ||
if[(r`sz)<>1+4*sum 0b vs first(r`sz)#y _x; | ||
'`parse; | ||
]; | ||
/ a variable number of fields in fixed order: parse according to size | ||
/ (each 4-byte value is treated as epoch seconds and shifted by ltime) | ||
r,:ltime .finos.util.timestamp_from_epoch each 0x0 sv'reverse each{((count y)#x)!y}[`mtime`atime`ctime]4 cut 1_(r`sz)#y _x; | ||
r]; | ||
|
||
/ uid/gid field: a version byte followed by length-prefixed values | ||
0x7875~r`id; | ||
[ | ||
/ check version | ||
if[1<>first(r`sz)#y _x; | ||
'`nyi; | ||
]; | ||
/ check field size is consistent with data | ||
/ (walks the length-prefixed values, totalling their lengths; sz must be | ||
/ that total plus 3) | ||
if[(r`sz)<>3+last{r:x 1;x:x 0;s:first x;((1+s)_x;r+s)}over(1_(r`sz)#y _x;0); | ||
'`parse; | ||
]; | ||
/ walk the values again, collecting them, and label them uid and gid | ||
r,:0x0 sv'reverse each`uid`gid!last{r:x 1;x:x 0;s:first x;x:1_x;$[s;(s _ x;r,enlist s#x);(x;r)]}over(1_(r`sz)#y _x;()); | ||
r]; | ||
|
||
[ | ||
.finos.log.warning(-3!r`id),": unimplemented extra field id; skipping"; | ||
r]]; | ||
|
||
/ the next record starts sz bytes after the payload start | ||
(r;exec y+sz from r)} | ||
|
||
// field names and widths (in bytes) for the fixed-width header of a file | ||
// data record; decoded by .finos.unzip.priv.pfd | ||
.finos.unzip.priv.wfd:`sig`ver`os`flg`cmp`mtm`mdt`crc`csz`usz`nln`xln!4 1 1 2 2 2 2 4 4 4 2 2 | ||
|
||
// Parse a file data record and, if selected, inflate its contents. | ||
// @param x (bytes;extra); extra is (::) to inflate every file, otherwise | ||
// the file name(s) to inflate - other files get an empty fdu | ||
// @param y index; .finos.unzip.priv.parse passes the index of the first | ||
// byte after the fixed-width header | ||
// @param z header: raw fields keyed as in .finos.unzip.priv.wfd | ||
// @return (record;next index) | ||
// @throws parse if a size is -1 but there is no extra field, or if more | ||
// than one extra field has id 0x0001 | ||
// @throws nyi for a compression method other than 0 or 8 | ||
// @see .finos.unzip.priv.parse | ||
.finos.unzip.priv.pfd:{ | ||
e:x 1; | ||
x:x 0; | ||
|
||
/ decode the raw header bytes: "0x0 sv reverse" turns byte-reversed | ||
/ bytes into a number, and "0b vs" then expands it into bits; crc is left | ||
/ as raw bytes | ||
r:update | ||
{("i"$first x)%10}ver, | ||
first os, | ||
0b vs 0x0 sv reverse flg, | ||
0x0 sv reverse cmp, | ||
/ 16 bits cut 5/6/5 into hours, minutes and seconds-divided-by-2 | ||
{"v"$24 60 60 sv 1 1 2*2 sv'0 5 11 cut 0b vs 0x0 sv reverse x}mtm, | ||
/ 16 bits cut 7/4/5 into years-since-1980, month and day | ||
{.finos.util.ymd . 1980 0 0+2 sv'0 7 11 cut 0b vs 0x0 sv reverse x}mdt, | ||
0x0 sv reverse csz, | ||
0x0 sv reverse usz, | ||
0x0 sv reverse nln, | ||
0x0 sv reverse xln | ||
from z; | ||
|
||
/ file name: nln bytes starting at y | ||
r:update fnm:`$"c"$x y+til nln from r; | ||
|
||
/ raw extra field: xln bytes after the name | ||
r:update xfd:x y+nln+til xln from r; | ||
|
||
/ a size of -1 with no extra field present is rejected | ||
/ (presumably -1 marks a size that only a ZIP64 extra field can carry) | ||
if[(not r`xln)&any -1=r`csz`usz; | ||
'`parse; | ||
]; | ||
|
||
if[r`xln; | ||
/ parse the raw extra field into a list of extra field records | ||
r:update .finos.unzip.priv.parse[(.finos.unzip.priv.pxfd;.finos.unzip.priv.wxfd);xfd;count xfd]from r; | ||
|
||
/ if ZIP64 record, upsert | ||
/ (its id and sz entries are dropped with 2_ before joining onto r) | ||
r,:exec{$[not any i:0x0001~/:x[;`id];();1=sum i;2_x first where i;'`parse]}xfd from r; | ||
|
||
/ ignore any other extra fields for now | ||
]; | ||
|
||
/ fdt is the csz bytes of file data that follow the extra field; dtd is | ||
/ the 3 bytes after that when flg 3 is set, and empty otherwise | ||
/ NOTE(review): flg comes from "0b vs", so index 3 looks like the fourth | ||
/ most significant bit rather than the bit with value 8 - confirm that | ||
/ this is the intended flag and that 3 bytes is the intended length | ||
r:update | ||
fdt:x y+nln+xln+til csz, | ||
dtd:x{(x*count y)#y}[flg 3]y+nln+xln+csz+til 3 | ||
from r; | ||
|
||
/ TODO can this filter be applied any earlier? | ||
r:$[ | ||
(e~(::))|(r`fnm)in e; | ||
[ | ||
.finos.log.info"inflating ",string r`fnm; | ||
|
||
$[ | ||
/ no compression: copy | ||
0=r`cmp;update fdu:"c"$fdt from r; | ||
|
||
/ deflate: reframe as gzip stream and inflate | ||
/ (a fixed 10-byte gzip header, the data, then crc and the low 4 bytes | ||
/ of usz, least significant byte first, as the trailer) | ||
8=r`cmp;update fdu:"c"$(.Q.gz 0x1f8b0800000000000003,fdt,crc,4#reverse 0x0 vs usz mod prd 32#2)from r; | ||
|
||
'`nyi]]; | ||
/ not selected: leave the contents empty | ||
update fdu:""from r]; | ||
|
||
/ the next record starts after the file data and the 0 or 3 dtd bytes | ||
(r;exec y+nln+xln+csz+3*flg 3 from r)} | ||
|
||
// Extract one file from an archive using unzip(1). | ||
// The output is captured in a temporary file created with mktemp(1); that | ||
// file is removed whether or not the extraction succeeds. | ||
// N.b. x and y are placed inside double quotes in a shell command without | ||
// any escaping, so only pass trusted names. | ||
// @param x hsym: the archive | ||
// @param y sym: name of the file to extract | ||
// @return character vector | ||
// @throws the error signalled by system or read1, once the temporary file | ||
// has been removed | ||
.finos.unzip.priv.unzip_system:{ | ||
    f:hsym`$first system"mktemp"; | ||
    c:"(unzip -p \"",(1_string x),"\" \"",(string y),"\" >",(1_string f),")"; | ||
    / run under a trap so that a failure cannot leak the temporary file; | ||
    / the result is (1b;contents) or (0b;error text) | ||
    r:@[{system x;(1b;"c"$read1 y)}[;f];c;{(0b;x)}]; | ||
    hdel f; | ||
    / re-signal the original error now that the file is gone | ||
    if[not first r; | ||
        'last r; | ||
        ]; | ||
    last r} | ||
|
||
// Perform various zip-related operations. | ||
// Possible values for x, and expected z arg in each case: | ||
// `list: List files in an archive. | ||
// z: ignored | ||
// `unzip: Extract (specific file(s) from) an archive. | ||
// z: sym, sym vector, or (::) to unzip all files | ||
// See https://users.cs.jmu.edu/buchhofp/forensics/formats/pkzip.html, | ||
// https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT, etc. | ||
// @param x sym | ||
// @param y hsym, character vector, or byte vector | ||
// @param z see above | ||
// @return `list: table of name, size and timestamp, keyed on name | ||
// `unzip: dictionary of filenames and character vectors, or a single | ||
// character vector when z is a sym | ||
// @throws domain if x is not `list or `unzip, or z is not sym(s) or (::) | ||
// @throws type if y is not an hsym, character vector, or byte vector | ||
// @throws "no cds" if no end-of-central-directory signature is found | ||
// @throws nyi for multi-disk archives | ||
// @throws parse if verification against unzip(1) fails | ||
.finos.unzip.priv.unzip:{ | ||
if[not x in`list`unzip; | ||
'`domain; | ||
]; | ||
|
||
/ accept file; save filename as n | ||
/ (n will be () if y is not an hsym) | ||
if[-11h=type y; | ||
n:y; | ||
y:read1 y; | ||
]; | ||
|
||
/ accept chars | ||
if[10h=type y; | ||
y:"x"$y; | ||
]; | ||
|
||
/ accept bytes; by this point anything else is a type error | ||
if[4h<>type y; | ||
'`type; | ||
]; | ||
|
||
if[`unzip=x; | ||
if[not(11h=abs type z)|z~(::); | ||
'`domain; | ||
]; | ||
/ NOTE(review): the lines from the lone / to the lone \ below look like | ||
/ a disabled check that rejected any z other than (::) | ||
/ | ||
if[not z~(::); | ||
'`nyi; | ||
]; | ||
\ | ||
]; | ||
|
||
.finos.log.info"processing ",$[-11h=type n;1_string n;"archive"]; | ||
|
||
/ look for end-of-central-directory signature (0x504b0506) | ||
/ assume last match is valid | ||
/ more sophisticated algos are possible, but they can be implemented as needed | ||
cds:("c"$y)ss"c"$0x504b0506; | ||
if[1>count cds; | ||
'"no cds"; | ||
]; | ||
cds:last cds; | ||
|
||
/ parse end-of-central-directory record | ||
/ (handed every byte from the signature to the end of the archive) | ||
ecd:.finos.unzip.priv.pecd y(first cds)+til(count y)-first cds; | ||
|
||
/ punt on multi-disk archives | ||
if[0<>ecd`dnu;'`nyi]; | ||
if[0<>ecd`dcd;'`nyi]; | ||
|
||
/ TODO delete? | ||
/ blank record table | ||
/o:enlist(enlist`)!enlist(::); | ||
|
||
r:$[ | ||
`list=x; | ||
[ | ||
/ bytes of central directory record | ||
/ (csz bytes starting at offset cof) | ||
cd:y(ecd`cof)+til ecd`csz; | ||
|
||
/ parse central directory | ||
cd:.finos.unzip.priv.parse[(.finos.unzip.priv.pcd;.finos.unzip.priv.wcd);cd;count cd]; | ||
1!select name:fnm,size:usz,timestamp:mdt+mtm from cd]; | ||
`unzip=x; | ||
[ | ||
/ parse file data | ||
/ (records are read from index 0 up to offset cof; z is passed as extra | ||
/ so that only the requested files are inflated) | ||
fd:.finos.unzip.priv.parse[(.finos.unzip.priv.pfd;.finos.unzip.priv.wfd;z);y;ecd`cof]; | ||
|
||
r:exec fnm!fdu from fd; | ||
|
||
/ sym vector: keep only those files; sym: return that file's contents; | ||
/ otherwise return everything | ||
r:$[ | ||
11h=type z; | ||
z#r; | ||
-11h=type z; | ||
r z; | ||
r]; | ||
|
||
/ optional cross-check against unzip(1); only possible when the archive | ||
/ was given as a file | ||
if[.finos.unzip.verify&-11h=type n; | ||
.finos.log.info"verifying"; | ||
v:r~$[ | ||
-11h=type z; | ||
.finos.unzip.priv.unzip_system[n]z; | ||
{y!x y}[n .finos.unzip.priv.unzip_system/:]key r]; | ||
if[not v; | ||
'`parse; | ||
]; | ||
.finos.log.info"verified"; | ||
]; | ||
r]; | ||
'`domain]; | ||
|
||
r} | ||
|
||
// Set to true to verify extraction against unzip(1). | ||
// Verification only happens when the archive is given as an hsym. | ||
// N.b. will not work if .finos.unzip.unzip is called from a thread | ||
// (presumably because verification shells out with system - confirm) | ||
.finos.unzip.verify:0b | ||
|
||
// Show what an archive contains, without extracting anything. | ||
// @param x hsym, character vector, or byte vector | ||
// @return table of filenames and file metadata | ||
.finos.unzip.list:{[x] | ||
    .finos.unzip.priv.unzip[`list;x;(::)]} | ||
|
||
// Extract every file in an archive. | ||
// @param x hsym, character vector, or byte vector | ||
// @return dictionary of filenames and character vectors | ||
.finos.unzip.unzip:{[x] | ||
    .finos.unzip.priv.unzip[`unzip;x;(::)]} | ||
|
||
// Extract only the named files from an archive. | ||
// @param x hsym, character vector, or byte vector | ||
// @param y sym vector | ||
// @return dictionary of filenames and character vectors | ||
.finos.unzip.unzip2:{[x;y] | ||
    .finos.unzip.priv.unzip[`unzip;x;y]} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Logging stubs: each one writes its message to stdout after a severity | ||
// tag and returns nothing. | ||
// @param x character vector | ||
.finos.log.critical:{[x]-1("CRITICAL: ",x);} | ||
.finos.log.error:{[x]-1("ERROR: ",x);} | ||
.finos.log.warning:{[x]-1("WARNING: ",x);} | ||
.finos.log.info:{[x]-1("INFO: ",x);} | ||
.finos.log.debug:{[x]-1("DEBUG: ",x);} | ||
|
||
// Collect garbage and report, at debug level, what .Q.gc returned. | ||
.finos.util.free:{[] | ||
    freed:.Q.gc[]; | ||
    .finos.log.debug"freed ",string[freed]," bytes";} | ||
|
||
// Date from year/month/day. | ||
// Each part is converted to a string and right-justified to a width of 4, | ||
// 2 and 2 respectively; the padding is filled with "0", the parts are | ||
// joined with "." and the result is parsed as a date. | ||
// The trailing ' applies this item by item across the arguments. | ||
// @param x year | ||
// @param y month | ||
// @param z day | ||
// @return date | ||
.finos.util.ymd:{"D"$"."sv"0"^-4 -2 -2$string(x;y;z)}' | ||
|
||
// Turn a count of seconds since 1970.01.01 into a (global) timestamp. | ||
// @param x number or number vector | ||
// @return timestamp or timestamp vector | ||
.finos.util.timestamp_from_epoch:{[x] | ||
    / 1970.01.01 as a nanosecond count | ||
    origin:`long$1970.01.01D; | ||
    `timestamp$origin+x*1000000000} | ||
|
||
// Print progress, with peach and try-catch. | ||
// The weight function is used to measure progress more accurately when | ||
// different arguments will take significantly different amounts of time. | ||
// When this is not the case, pass a constant function (e.g. {1}). | ||
// E.g. to (re/de)compress files, set/unset .z.zd and pass x as hcount, y | ||
// as {x set get x}, and z as the files. | ||
// Progress is reported through .finos.log.debug, once per argument, before | ||
// that argument is processed. | ||
// @param x monadic function: weight (e.g. hcount, {1}, etc.) | ||
// @param y monadic function | ||
// @param z list: args for y | ||
// @return dict: z!@[(1b;)y@;;(0b;)]peach z | ||
// i.e. each arg maps to (1b;result) or, if y signalled, (0b;error) | ||
.finos.util.progress:{ | ||
/ worker: s is the start time, f the function, a the args, w the running | ||
/ total of the weights, and i the index of the arg to process | ||
f:{[s;f;a;w;i] | ||
/ x plus the elapsed time (y-x) scaled up by 1%z, z being the fraction done | ||
eta:{x+(abs type e)$(e:y-x)%z}; | ||
/ render a dictionary as "key: value" pairs separated by spaces | ||
dll:{" "sv(key x){": "sv(string x;$[10<>type y;string;]y)}'get x}; | ||
/ render x out of y as "x/y (percent%)" | ||
progper:{ | ||
paren:{"(",x,")"}; | ||
prog:{"/"sv(neg count string y)$string(x;y)}; | ||
per:{.Q.fmt[6;2;100*x],"%"}; | ||
" "sv(prog[x;y];paren per x%y)}; | ||
/ the list items are evaluated right to left, so p is assigned in the | ||
/ eta item before the items above it read it | ||
.finos.log.debug dll`now`position`work`elapsed`eta!( | ||
p; | ||
progper[i+1;count a]; | ||
progper[w i;last w]; | ||
p-s; | ||
eta[s;p:.z.P;(w i)%last w] | ||
); | ||
/ run f on the arg under a trap | ||
@[(1b;)f@;a i;(0b;)]}; | ||
/ weigh every arg, then process the args by index | ||
z!f[.z.P;y;z;w:sums x peach z]peach til count z} | ||
|