From 7879a3fba1310da0ccc1c336515b5b4a409c3ccb Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Thu, 13 Jun 2019 16:57:11 -0700 Subject: [PATCH] New zic -b option to shrink data and test y2038 * Makefile (ZFLAGS): Mention -b in comment. * NEWS: Mention this. * zic.c (usage): Mention -b. (bloat): New static var. (want_bloat): New static function. (main): Parse new -b option. (writezone): In slim output, suppress QTBUG-53071 workaround, pre-2011 bug workaround, all-zero isstdcnt and isutcnt indicators, and 32-bit data (except for header and the single type that RFC 8536 requires). (outzone): In slim output, suppress redundant transitions back to 1900 or forward through 2038, and suppress redundant transitions just before the TZ string takes over. * zic.8: Document -b. Sort options alphabetically. --- Makefile | 12 ++++++-- NEWS | 14 +++++++++ zic.8 | 47 ++++++++++++++++++++++++------ zic.c | 89 ++++++++++++++++++++++++++++++++++++++++++++------------ 4 files changed, 133 insertions(+), 29 deletions(-) diff --git a/Makefile b/Makefile index 25f1d3578..35db07da8 100644 --- a/Makefile +++ b/Makefile @@ -362,9 +362,17 @@ LEAPSECONDS= zic= ./zic ZIC= $(zic) $(ZFLAGS) -# To shrink the size of installed TZif files, +# Append "-b fat" to install larger TZif files that work around +# incompatibilities and bugs in some TZif readers, notably readers that +# mishandle 64-bit data in TZif files. Append "-b slim" to install +# smaller TZif files that test for these year-2038 bugs. If no -b +# option is given, the current default is "-b fat", but this is +# intended to change as buggy readers often mishandle timestamps +# after 2038 anyway. +# +# To shrink the size of installed TZif files even further, # append "-r @N" to omit data before N-seconds-after-the-Epoch. -# See the zic man page for more about -r. +# See the zic man page for more about -b and -r. ZFLAGS= # How to use zic to install TZif files. 
diff --git a/NEWS b/NEWS index 56bbc7f06..0e4c68868 100644 --- a/NEWS +++ b/NEWS @@ -41,6 +41,20 @@ Unreleased, experimental changes Changes to code + zic's new -b option supports a way to control data bloat and to + test for year-2038 bugs in software that reads TZif files. + 'zic -b fat' and 'zic -b slim' generate larger and smaller output; + for example, changing from fat to slim shrinks the Europe/London + file from 3648 to 1625 bytes, saving about 55%. Fat and slim + files represent the same time data and use the same TZif format as + documented in tzfile(5) and in Internet RFC 8536. Fat format + attempts to work around bugs or incompatibilities on older + software that reads TZif files, notably software that mishandles + 64-bit TZif data. Slim format is more efficient and tests for + these bugs. Currently zic defaults to fat format, although this + is intended to change in future zic versions, as the buggy + software typically mishandles post-2038 timestamps anyway. + zic no longer treats a set of rules ending in 2037 specially. Previously, zic assumed that such a ruleset meant that future timestamps could not be predicted, and therefore omitted a diff --git a/zic.8 b/zic.8 index 89ea2d990..d02994f31 100644 --- a/zic.8 +++ b/zic.8 @@ -45,6 +45,32 @@ Output version information and exit. .B \*-\*-help Output short usage message and exit. .TP +.BI "\*-b " bloat +Output backward-compatibility data as specified by +.IR bloat . +If +.I bloat +is +.BR fat , +generate additional data entries that work around potential bugs or +incompatibilities in older software, such as software that mishandles +the 64-bit generated data. +If +.I bloat +is +.BR slim , +keep the output files small; this can help check for the bugs +and incompatibilities. +Although the default is currently +.BR fat , +this is intended to change in future +.B zic +versions, as software that mishandles the 64-bit data typically +mishandles timestamps after the year 2038 anyway. 
+Also see the +.B \*-r +option for another way to shrink output size. +.TP .BI "\*-d " directory Create time conversion information files in the named directory rather than in the standard directory named below. @@ -59,6 +85,11 @@ will act as if the input contained a link line of the form .ti +.5i Link \fItimezone\fP localtime .TP +.BI "\*-L " leapsecondfilename +Read leap second information from the file with the given name. +If this option is not used, +no leap second information appears in output files. +.TP .BI "\*-p " timezone Use .IR timezone 's @@ -70,15 +101,6 @@ will act as if the input contained a link line of the form .ti +.5i Link \fItimezone\fP posixrules .TP -.BI "\*-t " file -When creating local time information, put the configuration link in -the named file rather than in the standard location. -.TP -.BI "\*-L " leapsecondfilename -Read leap second information from the file with the given name. -If this option is not used, -no leap second information appears in output files. -.TP .BR "\*-r " "[\fB@\fP\fIlo\fP][\fB/@\fP\fIhi\fP]" Reduce the size of output files by limiting their applicability to timestamps in the range from @@ -102,6 +124,13 @@ On platforms with GNU .BR date , .q "zic \-r @$(date +%s)" omits data intended for past timestamps. +Also see the +.B "\*-b slim" +option for another way to shrink output size. +.TP +.BI "\*-t " file +When creating local time information, put the configuration link in +the named file rather than in the standard location. 
.TP .B \*-v Be more verbose, and complain about the following situations: diff --git a/zic.c b/zic.c index d43cc0d1b..34228a559 100644 --- a/zic.c +++ b/zic.c @@ -574,8 +574,9 @@ usage(FILE *stream, int status) { fprintf(stream, _("%s: usage is %s [ --version ] [ --help ] [ -v ] \\\n" - "\t[ -l localtime ] [ -p posixrules ] [ -d directory ] \\\n" - "\t[ -t localtime-link ] [ -L leapseconds ] [ -r '[@lo][/@hi]' ] \\\n" + "\t[ -b {slim|fat} ] [ -d directory ] [ -l localtime ]" + " [ -L leapseconds ] \\\n" + "\t[ -p posixrules ] [ -r '[@lo][/@hi]' ] [ -t localtime-link ] \\\n" "\t[ filename ... ]\n\n" "Report bugs to %s.\n"), progname, progname, REPORT_BUGS_TO); @@ -650,6 +651,17 @@ static const char * leapsec; static const char * tzdefault; static const char * yitcommand; +/* -1 if the TZif output file should be slim, 0 if default, 1 if the + output should be fat for backward compatibility. Currently the + default is fat, although this may change. */ +static int bloat; + +static bool +want_bloat(void) +{ + return 0 <= bloat; +} + int main(int argc, char **argv) { @@ -681,10 +693,22 @@ main(int argc, char **argv) } else if (strcmp(argv[k], "--help") == 0) { usage(stdout, EXIT_SUCCESS); } - while ((c = getopt(argc, argv, "d:l:L:p:r:st:vy:")) != EOF && c != -1) + while ((c = getopt(argc, argv, "b:d:l:L:p:r:st:vy:")) != EOF && c != -1) switch (c) { default: usage(stderr, EXIT_FAILURE); + case 'b': + if (strcmp(optarg, "slim") == 0) { + if (0 < bloat) + error(_("incompatible -b options")); + bloat = -1; + } else if (strcmp(optarg, "fat") == 0) { + if (bloat < 0) + error(_("incompatible -b options")); + bloat = 1; + } else + error(_("invalid option: -b '%s'"), optarg); + break; case 'd': if (directory == NULL) directory = optarg; @@ -1921,7 +1945,7 @@ writezone(const char *const name, const char *const string, char version, seconds, as the idea is to insert a transition just before 32-bit time_t rolls around, and this occurs at a slightly different moment if transitions 
are leap-second corrected. */ - if (WORK_AROUND_QTBUG_53071 && timecnt != 0 + if (WORK_AROUND_QTBUG_53071 && timecnt != 0 && want_bloat() && ats[timecnt - 1] < y2038_boundary - 1 && strchr(string, '<')) { ats[timecnt] = y2038_boundary - 1; types[timecnt] = types[timecnt - 1]; @@ -1970,7 +1994,7 @@ writezone(const char *const name, const char *const string, char version, int old0; char omittype[TZ_MAX_TYPES]; int typemap[TZ_MAX_TYPES]; - register int thistypecnt; + int thistypecnt, stdcnt, utcnt; char thischars[TZ_MAX_CHARS]; int thischarcnt; bool toomanytimes; @@ -2053,7 +2077,7 @@ writezone(const char *const name, const char *const string, char version, ** (to help get global "altzone" and "timezone" variables ** set correctly). */ - { + if (want_bloat()) { register int mrudst, mrustd, hidst, histd, type; hidst = histd = mrudst = mrustd = -1; @@ -2100,12 +2124,16 @@ writezone(const char *const name, const char *const string, char version, for (i = 0; i < sizeof indmap / sizeof indmap[0]; ++i) indmap[i] = -1; - thischarcnt = 0; + thischarcnt = stdcnt = utcnt = 0; for (i = old0; i < typecnt; i++) { register char * thisabbr; if (omittype[i]) continue; + if (ttisstds[i]) + stdcnt = thistypecnt; + if (ttisgmts[i]) + utcnt = thistypecnt; if (indmap[abbrinds[i]] >= 0) continue; thisabbr = &chars[abbrinds[i]]; @@ -2118,12 +2146,18 @@ writezone(const char *const name, const char *const string, char version, } indmap[abbrinds[i]] = j; } + if (pass == 1 && !want_bloat()) { + utcnt = stdcnt = thisleapcnt = 0; + thistimecnt = - locut - hicut; + thistypecnt = thischarcnt = 1; + thistimelim = thistimei; + } #define DO(field) fwrite(tzh.field, sizeof tzh.field, 1, fp) tzh = tzh0; memcpy(tzh.tzh_magic, TZ_MAGIC, sizeof tzh.tzh_magic); tzh.tzh_version[0] = version; - convert(thistypecnt, tzh.tzh_ttisgmtcnt); - convert(thistypecnt, tzh.tzh_ttisstdcnt); + convert(utcnt, tzh.tzh_ttisgmtcnt); + convert(stdcnt, tzh.tzh_ttisstdcnt); convert(thisleapcnt, tzh.tzh_leapcnt); convert(locut + 
thistimecnt + hicut, tzh.tzh_timecnt); convert(thistypecnt, tzh.tzh_typecnt); @@ -2138,6 +2172,15 @@ writezone(const char *const name, const char *const string, char version, DO(tzh_typecnt); DO(tzh_charcnt); #undef DO + if (pass == 1 && !want_bloat()) { + /* Output a minimal data block with just one time type. */ + puttzcode(0, fp); /* utoff */ + putc(0, fp); /* dst */ + putc(0, fp); /* index of abbreviation */ + putc(0, fp); /* empty-string abbreviation */ + continue; + } + /* Output a LO_TIME transition if needed; see limitrange. But do not go below the minimum representable value for this pass. */ @@ -2193,10 +2236,12 @@ writezone(const char *const name, const char *const string, char version, puttzcodepass(todo, fp, pass); puttzcode(corr[i], fp); } - for (i = old0; i < typecnt; i++) + if (stdcnt != 0) + for (i = old0; i < typecnt; i++) if (!omittype[i]) putc(ttisstds[i], fp); - for (i = old0; i < typecnt; i++) + if (utcnt != 0) + for (i = old0; i < typecnt; i++) if (!omittype[i]) putc(ttisgmts[i], fp); swaptypes(old0, thisdefaulttype); @@ -2643,16 +2688,18 @@ outzone(const struct zone *zpfirst, ptrdiff_t zonecount) max_year = min_year + years_of_observations; } } - /* - ** For the benefit of older systems, - ** generate data from 1900 through 2038. - */ - if (min_year > 1900) - min_year = 1900; max_year0 = max_year; - if (max_year < 2038) + if (want_bloat()) { + /* For the benefit of older systems, + generate data from 1900 through 2038. */ + if (min_year > 1900) + min_year = 1900; + if (max_year < 2038) max_year = 2038; + } + for (i = 0; i < zonecount; ++i) { + struct rule *prevrp = NULL; /* ** A guess that may well be corrected later. 
*/ @@ -2788,6 +2835,11 @@ outzone(const struct zone *zpfirst, ptrdiff_t zonecount) doabbr(ab, zp, rp->r_abbrvar, rp->r_isdst, rp->r_stdoff, false); offset = oadd(zp->z_gmtoff, rp->r_stdoff); + if (!want_bloat() && !useuntil && !do_extend + && prevrp + && rp->r_hiyear == ZIC_MAX + && prevrp->r_hiyear == ZIC_MAX) + break; type = addtype(offset, ab, rp->r_isdst, rp->r_todisstd, rp->r_todisgmt); if (defaulttype < 0 && !rp->r_isdst) @@ -2797,6 +2849,7 @@ outzone(const struct zone *zpfirst, ptrdiff_t zonecount) && ktime < attypes[lastatmax].at)) lastatmax = timecnt; addtt(ktime, type); + prevrp = rp; } } if (usestart) {