From bcf583e8a365ad11a0b3b3373c76e93dfdf0b66e Mon Sep 17 00:00:00 2001 From: Dominique Quatravaux Date: Thu, 21 Feb 2019 16:31:46 +0100 Subject: [PATCH] Nightly disk usage report microservice * Generate qdirstat report every night using a modified version of the script referenced by https://superuser.com/a/1028907/48763 (freeware / public domain) * Run the script in a slightly modified version of the "perl" stock Docker image * Plumb down all the Kubernetes / OpenShift objects to build the image, host the script (from a ConfigMap), and run it (as a k8s CronJob) * Jury-rigged Makefile to work around https://github.com/openshift/origin/issues/8971 and share the YAML between test and production --- openshift/Makefile | 21 + openshift/disk-usage-report.yaml | 673 +++++++++++++++++++++++++++++++ 2 files changed, 694 insertions(+) create mode 100644 openshift/Makefile create mode 100644 openshift/disk-usage-report.yaml diff --git a/openshift/Makefile b/openshift/Makefile new file mode 100644 index 000000000..a2aefc9e1 --- /dev/null +++ b/openshift/Makefile @@ -0,0 +1,21 @@ +# A Makefile that mostly works around +# https://github.com/openshift/origin/issues/8971 + +.PHONY: usage +usage: + @echo >&2 "Usage: $(MAKE) pushtotest" + @echo >&2 " $(MAKE) pushtoprod" + + +.PHONY: pushtotest +pushtotest: + -oc delete -n wwp-test -f disk-usage-report.yaml + oc create -n wwp-test -f disk-usage-report.yaml + +.PHONY: pushtoprod +pushtoprod: + -$(test2prod) < disk-usage-report.yaml | oc delete -n wwp -f - + $(test2prod) < disk-usage-report.yaml | oc create -n wwp -f - + + +test2prod := perl -pe 's|docker-registry.default.svc:5000/wwp-test|docker-registry.default.svc:5000/wwp|g' diff --git a/openshift/disk-usage-report.yaml b/openshift/disk-usage-report.yaml new file mode 100644 index 000000000..73e96b262 --- /dev/null +++ b/openshift/disk-usage-report.yaml @@ -0,0 +1,673 @@ +# Cron job to create a qdirstat-compatible report every morning. +# +# After each change, push to the test namespace (wwp-test) with +# +# oc delete -f disk-usage-report.yaml +# oc create -f disk-usage-report.yaml +# +# After some certainty is acquired that things work indeed, push +# to production with +# +# make pushtoprod +# +# TODO: Also output some Prometheus-style stats per directory for +# long-term graphs. +--- +apiVersion: build.openshift.io/v1 +kind: BuildConfig +metadata: + name: perl-disk-usage-report +spec: + output: + to: + kind: "DockerImage" + name: "docker-registry.default.svc:5000/wwp-test/perl-disk-usage-report:latest" + source: + type: Dockerfile + dockerfile: | + FROM perl + RUN cpan URI::Escape + runPolicy: Serial + strategy: + dockerStrategy: + noCache: true + type: Docker + successfulBuildsHistoryLimit: 5 +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: srv-disk-usage-report-scripts +data: +# Lots and lots of indent plzkthxbai, so that C-x $ in Emacs works +# Also, the "perl" official image actually has two Perls, one in /usr/local/bin +# and another in /usr/bin, with different versions #boopmeintheboop + qdirstat-cache-writer: |- + #!/usr/local/bin/perl -w + # + # qdirstat-cache-writer - script to write QDirStat cache files from cron jobs + # + # QDirStat can read its information from cache files. This is a lot faster than + # reading all the directories in a directory tree and obtaining detailed + # information (size, type, last modification time) for each file and directory + # with the opendir() / readdir() and lstat() system calls for each individual + # file and directory. + # + # QDirStat can also write those cache files ("Write Cache File..." from the + # "File" menu), but the whole point of cache files is being able to do that in + # the background when the user does not have to wait for it - like in a cron + # job running in the middle of the night. QDirStat itself cannot be used to do + # that because it is a KDE program and thus an X program that needs access to + # an X display - which cron does not provide. + # + # This is what this Perl script is for. + # + # Usage: + # qdirstat-cache-writer [-lvdh] [] + # + # If not specified, defaults to ".qdirstat.cache.gz" + # in . + # + # If ends with ".gz", it will be compressed with gzip. + # qdirstat can read gzipped and plain text cache files. + # + # -l long format - always add full path, even for plain files + # -m scan mounted file systems (cross file system boundaries) + # -v verbose + # -d debug + # -h help (usage message) + # + # Author: Stefan Hundhammer + # + # This script is freeware. Fuck the lawyers and their legalese bullshit. + # All they ever contribute to software is those legalese headers that are just + # in the way of people working with the code. + + + # TO DO: + # + # - ensure to use UTF-8 + + + + use strict; + use English; + use Getopt::Std; + use Fcntl ':mode'; + use Encode; + use URI::Escape qw(uri_escape); + use vars qw( $opt_l $opt_m $opt_v $opt_d $opt_h ); + + + # Forward declarations. + + sub main(); + + + # Global variables. + + my $long_format = 0; + my $scan_mounted = 0; + my $verbose = 0; + my $debug = 0; + + my $default_cache_file_name = ".qdirstat.cache.gz"; + + my $toplevel_dev_no = undef; + my $toplevel_dev_name = undef; + my $unsafe_chars = "\x00-\x20%"; + + + # Call the main function and exit. + # DO NOT enter any other code outside a sub - + # any variables would otherwise be global. + + + main(); + exit 0; + + + #----------------------------------------------------------------------------- + + + sub main() + { + # Extract command line options. + # This will set a variable opt_? for any option, + # e.g. opt_v if option '-v' is passed on the command line. + + getopts('lmvdh'); + + usage() if $opt_h; + $long_format = 1 if $opt_l; + $scan_mounted = 1 if $opt_m; + $verbose = 1 if $opt_v; + $debug = 1 if $opt_d; + + # One or two parameters are required + # (yes, Perl does weird counting) + usage() if $#ARGV < 0 || $#ARGV > 1; + + my $toplevel_dir = shift @ARGV; + $toplevel_dir = absolute_path( $toplevel_dir ); + + my $cache_file_name; + + if ( $#ARGV < 0 ) # No more command line arguments? + { + $cache_file_name = $toplevel_dir . "/" . $default_cache_file_name; + } + else + { + $cache_file_name = shift @ARGV; + } + + write_cache_file( $toplevel_dir, $cache_file_name ); + compress_file( $cache_file_name ); + } + + + #----------------------------------------------------------------------------- + + + # Write a QDirStat cache. + # + # Parameters: + # $toplevel_dir + # $cache_file_name + + sub write_cache_file() + { + my ( $toplevel_dir, $cache_file_name ) = @_; + my $start_time = time(); + + open( CACHE, ">" . $cache_file_name ) or die "Can't open $cache_file_name"; + binmode( CACHE, ":bytes" ); + write_cache_header(); + write_cache_tree( $toplevel_dir ); + + my $elapsed = time() - $start_time; + my ( $sec, $min, $hours ) = gmtime( $elapsed ); + printf CACHE "# Elapsed time: %d:%02d:%02d\n", $hours, $min, $sec; + + close( CACHE ); + } + + + #----------------------------------------------------------------------------- + + + # Compress a file if its extension is ".gz". + # + # Parameters: + # $file_name + + sub compress_file() + { + my ( $file_name ) = @_; + + if ( $file_name =~ /.*\.gz$/ ) + { + my $uncompressed_name = $file_name; + $uncompressed_name =~ s/\.gz$//; # Cut off ".gz" extension + rename( $file_name, $uncompressed_name ); + logf( "Compressing $file_name" ); + system( "gzip $uncompressed_name" ); + } + + } + + + #----------------------------------------------------------------------------- + + + # Write the cache file header + # + # Parameters: + # --- + + sub write_cache_header() + { + print CACHE <<'EOF'; + [qdirstat 1.0 cache file] + # Generated by qdirstat-cache-writer + # Do not edit! + # + # Type path size mtime + + + EOF + } + + + #----------------------------------------------------------------------------- + + + # Write cache entries for a directory tree. + # + # Parameters: + # $dir Starting directory + + sub write_cache_tree($); # Need prototype for calling recursively + + sub write_cache_tree($) + { + my ( $dir ) = @_; + logf( "Reading $dir" ); + + return if ( ! write_dir_entry( $dir ) ); + + my @files; + my @subdirs; + + my $success = opendir( DIR, $dir ); + + if ( ! $success ) + { + my $msg = "Can't open $dir: $ERRNO\n"; + print CACHE "# $msg\n"; + logf( $msg ); + + return; + } + + my $entry; + + while ( $entry = readdir( DIR ) ) + { + if ( $entry ne "." and + $entry ne ".." ) + { + my $full_path = $dir . "/" . $entry; + + if ( (! -l $full_path) && -d _ ) + { + push @subdirs, $entry; + } + else + { + push @files, [ $entry, [lstat _] ]; + } + } + } + + closedir( DIR ); + + { + my $file; + + foreach $entry ( @files ) + { + my ($file, $stat) = @$entry; + write_file_entry( $dir, $file, $stat ); + } + + + my $subdir; + + foreach $subdir ( @subdirs ) + { + write_cache_tree( $dir . "/" . $subdir ); + } + } + } + + + #----------------------------------------------------------------------------- + + + # Write a cache entry for a directory. + # + # If the device of this directory is not the same as the toplevel device + # (i.e., if this is a mount point and thus file system boundaries would be + # crossed) only a comment line is written and an error value '0' is returned + # unless the "-m" command line option was used. + # + # Parameters: + # $dir directory + # + # Return value: + # 1 OK to continue + # 0 don't continue, file system boundary would be crossed + + sub write_dir_entry() + { + my ( $dir ) = @_; + my @lstat_result = lstat( $dir ); + + if ( scalar @lstat_result == 0 ) # Empty array -> lstat() failed + { + my $msg = "lstat() failed for $dir"; + print CACHE "# $msg\n"; + logf( $msg ); + + return; + } + + my ( $dev_no, + $ino, + $mode, + $links, + $uid, + $gid, + $rdev, + $size, + $atime, + $mtime, + $ctime, + $blksize, + $blocks ) = @lstat_result; + + + $dir =~ s://+:/:g; # Replace multiple // with one + my $escaped_dir = uri_escape( $dir, $unsafe_chars ); + + # Write cache file entry for this directory (even if it's a mount point) + + print CACHE "D $escaped_dir"; + print CACHE "\t$size"; + printf CACHE "\t0x%x\n", $mtime; + + if ( ! defined( $toplevel_dev_no ) ) + { + $toplevel_dev_no = $dev_no; + $toplevel_dev_name = device_name( $dir ); + print CACHE "# Device: $toplevel_dev_name\n\n"; + } + + if ( $dev_no == $toplevel_dev_no || $scan_mounted ) + { + return 1; + } + + my $dev_name = device_name( $dir ); + my $fs_boundary = $dev_name ne $toplevel_dev_name; + my $msg; + + if ( $fs_boundary ) + { + $msg = "File system boundary at mount point $dir on device $dev_name"; + } + else + { + $msg = "Mount point $dir is still on the same device $dev_name"; + } + + print CACHE "# $msg\n\n"; + logf( $msg ); + + return ! $fs_boundary; + } + + + #----------------------------------------------------------------------------- + + + # Get the device name where a directory is on from the 'df' command. + # + # Parameters: + # $dir directory + # + # Return value: + # device name ("/dev/sda3", "/dev/system/root") + + sub device_name() + { + my ( $dir ) = @_; + + my @df_output = `df "$dir" 2>/dev/null`; + return "" if scalar @df_output < 1; + + shift @df_output; # Remove header line + my ( $line ) = @df_output; + my ( $device_name ) = split( '\s+', $line ); + deb( "Directory $dir is on device $device_name" ); + + return $device_name; + } + + + #----------------------------------------------------------------------------- + + + # Write a cache entry for a plain file (or other non-directory i-node) + # + # Parameters: + # $dir directory + # $name file name (without path) + + sub write_file_entry() + { + my ( $dir, $name, $lstat_result ) = @_; + my @lstat_result = @$lstat_result; + + if ( scalar @lstat_result == 0 ) # Empty array -> lstat() failed + { + my $msg = "lstat() failed for $dir/$name"; + print CACHE "# $msg\n"; + logf( $msg ); + + return; + } + + my ( $dev, + $ino, + $mode, + $links, + $uid, + $gid, + $rdev, + $size, + $atime, + $mtime, + $ctime, + $blksize, + $blocks ) = @lstat_result; + + my $type = "F"; + + if ( S_ISREG ( $mode ) ) { $type = "F"; } + elsif ( S_ISLNK ( $mode ) ) { $type = "L"; } + elsif ( S_ISBLK ( $mode ) ) { $type = "BlockDev"; } + elsif ( S_ISCHR ( $mode ) ) { $type = "CharDev"; } + elsif ( S_ISFIFO( $mode ) ) { $type = "FIFO"; } + elsif ( S_ISSOCK( $mode ) ) { $type = "Socket"; } + + print CACHE "$type"; + + $name = uri_escape( $name, $unsafe_chars ); + + if ( $long_format ) + { + $dir = uri_escape( $dir, $unsafe_chars ); + my $full_path = $dir . "/" . $name; + $full_path =~ s://+:/:g; # Replace multiple // with one + + print CACHE " $full_path"; + } + else + { + print CACHE "\t$name"; + } + + print CACHE "\t$size"; + printf CACHE "\t0x%x", $mtime; + + print CACHE "\tblocks: $blocks" if $blocks > 0 && $blocks * 512 < $size; # Sparse file? + print CACHE "\tlinks: $links" if $links > 1; + + print CACHE "\n"; + } + + + #----------------------------------------------------------------------------- + + + # Make an absolute path of a possible relative path. + # + # Parameters: + # $dir relative or absolute path + # + # Return value: + # absolute path + + sub absolute_path() + { + my ( $dir ) = @_; + + return $dir if ( $dir =~ '^/' ); + + my $save_dir = $ENV{'PWD'}; + chdir( $dir ); + $dir = $ENV{'PWD'}; + + chdir $save_dir; + + return $dir; + } + + + #----------------------------------------------------------------------------- + + + # Log a message to stdout if verbose mode is set + # (command line option '-v'). + # + # Parameters: + # Messages to write (any number). + + sub logf() + { + my $msg; + + if ( $verbose ) + { + foreach $msg( @_ ) + { + print $msg . " "; + } + + $OUTPUT_AUTOFLUSH = 1; # inhibit buffering + print "\n"; + } + } + + + #----------------------------------------------------------------------------- + + + # Log a debugging message to stdout if debug mode is set + # (command line option '-d'). + # + # Parameters: + # Messages to write (any number). + + sub deb() + { + my $msg; + + if ( $debug ) + { + foreach $msg( @_ ) + { + print $msg . " "; + } + + $OUTPUT_AUTOFLUSH = 1; # inhibit buffering + print "\n"; + } + } + + + #----------------------------------------------------------------------------- + + + # Print usage message and abort program. + # + # Parameters: + # --- + + sub usage() + { + die <<"USAGE-END"; + + qdirstat-cache-writer - script to write QDirStat cache files from cron jobs + + QDirStat can read its information from cache files. This is a lot faster than + reading all the directories in a directory tree and obtaining detailed + information (size, type, last modification time) for each file and directory + with the opendir() / readdir() and lstat() system calls for each individual + file and directory. + + QDirStat can also write those cache files (\"Write Cache File...\" from the + \"File\" menu), but the whole point of cache files is being able to do that in + the background when the user does not have to wait for it - like in a cron + job running in the middle of the night. QDirStat itself cannot be used to do + that because it is a KDE program and thus an X program that needs access to + an X display - which cron does not provide. + + This is what this Perl script is for. + + Usage: + $0 [-ldvh] [] + + If not specified, defaults to \"$default_cache_file_name\" + in . + + If ends with \".gz\", it will be compressed with gzip. + qdirstat can read gzipped and plain text cache files. + + -l long format - always add full path, even for plain files + -m scan mounted file systems (cross file system boundaries) + -v verbose + -d debug + -h help (this usage message) + + USAGE-END + + } + +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: srv-disk-usage-report +spec: + schedule: "5 03 * * *" + jobTemplate: + spec: + template: + spec: + restartPolicy: Never + concurrencyPolicy: Forbid + containers: + - name: qdirstat-cache-writer + image: docker-registry.default.svc:5000/wwp-test/perl-disk-usage-report:latest + imagePullPolicy: Always + command: + - /bin/sh + - -c + - | + OUTDIR=/srv/batch/disk-usage-report + TARGET=/srv + exec >> $OUTDIR/log.txt 2>&1; set -e -x; date; id + /scripts/qdirstat-cache-writer $TARGET $OUTDIR/qdirstat.gz.NEW + mv $OUTDIR/qdirstat.gz.NEW $OUTDIR/qdirstat.gz + volumeMounts: + - name: scripts + mountPath: /scripts + - name: srv + mountPath: /srv + volumes: + - name: scripts + configMap: + name: srv-disk-usage-report-scripts + items: + - key: qdirstat-cache-writer + path: qdirstat-cache-writer + # "Owing to JSON limitations, you must specify the mode in decimal notation." + mode: 493 # 0755 + - name: srv + persistentVolumeClaim: + claimName: wordpress-0 +