Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tree: 5969afaf5e
Fetching contributors…

Cannot retrieve contributors at this time

file 63 lines (55 sloc) 2.353 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
#!/usr/bin/env python
#
# Find or delete files in S3 older than a given age and matching a pattern
# Useful for cleaning up old backups, etc.
#

from boto.s3.connection import S3Connection
import time
from optparse import OptionParser
import sys
import re

def _parse_last_modified(last_modified):
  """Convert a key's last_modified string to seconds since the epoch (UTC).

  The value is expected to look like "2013-01-31T12:34:56.000Z"; everything
  from the first "." onwards (fractional seconds and zone suffix) is dropped
  before parsing.
  """
  # Imported locally so this block does not depend on the file's import list.
  import calendar
  stamp = last_modified.split(".")[0]
  # S3 reports listing timestamps in UTC. time.mktime() would interpret the
  # parsed value as local time and skew every age by the host's UTC offset;
  # calendar.timegm() treats it as UTC.
  return calendar.timegm(time.strptime(stamp, "%Y-%m-%dT%H:%M:%S"))


def main(args):
  """List, or with --delete remove, keys in an S3 bucket that are older than
  --maxage seconds and whose name matches --regex.

  args: the full argument vector. The script entry point passes sys.argv, so
        the leading program name simply ends up as an ignored positional.

  Returns 1 when the configuration is incomplete or invalid. Otherwise falls
  off the end (returns None, i.e. exit status 0) after processing the bucket.
  """
  parser = OptionParser()
  parser.add_option("--key", dest="key", metavar="KEY",
                    help="AWS Access Key")
  parser.add_option("--secret", dest="secret", metavar="SECRET",
                    help="AWS Access Secret Key")
  parser.add_option("--maxage", dest="maxage", metavar="SECONDS",
                    help="Max age a key(file) can have before we want to delete it")
  parser.add_option("--regex", dest="regex", metavar="REGEX",
                    help="Only consider keys matching this REGEX")
  parser.add_option("--bucket", dest="bucket", metavar="BUCKET",
                    help="Search for keys in a specific bucket")
  # --delete is a plain boolean switch, so it takes no metavar.
  parser.add_option("--delete", dest="delete", action="store_true",
                    default=False, help="Actually do a delete. If not specified, just list the keys found that match.")
  (config, args) = parser.parse_args(args)

  # Report every missing flag, not just the first one, before giving up.
  missing = [flag for flag in ("key", "secret", "maxage", "regex", "bucket")
             if getattr(config, flag) is None]
  for flag in missing:
    sys.stderr.write("Missing required flag: --%s\n" % flag)
  if missing:
    sys.stderr.write("Configuration is not ok, aborting...\n")
    return 1

  # Validate user-supplied values before talking to S3 so that a typo yields
  # a readable message instead of a traceback.
  try:
    config.maxage = int(config.maxage)
  except ValueError:
    sys.stderr.write("Invalid --maxage %r: expected a whole number of seconds\n"
                     % config.maxage)
    return 1
  try:
    config.regex = re.compile(config.regex)
  except re.error as err:
    sys.stderr.write("Invalid --regex %r: %s\n" % (config.regex, err))
    return 1

  s3 = S3Connection(config.key, config.secret)
  bucket = s3.get_bucket(config.bucket)
  for key in bucket.list():
    mtime = _parse_last_modified(key.last_modified)
    now = time.time()
    if mtime > (now - config.maxage):
      # Skip, file is young enough
      continue
    if config.regex.search(key.name) is None:
      # Skip, file does not match the pattern
      continue
    if config.delete:
      # Single-argument print(...) behaves the same on Python 2 and 3.
      print("Deleting: s3://%s/%s" % (bucket.name, key.name))
      print(" Key has age %d, older than --maxage %d" % (now - mtime, config.maxage))
      print(" Key matches pattern /%s/" % (config.regex.pattern))
      key.delete()
    else:
      print("s3://%s/%s" % (bucket.name, key.name))

# Script entry point: hand the raw argument vector to main() and use its
# return value as the process exit status.
if __name__ == "__main__":
  exit_status = main(sys.argv)
  sys.exit(exit_status)
Something went wrong with that request. Please try again.