Permalink
Browse files

Add current WIP to repository

  • Loading branch information...
crooks committed Apr 13, 2012
1 parent de68d97 commit 725e28187fe29bdef37d914579be545f7ed2bbd0
Showing with 836 additions and 0 deletions.
  1. +77 −0 pyclean/Config.py
  2. +87 −0 pyclean/Groups.py
  3. +299 −0 pyclean/PyClean.py
  4. +142 −0 pyclean/emp.py
  5. +189 −0 pyclean/filter_innd.py
  6. +42 −0 pyclean/timing.py
View
@@ -0,0 +1,77 @@
+#!/usr/bin/python
+#
+# vim: tabstop=4 expandtab shiftwidth=4 autoindent
+#
+# Copyright (C) 2012 Steve Crook <steve@mixmin.net>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# This file forms the start on some work to allow newsgroup filters to be
+# added and auto-expired after a defined period.
+
+import ConfigParser
+import os
+import sys
+
def makedir(d):
    """Ensure that directory *d* exists, creating it if necessary.

    Only the final path component is ever created. If *d* does not exist
    and its parent directory does not exist either, a message is written to
    stdout and the program exits with status 1.

    The parent is derived from the absolute form of *d*, so a relative path
    (whose parent is the current directory) and a path with a trailing
    separator are both handled instead of aborting.

    """
    if os.path.isdir(d):
        return
    # dirname() of "newdir" is "" and of "/a/b/" is "/a/b"; resolving the
    # path first makes both cases yield the real parent directory. An empty
    # path can never be created, so it is treated as having no parent.
    if not d or not os.path.isdir(os.path.dirname(os.path.abspath(d))):
        sys.stdout.write("%s: Unable to make directory. Aborting.\n" % d)
        sys.exit(1)
    # 0o700 is accepted by Python 2.6+ and Python 3; the bare 0700 form is a
    # syntax error on Python 3.
    os.mkdir(d, 0o700)
    sys.stdout.write("%s: Directory created.\n" % d)
+
+
# Build the module-level parser and populate it with the built-in defaults.
# Sections are created in the order 'paths', then 'logging'.
config = ConfigParser.RawConfigParser()
config.add_section('paths')
config.add_section('logging')

# Default logging options, set in this order.
for _option, _value in (
        ('level', 'info'),
        ('format', '%(asctime)s %(levelname)s %(message)s'),
        ('datefmt', '%Y-%m-%d %H:%M:%S'),
        ('retain', 7),
):
    config.set('logging', _option, _value)

# Everything lives under ~/pyclean unless an environment variable overrides
# an individual path below.
homedir = os.path.expanduser('~')
basedir = os.path.join(homedir, 'pyclean')
makedir(basedir)

# Each path option is taken from its environment variable when that variable
# is set, otherwise it is the same-named subdirectory of basedir. The
# directory is created straight after the option has been stored.
for _option, _envvar in (('etc', 'PYCLEANETC'), ('log', 'PYCLEANLOG')):
    _fallback = os.path.join(basedir, _option)
    config.set('paths', _option, os.environ.get(_envvar, _fallback))
    makedir(config.get('paths', _option))

# Finally, overlay the defaults with pyclean.cfg from the etc directory, but
# only when that file actually exists.
configfile = os.path.join(config.get('paths', 'etc'), 'pyclean.cfg')
if os.path.isfile(configfile):
    config.read(configfile)
View
@@ -0,0 +1,87 @@
+#!/usr/bin/python
+#
+# vim: tabstop=4 expandtab shiftwidth=4 autoindent
+#
+# Copyright (C) 2012 Steve Crook <steve@mixmin.net>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# This file forms the start on some work to allow newsgroup filters to be
+# added and auto-expired after a defined period.
+
+import re
+import logging
+from collections import defaultdict
+
class Groups(object):
    """Classify a comma-separated list of newsgroups by category.

    After analyze() has run, indexing the instance with a category name
    ('test', 'bin_allowed' or 'emp_exclude') gives the number of newsgroups
    that matched that category, and indexing with '<category>_bool' tells
    whether every newsgroup in the list matched it. Anything that was not
    recorded reads as 0.

    """

    def __init__(self):
        self.regex = Regex()
        # Start with an empty result so lookups made before the first
        # analyze() call return 0 instead of raising AttributeError.
        self.grp = defaultdict(int)

    def __getitem__(self, grptest):
        """Return the value recorded for *grptest*, or 0 if there is none."""
        # .get() rather than indexing: indexing a defaultdict inserts the
        # missing key, which would make a later "in" test report True.
        return self.grp.get(grptest, 0)

    def __contains__(self, item):
        """Return True if the last analyze() recorded *item*."""
        return item in self.grp

    def analyze(self, newsgroups):
        """Count how many of *newsgroups* fall into each category.

        *newsgroups* is converted with str() and split on commas. Whitespace
        around each name is dropped so that "a, b" is treated like "a,b";
        otherwise the '^'-anchored patterns could never match a name that
        follows a space. The result replaces that of any previous call.

        """
        categories = (
            ('test', self.regex.test),
            ('bin_allowed', self.regex.bin_allowed),
            ('emp_exclude', self.regex.emp_exclude),
        )
        names = [name.strip() for name in str(newsgroups).split(',')]
        counts = defaultdict(int)
        for name in names:
            for category, pattern in categories:
                if pattern.search(name):
                    counts[category] += 1
        # Not all bools will be meaningful but it's easier to create them
        # generically than specifically. Iterate over a snapshot of the keys
        # because the loop adds new keys to the same dictionary.
        total = len(names)
        for category in list(counts):
            counts['%s_bool' % category] = counts[category] == total
        self.grp = counts
+
class Regex(object):
    """Compiled regular expressions used to classify newsgroups and hosts.

    Each attribute is a single compiled pattern built by joining a list of
    alternatives with '|'. The patterns are written as raw strings so that
    backslash escapes such as '\\.' reach the re module unchanged without
    relying on Python tolerating unknown string escapes.

    """

    def __init__(self):
        # Test groups
        test = [r'\.test(ing)?(?:$|\.)',
                r'^es\.pruebas',
                r'^borland\.public\.test2',
                r'^cern\.testnews']
        self.test = self.regex_compile(test)
        # Binary groups
        bin_allowed = [r'^bin[a.]', r'\.bin[aei.]', r'\.bin$',
                       r'^fur\.artwork',
                       r'^alt\.anonymous\.messages$', r'^de\.alt\.dateien',
                       r'^rec\.games\.bolo$', r'^comp\.security\.pgp\.test$',
                       r'^sfnet\.tiedostot', r'^fido\.', r'^unidata\.',
                       r'^alt\.security\.keydist',
                       r'^linux\.debian\.bugs\.dist$',
                       r'^lucky\.freebsd']
        self.bin_allowed = self.regex_compile(bin_allowed)
        # Exclude from all EMP filters
        emp_exclude = [r'alt\.anonymous\.messages']
        self.emp_exclude = self.regex_compile(emp_exclude)
        # Bad posting-hosts
        bad_ph = [r'newsguy\.com', r'tornevall\.net']
        self.bad_ph = self.regex_compile(bad_ph)

    def regex_compile(self, regexlist):
        """Return one compiled pattern matching any entry of *regexlist*.

        Empty entries are skipped: an empty alternative matches every
        string, which would make the combined pattern match everything.

        """
        alternatives = [entry for entry in regexlist if entry]
        # The '||' collapse is kept from the original implementation for
        # entries that themselves end or begin with '|'.
        textual = '|'.join(alternatives).replace('||', '|')
        return re.compile(textual)
+
if __name__ == "__main__":
    # Small demonstration run: classify a fixed newsgroup list and show
    # three of the results.
    groups = Groups()
    newsgroups = 'alt.test,alt.testing.testing,alt.binaries.foo'
    groups.analyze(newsgroups)
    # Single-argument print(...) produces the same output on Python 2 and
    # Python 3, whereas the print statement is a syntax error on Python 3.
    print(groups['test_bool'])
    print(groups['bin_allowed'])
    print(groups['bin_allowed_bool'])
+
Oops, something went wrong.

0 comments on commit 725e281

Please sign in to comment.