Skip to content

Commit

Permalink
Add current WIP to repository
Browse files Browse the repository at this point in the history
  • Loading branch information
crooks committed Apr 13, 2012
1 parent de68d97 commit 725e281
Show file tree
Hide file tree
Showing 6 changed files with 836 additions and 0 deletions.
77 changes: 77 additions & 0 deletions pyclean/Config.py
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/python
#
# vim: tabstop=4 expandtab shiftwidth=4 autoindent
#
# Copyright (C) 2012 Steve Crook <steve@mixmin.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# This file forms the start on some work to allow newsgroup filters to be
# added and auto-expired after a defined period.

import ConfigParser
import os
import sys

def makedir(d):
    """Make sure directory *d* exists, creating it when its parent exists.

    Nothing happens if *d* is already a directory.  Otherwise, if the parent
    of *d* is an existing directory, *d* is created with mode 0700 and a
    notice is written to stdout.  If the parent is missing too (or *d* is
    empty), an error is written to stdout and the program exits with
    status 1.

    The parent is derived from the absolute, normalised form of *d*, so a
    trailing separator ("/x/etc/") or a bare relative name ("etc") resolves
    to its real parent instead of being mistaken for a path with none.
    """
    if os.path.isdir(d):
        return

    # An empty path can never be created; otherwise normalise before
    # splitting so dirname() yields the real parent directory.
    target = os.path.abspath(d) if d else ''
    if not target or not os.path.isdir(os.path.dirname(target)):
        msg = "%s: Unable to make directory. Aborting.\n" % d
        sys.stdout.write(msg)
        sys.exit(1)

    # 0o700 is the octal spelling accepted by both Python 2.6+ and Python 3.
    os.mkdir(target, 0o700)
    sys.stdout.write("%s: Directory created.\n" % d)


# Build the parser object that holds every pyclean setting.
config = ConfigParser.RawConfigParser()

# The 'paths' options default to subdirectories of basedir, which itself
# lives under the user's home directory.  The section is created here and
# filled in further down, once basedir is known.
config.add_section('paths')
homedir = os.path.expanduser('~')

# Built-in logging defaults.
config.add_section('logging')
config.set('logging', 'level', 'info')
config.set('logging', 'format', '%(asctime)s %(levelname)s %(message)s')
config.set('logging', 'datefmt', '%Y-%m-%d %H:%M:%S')
config.set('logging', 'retain', 7)

# pyclean's base directory is ~/pyclean; create it if it is missing.
basedir = os.path.join(homedir, 'pyclean')
makedir(basedir)

# etc directory: the PYCLEANETC environment variable, when set, overrides
# the default of <basedir>/etc.
config.set('paths', 'etc',
           os.environ.get('PYCLEANETC', os.path.join(basedir, 'etc')))
makedir(config.get('paths', 'etc'))

# log directory: the PYCLEANLOG environment variable, when set, overrides
# the default of <basedir>/log.
config.set('paths', 'log',
           os.environ.get('PYCLEANLOG', os.path.join(basedir, 'log')))
makedir(config.get('paths', 'log'))

# Finally, overlay the defaults above with pyclean.cfg from the etc
# directory, if that file exists.
configfile = os.path.join(config.get('paths', 'etc'), 'pyclean.cfg')
if os.path.isfile(configfile):
    config.read(configfile)
87 changes: 87 additions & 0 deletions pyclean/Groups.py
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/usr/bin/python
#
# vim: tabstop=4 expandtab shiftwidth=4 autoindent
#
# Copyright (C) 2012 Steve Crook <steve@mixmin.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# This file forms the start on some work to allow newsgroup filters to be
# added and auto-expired after a defined period.

import re
import logging
from collections import defaultdict

class Groups(object):
    """Tally how many newsgroups in a Newsgroups list fall in each category.

    After analyze() has run, the instance can be indexed by category name
    ('test', 'bin_allowed', 'emp_exclude') to get the number of matching
    groups, or by '<category>_bool' to learn whether every listed group
    matched that category.  Only categories with at least one match are
    stored; indexing anything else returns 0.
    """

    def __init__(self):
        self.regex = Regex()
        # Start with an empty tally so indexing and 'in' work even before
        # analyze() has been called.
        self.grp = defaultdict(int)

    def __getitem__(self, grptest):
        """Return the stored value for *grptest*, or 0 if it was not seen."""
        # .get() rather than [] so that merely reading a missing key does
        # not insert it and make a later 'in' test report True.
        return self.grp.get(grptest, 0)

    def __contains__(self, item):
        """Return True if *item* was recorded by the last analyze() call."""
        return item in self.grp

    def analyze(self, newsgroups):
        """Count category matches for a comma-separated newsgroups string.

        Any result of a previous call is discarded.  *newsgroups* is passed
        through str() and split on commas; each element is tested against
        the compiled patterns held by self.regex.
        """
        self.grp = defaultdict(int)
        nglist = str(newsgroups).split(',')
        nglen = len(nglist)
        for ng in nglist:
            if self.regex.test.search(ng):
                self.grp['test'] += 1
            if self.regex.bin_allowed.search(ng):
                self.grp['bin_allowed'] += 1
            if self.regex.emp_exclude.search(ng):
                self.grp['emp_exclude'] += 1
        # Not all bools will be meaningful but it's easier to create them
        # generically than specifically.  Iterate over a snapshot of the
        # keys because the loop adds new entries to the dict.
        for ngelement in list(self.grp):
            ngbool = '%s_bool' % ngelement
            self.grp[ngbool] = self.grp[ngelement] == nglen

class Regex(object):
    """Compiled regular expressions used to classify newsgroups and hosts.

    Each attribute is one compiled pattern made by OR-ing together a list
    of alternatives:

    test         -- test newsgroups
    bin_allowed  -- newsgroups where binaries are allowed
    emp_exclude  -- newsgroups excluded from all EMP filters
    bad_ph       -- bad posting-hosts
    """

    def __init__(self):
        # Test groups
        test = [r'\.test(ing)?(?:$|\.)',
                r'^es\.pruebas',
                r'^borland\.public\.test2',
                r'^cern\.testnews']
        self.test = self.regex_compile(test)
        # Binary groups
        bin_allowed = [r'^bin[a.]', r'\.bin[aei.]', r'\.bin$',
                       r'^fur\.artwork',
                       r'^alt\.anonymous\.messages$', r'^de\.alt\.dateien',
                       r'^rec\.games\.bolo$', r'^comp\.security\.pgp\.test$',
                       r'^sfnet\.tiedostot', r'^fido\.', r'^unidata\.',
                       r'^alt\.security\.keydist',
                       r'^linux\.debian\.bugs\.dist$',
                       r'^lucky\.freebsd']
        self.bin_allowed = self.regex_compile(bin_allowed)
        # Exclude from all EMP filters
        emp_exclude = [r'alt\.anonymous\.messages']
        self.emp_exclude = self.regex_compile(emp_exclude)
        # Bad posting-hosts
        bad_ph = [r'newsguy\.com', r'tornevall\.net']
        self.bad_ph = self.regex_compile(bad_ph)

    def regex_compile(self, regexlist):
        """Compile a list of regex fragments into one alternation pattern.

        Empty fragments are dropped: an empty alternative would match every
        string and silently defeat the filter.  If no fragments remain, the
        returned pattern never matches anything.
        """
        fragments = [fragment for fragment in regexlist if fragment]
        if not fragments:
            # An empty negative lookahead always fails, so nothing matches.
            return re.compile(r'(?!)')
        return re.compile('|'.join(fragments))

# Small manual demonstration, run only when the file is executed directly.
if __name__ == "__main__":
    groups = Groups()
    newsgroups = 'alt.test,alt.testing.testing,alt.binaries.foo'
    groups.analyze(newsgroups)
    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print(groups['test_bool'])
    print(groups['bin_allowed'])
    print(groups['bin_allowed_bool'])

Loading

0 comments on commit 725e281

Please sign in to comment.