Browse files

Updated parser structure

  • Loading branch information...
1 parent c262dad commit b5f085a0d18581cb69c1036f8ed469a9e24b20cd @jbohman committed Jul 16, 2010
View
10 README
@@ -23,13 +23,12 @@ The Cassandra bindings are not automatically installed.
Installation
------------
To install Logsandra you run this command:
-
+
python setup.py install
-It is also possible to directly start using logsandra-httpd.py and logsandra-monitord.py without installing Logsandra to your site-package directory if all
-required modules are all ready installed.
+It is also possible to use Logsandra without installing it; just make sure that all required Python modules are installed and ready to be imported.
-The current keyspace configuration for Cassandra needed for Logsandra to work is:
+The current Cassandra keyspace configuration needed by Logsandra:
<Keyspaces>
<Keyspace Name="logsandra">
@@ -43,11 +42,12 @@ The current keyspace configuration for Cassandra needed for Logsandra to work is
</Keyspace>
</Keyspaces>
-The keyspace definition might be obsolete in future releases of Cassandra (0.7+) where you can programmatically create and drop keyspaces.
+The keyspace definition might become obsolete in future releases of Cassandra (0.7+) and Logsandra, where keyspaces can be created and dropped programmatically.
Usage
-----
Configuration
-------------
+
View
9 logsandra.yaml
@@ -13,7 +13,8 @@ cassandra_timeout: '5'
# List of paths (files and directories) to monitor
paths:
- - name: ~/coding/cassandra/access.log
- recursive: False
- parser: 'clf'
- clf_format: '%h %l %u %t %r %s %O %{Referer}i %{User-Agent}i'
+ - name: ~/coding/cassandra/access.log
+ recursive: False
+ parser:
+ name: 'clf'
+ format: '%h %l %u %t %r %s %O %{Referer}i %{User-Agent}i'
View
3 logsandra/model/__init__.py
@@ -40,6 +40,9 @@ def add(self, date, entry, source, keywords):
if not keywords:
raise Error('Missing keywords')
+ # TODO: Better handling of dates?
+ date = date.replace(tzinfo=None)
+
key = uuid.uuid1()
self.client.cf_entries.insert(str(key.hex), {'ident': str(self.client.ident), 'source': source, 'date': date.strftime('%Y-%m-%d %H:%M:%S'), 'entry': str(entry)})
View
14 logsandra/monitor/monitor.py
@@ -20,7 +20,9 @@ def __init__(self, settings, tail=False):
self.client = CassandraClient(self.settings['ident'], self.settings['cassandra_address'], self.settings['cassandra_port'], self.settings['cassandra_timeout'])
self.tail = tail
- self.seek_data = {}
+ self.seek_position = {}
+
+ # TODO: Automatically load parsers
self.parsers = {'clf': ClfParser(LogEntry(self.client))}
def run(self):
@@ -35,23 +37,23 @@ def callback(self, filename, data):
self.logger.debug('A change occurred in file %s with data %s' % (filename, data))
try:
file_handler = open(filename, 'rb')
- if filename in self.seek_data:
- file_handler.seek(self.seek_data[filename])
+ if filename in self.seek_position:
+ file_handler.seek(self.seek_position[filename])
else:
if self.tail:
file_handler.seek(0, os.SEEK_END)
for line in file_handler:
line = line.strip()
- result = self.parsers[data['parser']].parse(line, data)
+ result = self.parsers[data['parser']['name']].parse(line, data['source'], data['parser'])
if result:
self.logger.debug('Parsed line: %s' % line)
else:
self.logger.error('Failed to parse line: %s' % line)
- # TODO: Persist seek_data
- self.seek_data[filename] = file_handler.tell()
+ # TODO: Persist seek_position
+ self.seek_position[filename] = file_handler.tell()
file_handler.close()
except IOError:
View
4 logsandra/monitor/parsers/__init__.py
@@ -0,0 +1,4 @@
+class BaseParser(object):
+
+ def __init__(self, log_entries):
+ self.log_entries = log_entries
View
16 logsandra/monitor/parsers/clf.py
@@ -1,6 +1,8 @@
import re
import dateutil.parser
+from logsandra.monitor.parsers import BaseParser
+
clf = {
'%h': r'(?P<host>\S+)',
'%l': r'\S+',
@@ -19,15 +21,11 @@
'%v': r'(?P<server>\S+)'
}
-class ClfParser(object):
-
- def __init__(self, log_entries):
- self.log_entries = log_entries
+class ClfParser(BaseParser):
- def parse(self, line, data):
- print data
+ def parse(self, line, source, data):
parts = []
- for element in data['clf_format'].split(' '):
+ for element in data['format'].split(' '):
parts.append(clf[element])
# TODO: optimize by storing compiled regex?
@@ -73,6 +71,6 @@ def parse(self, line, data):
keywords.append('server:%s' % result['server'])
keywords.append(result['server'])
- date = dateutil.parser.parse(result['time'], fuzzy=True).replace(tzinfo=None)
+ date = dateutil.parser.parse(result['time'], fuzzy=True)
- return self.log_entries.add(date, line, data['source'], keywords)
+ return self.log_entries.add(date=date, entry=line, source=source, keywords=keywords)

0 comments on commit b5f085a

Please sign in to comment.