#!/usr/bin/python
"""Make RSS for file list.
It requires rss.py by Fuktommy.
Synopsis:
mkrss.py [/path/to/html/dir] > rss.xml
mkrss.py -b /path/to/html/dir file_list > rss.xml
find /path/to/html/dir -type f | \
mkrss.py -b /path/to/html/dir > rss.xml
Options:
-h header_file: file that contains the title and other metadata for the RSS.
An RSS item is generated for a file if "file.txt" exists or the file is HTML.
For HTML files the <title> tag and the <meta name="description"...> tag are
used. The timestamp is taken from the Git log.
Header file:
version: 1.0/2.0 # version of RSS
parent: URI # URI of parent of documents
uri: URI # URI of RSS
link: URI # URI of main page
title: string # title of RSS
description: string # description of RSS
Text file:
The first line is the title.
The following lines are the description.
"""
#
# Copyright (c) 2005-2012 Satoshi Fukutomi <info@fuktommy.com>.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
import HTMLParser
import fileinput
import os
import subprocess
import sys
import re
import time
import rss # module written by Fuktommy
#
# Configuration
#
rss_version = "1.0"
encode = "utf-8"
parent_uri = "http://example.com/files/"
link = parent_uri
rss_uri = parent_uri + "rss.xml"
title = "Files"
description = "My Files"
xsl = ""
header_file = ""
os.environ['TZ'] = 'UTC'
time.tzset()
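# An illustrative header file for the -h option (a hedged example: the values
# mirror the defaults above, and rss.xsl is a hypothetical stylesheet path):
#
#     version: 1.0
#     parent: http://example.com/files/
#     uri: http://example.com/files/rss.xml
#     link: http://example.com/files/
#     title: Files
#     description: My Files
#     xsl: rss.xsl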
def read_header(f):
"""Read header file.
version: 1.0/2.0 # version of RSS
parent: URI # URI of parent of documents
uri: URI # URI of RSS
link: URI # URI of main page
title: string # title of RSS
description: string # description of RSS
xsl: path # PATH or URI of XSL
"""
global rss_version, parent_uri, rss_uri, link, title, description, xsl
re_iscomment = re.compile(r"^\s*#")
re_keyval = re.compile(r"\s*:\s*")
del_space = re.compile(r"^\s*")
del_eof = re.compile(r'[\r\n]*')
last = ""
conf = {}
for line in file(f):
if line == "\n":
continue
elif re_iscomment.search(line):
continue
line = del_space.sub("", line)
line = del_eof.sub("", line)
try:
(key, val) = re_keyval.split(line, 1)
conf[key] = val
last = key
except ValueError:
conf[last] += line
for key in conf:
if key == "version":
            rss_version = conf[key]
elif key == "parent":
parent_uri = conf[key]
elif key == "uri":
rss_uri = conf[key]
elif key == "link":
link = conf[key]
elif key == "title":
title = conf[key]
elif key == "description":
description = conf[key]
elif key == "xsl":
xsl = conf[key]
def read_text(f):
"""Read from text file.
    The first line is the title.
    The following lines are the description.
"""
txt = file(f + ".txt")
title = txt.readline()
desc = ''
for line in txt:
desc += line
txt.close()
return (title, desc)
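# For example, a hypothetical sidecar "page.html.txt" containing
#     My Page Title
#     A longer description of the page,
#     possibly spanning several lines.
# makes read_text("page.html") return that title/description pair.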
class HtmlReader(HTMLParser.HTMLParser):
"""Read from HTML.
Use <title> tag and <meta name="description"...> tag.
"""
attrs = {}
now = ""
encode = ""
re_xmlencode = re.compile(r"xml.*encoding=.([^<>\"']+)..*\?$")
re_htmlencode = re.compile(r"charset=([^<>\"'/]+)")
del_space = re.compile(r"^\s*|\s*$")
def __init__(self):
"""Constructor."""
HTMLParser.HTMLParser.__init__(self)
self.attrs = {"title": "", "description": ""}
self.now = ""
self.encode = encode
def dic_attr(self, attrs):
"""Convert attrs list to dictionary."""
da = {}
for i in attrs:
da[i[0]] = i[1]
return da
def handle_starttag(self, tag, attrs):
"""Overwrite method."""
attrs = self.dic_attr(attrs)
if tag == "title":
self.now = "title"
elif tag == "meta" \
and "name" in attrs \
and "content" in attrs \
and attrs["name"] == "description":
self.attrs["description"] = attrs["content"]
elif tag == "meta" \
and "http-equiv" in attrs \
and "content" in attrs\
and attrs["http-equiv"].lower() == "content-type":
            htmlencode = self.re_htmlencode.search(attrs["content"])
            if htmlencode:
                self.encode = htmlencode.group(1)
else:
self.now = ""
def handle_data(self, data):
"""Overwrite method."""
if self.now:
self.attrs[self.now] += data
def handle_pi(self, data):
"""Overwrite method."""
xmlencode = self.re_xmlencode.search(data)
if xmlencode:
self.encode = xmlencode.group(1)
def getData(self):
"""Return title and description."""
for k in ("title", "description"):
data = unicode(self.attrs[k], self.encode).encode(encode)
self.attrs[k] = self.del_space.sub("", data, 0)
return (self.attrs["title"], self.attrs["description"])
def read_html(f):
"""Read from HTML.
Wrapper of HtmlReader.
"""
html = file(f)
parser = HtmlReader()
parser.feed(html.read())
(title, description) = parser.getData()
parser.close()
    return (title, description)
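# For example (hypothetical markup), an HTML file whose head contains
#     <meta http-equiv="content-type" content="text/html; charset=utf-8">
#     <meta name="description" content="A short summary of the page">
#     <title>Page Title</title>
# makes read_html() return ("Page Title", "A short summary of the page"),
# re-encoded from the detected charset to the global `encode`.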
def findfiles(dir):
"""Search directorys for files.
It works lile ``find dir -type f''.
"""
files = []
buf = []
for f in os.listdir(dir):
buf.append(dir + "/" + f)
while len(buf) > 0:
f = buf.pop(0)
if os.path.islink(f):
pass
elif os.path.isdir(f):
for g in os.listdir(f):
buf.append(f + "/" + g)
elif os.path.isfile(f):
files.append(f)
return files
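# For example, findfiles("htdocs") might return paths such as
# ["htdocs/index.html", "htdocs/notes/a.html"] (hypothetical layout);
# symbolic links are skipped.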
def get_date(filename):
    """Return the timestamp of the last Git commit touching filename.
    Falls back to the file's mtime when there is no Git history.
    """
    command = ['git', 'log', '-n', '1', '--pretty=format:%at', '--', filename]
pipe = subprocess.Popen(command, stdout=subprocess.PIPE)
try:
unixtime = int(pipe.stdout.read())
except ValueError:
unixtime = os.path.getmtime(filename)
pipe.wait()
return unixtime
#
# main
#
#
# make list of files
#
dir = ""
files = []
lists = []
mode = "self_find" # self_find / given_list
sys.argv.pop(0)
while (sys.argv):
i = sys.argv.pop(0)
if i == "-b":
mode = "given_list"
dir = sys.argv.pop(0)
elif i == "-h":
read_header(sys.argv.pop(0))
else:
lists.append(i)
if mode == "self_find" and len(lists) == 1:
dir = lists[0]
files = findfiles(dir)
elif mode == "self_find" and len(lists) == 0:
dir = "."
files = findfiles(dir)
elif mode == "given_list":
    # read file names from the file_list arguments if given, else from stdin
    for line in fileinput.input(lists):
        files.append(line.rstrip("\r\n"))
#
# Generate RSS
#
list = rss.RSS(encode=encode, title=title,
parent=parent_uri, uri=rss_uri, link=link,
description=description, xsl=xsl)
ishtml = re.compile(r"\.html$")
del_index = re.compile(r"index\.html$")
for f in files:
date = None
title = ""
desc = ""
date = get_date(f)
if os.path.isfile(f + ".txt"):
title, desc = read_text(f)
elif ishtml.search(f):
title, desc = read_html(f)
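    # Strip a trailing index.html and the .html suffix so the item links use
    # clean URIs.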
f = del_index.sub("", f)
f = ishtml.sub("", f)
if title:
f = f[len(dir)+1:]
list.append(f, title=title, date=date, description=desc)
if rss_version == "1.0":
sys.stdout.write(rss.make_rss1(list))
elif rss_version == "2.0":
sys.stdout.write(rss.make_rss2(list))
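# Example invocations (hypothetical paths; rss.header is a header file as
# described above):
#     ./mkrss.py -h rss.header /var/www/html > /var/www/html/rss.xml
#     find /var/www/html -type f | ./mkrss.py -b /var/www/html -h rss.header > rss.xml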