#!/usr/bin/python3
"""Make RSS for a file list.

It requires rss.py by Fuktommy.

Synopsis:
    mkrss.py [/path/to/html/dir] > rss.xml
    mkrss.py -b /path/to/html/dir file_list > rss.xml
    find /path/to/html/dir -type f | \
        mkrss.py -b /path/to/html/dir > rss.xml

Options:
    -h header_file: File containing the title and other header fields
        for the RSS (see "Header file" below).
    -b base_dir: Read the file list from file_list or stdin; base_dir is
        the directory prefix of the listed files.

An RSS item is generated for a file if file.txt exists or the file is HTML.
If the file is HTML, its <title> tag and <meta name="description"...> tag
are used, and the item timestamp is taken from the Git commit log
(falling back to the file's mtime).

Header file:
    version: 1.0/2.0       # version of RSS
    parent: URI            # URI of parent of documents
    uri: URI               # URI of RSS
    link: URI              # URI of main page
    title: string          # title of RSS
    description: string    # description of RSS
    xsl: path              # path or URI of XSL (optional)
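
    Example header file (illustrative values; they match the defaults below):
        version: 1.0
        parent: http://example.com/files/
        uri: http://example.com/files/rss.xml
        link: http://example.com/files/
        title: Files
        description: My Files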

Text file:
    The top line is the title.
    The following lines are the description.
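
    Example file.txt (illustrative):
        My Document Title
        A short description of the document,
        possibly spanning several lines.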
""" | |
#
# Copyright (c) 2005-2019 Satoshi Fukutomi <info@fuktommy.com>.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#

import fileinput
import os
import re
import subprocess
import sys
import time
from html.parser import HTMLParser

import rss  # module written by Fuktommy


#
# Configuration
#
rss_version = "1.0"
encode = "utf-8"
parent_uri = "http://example.com/files/"
link = parent_uri
rss_uri = parent_uri + "rss.xml"
title = "Files"
description = "My Files"
xsl = ""
header_file = ""
os.environ['TZ'] = 'UTC'
time.tzset()


def read_header(f):
    """Read header file.

    version: 1.0/2.0       # version of RSS
    parent: URI            # URI of parent of documents
    uri: URI               # URI of RSS
    link: URI              # URI of main page
    title: string          # title of RSS
    description: string    # description of RSS
    xsl: path              # path or URI of XSL
    """
    global rss_version, parent_uri, rss_uri, link, title, description, xsl
    re_iscomment = re.compile(r"^\s*#")
    re_keyval = re.compile(r"\s*:\s*")
    del_space = re.compile(r"^\s*")
    del_eof = re.compile(r'[\r\n]*')
    last = ""
    conf = {}
    with open(f) as header:
        for line in header:
            if line == "\n":
                continue
            elif re_iscomment.search(line):
                continue
            line = del_space.sub("", line)
            line = del_eof.sub("", line)
            try:
                (key, val) = re_keyval.split(line, 1)
                conf[key] = val
                last = key
            except ValueError:
                conf[last] += line
    for key in conf:
        if key == "version":
            rss_version = conf[key]
        elif key == "parent":
            parent_uri = conf[key]
        elif key == "uri":
            rss_uri = conf[key]
        elif key == "link":
            link = conf[key]
        elif key == "title":
            title = conf[key]
        elif key == "description":
            description = conf[key]
        elif key == "xsl":
            xsl = conf[key]


def read_text(f):
    """Read from text file.

    The top line is the title; the remaining lines are the description.
    """
    with open(f + ".txt") as txt:
        title = txt.readline()
        desc = txt.read()
    return (title, desc)


class HtmlReader(HTMLParser):
    """Read from HTML.

    Use <title> tag and <meta name="description"...> tag.
    """

    attrs = {}
    now = ""
    encode = ""
    re_xmlencode = re.compile(r"xml.*encoding=.([^<>\"']+)..*\?$")
    re_htmlencode = re.compile(r"charset=([^<>\"'/]+)")
    del_space = re.compile(r"^\s*|\s*$")

    def __init__(self):
        """Constructor."""
        HTMLParser.__init__(self)
        self.attrs = {"title": "", "description": ""}
        self.now = ""
        self.encode = encode

    def dic_attr(self, attrs):
        """Convert attrs list to dictionary."""
        da = {}
        for i in attrs:
            da[i[0]] = i[1]
        return da

    def handle_starttag(self, tag, attrs):
        """Override method."""
        attrs = self.dic_attr(attrs)
        if tag == "title":
            self.now = "title"
        elif tag == "meta" \
                and "name" in attrs \
                and "content" in attrs \
                and attrs["name"] == "description":
            self.attrs["description"] = attrs["content"]
        elif tag == "meta" \
                and "http-equiv" in attrs \
                and "content" in attrs \
                and attrs["http-equiv"].lower() == "content-type":
            htmlencode = self.re_htmlencode.search(attrs["content"])
            if htmlencode:
                self.encode = htmlencode.group(1)
        else:
            self.now = ""

    def handle_data(self, data):
        """Override method."""
        if self.now:
            self.attrs[self.now] += data

    def handle_pi(self, data):
        """Override method."""
        xmlencode = self.re_xmlencode.search(data)
        if xmlencode:
            self.encode = xmlencode.group(1)

    def getData(self):
        """Return title and description."""
        for k in ("title", "description"):
            data = self.attrs[k]
            self.attrs[k] = self.del_space.sub("", data, 0)
        return (self.attrs["title"], self.attrs["description"])


def read_html(f):
    """Read from HTML.

    Wrapper of HtmlReader.
    """
    parser = HtmlReader()
    with open(f) as html:
        parser.feed(html.read())
    (title, description) = parser.getData()
    parser.close()
    return (title, description)


def findfiles(dir):
    """Search directories for files.

    It works like ``find dir -type f''.
    """
    files = []
    buf = []
    for f in os.listdir(dir):
        buf.append(dir + "/" + f)
    while len(buf) > 0:
        f = buf.pop(0)
        if os.path.islink(f):
            pass
        elif os.path.isdir(f):
            for g in os.listdir(f):
                buf.append(f + "/" + g)
        elif os.path.isfile(f):
            files.append(f)
    return files


def get_date(filename):
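    """Return the timestamp of filename as Unix time.

    The author date of the file's last Git commit is used when available;
    otherwise the filesystem mtime is used as a fallback.
    """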
    command = ['git', 'log', '-n', '1', '--pretty=format:%at', '--', filename]
    pipe = subprocess.Popen(command, stdout=subprocess.PIPE)
    try:
        unixtime = int(pipe.stdout.read())
    except ValueError:
        unixtime = os.path.getmtime(filename)
    pipe.wait()
    return unixtime


#
# main
#

#
# make list of files
#
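# Two ways to build the file list: "self_find" walks the directory given on
# the command line (default "."), while "given_list" (-b) reads file names,
# prefixed with the base directory, from the listed files or stdin.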
dir = "" | |
files = [] | |
lists = [] | |
mode = "self_find" # self_find / given_list | |
sys.argv.pop(0) | |
while (sys.argv): | |
i = sys.argv.pop(0) | |
if i == "-b": | |
mode = "given_list" | |
dir = sys.argv.pop(0) | |
elif i == "-h": | |
read_header(sys.argv.pop(0)) | |
else: | |
lists.append(i) | |
if mode == "self_find" and len(lists) == 1: | |
dir = lists[0] | |
files = findfiles(dir) | |
elif mode == "self_find" and len(lists) == 0: | |
dir = "." | |
files = findfiles(dir) | |
elif mode == "given_list": | |
for line in fileinput.input(): | |
files.append(*line.splitlines()) | |

#
# Generate RSS
#
rss_list = rss.RSS(encode=encode, title=title,
                   parent=parent_uri, uri=rss_uri, link=link,
                   description=description, xsl=xsl)
ishtml = re.compile(r"\.html$")
del_index = re.compile(r"index\.html$")
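
# For each file: take the date from Git (or mtime), read the title and
# description from file.txt or from the HTML itself, strip "index.html" /
# ".html" and the base directory from the item path, and add an item only
# when a title was found.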
for f in files:
    title = ""
    desc = ""
    date = get_date(f)
    if os.path.isfile(f + ".txt"):
        title, desc = read_text(f)
    elif ishtml.search(f):
        title, desc = read_html(f)
    f = del_index.sub("", f)
    f = ishtml.sub("", f)
    if title:
        f = f[len(dir)+1:]
        rss_list.append(f, title=title, date=date, description=desc)
if rss_version == "1.0":
    sys.stdout.write(rss.make_rss1(rss_list))
elif rss_version == "2.0":
    sys.stdout.write(rss.make_rss2(rss_list))