-
Notifications
You must be signed in to change notification settings - Fork 1
/
userFunctions.py
185 lines (173 loc) · 8.63 KB
/
userFunctions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
def ifTorrent(directory, filename, rssItemNode, retrievedLink, downloadDict,
threadName):
u"""Confirms that downloaded data is valid bencoded (torrent) data
If not, executes failedProcedure
"""
try: fd = open(os.path.join(directory, filename), 'rb')
except IOError, m:
logging.error( unicode(m) +
u" could not even open our just written file. leaving function..")
return None
try: fdT = bdecode( fd.read() )
except ValueError: fdT = False
if fdT: return True
else:
failedProcedure(u"The file %s wasn't actually torrent data. Attempting to remove from queue. Will add to failedDown" % filename , directory, filename, threadName, rssItemNode, downloadDict )
return False
def currentOnly(directory, filename, rssItemNode, retrievedLink, downloadDict,
                threadName):
    u"""Discard items that were not added to the feed recently.

    Useful for feeds that occasionally regurgitate old entries.  The
    per-thread config option 'maxage' (seconds) sets the cutoff;
    defaults to 86400 (one day)."""
    global rss
    try:
        ageLimit = getConfig().getint(threadName, 'maxage')
    except (ValueError, ConfigParser.NoOptionError):
        ageLimit = 86400
    itemAge = time.time() - time.mktime( rssItemNode['updated_parsed'] )
    if itemAge <= ageLimit:
        return
    # Too old: drop the downloaded file and the rss entry.
    try:
        os.unlink( os.path.join(directory, filename) )
    except OSError:
        logging.critical(u"could not remove file from disk: %s" % filename)
    if rss:
        rss.delItem()
def noRss(*args):
    u"""Drop the current item from the generated RSS archive,
    for downloads you do not want reflected in the output feed."""
    global rss
    if rss:
        rss.delItem()
def saveFeed(page, ppage, retrievedLink, threadName):
    u"""Archive fetched rss items to disk via MakeRss.

    Lets a human peruse past items in an rss reader without hitting the
    server repeatedly.  Per-thread config options: 'rssfile' names the
    output file (default sectionName.xml) and 'rsslength' caps the item
    count (default 100).
    WILL generate an invalid feed that may break some readers.
    See issue #3 (link below).
    http://code.google.com/p/rssdler/issues/detail?id=3
    """
    try:
        outName = getConfig().get(threadName, 'rssfile')
    except ConfigParser.NoOptionError:
        outName = "%s.xml" % threadName
    try:
        maxItems = getConfig().getint(threadName, 'rsslength')
    except ConfigParser.NoOptionError:
        maxItems = 100
    archive = MakeRss(filename=outName, parse=True)
    archive.channelMeta = ppage['feed']
    # Only append entries whose link is not already archived.
    knownLinks = [ item['link'] for item in archive.itemsQuaDict ]
    for entry in reversed(ppage['entries']):
        if entry['link'] not in knownLinks:
            archive.addItem(entry)
    archive.close(length=maxItems)
    archive.write()
def rewriteFeed(*args):
    u"""Rewrite each entry's description to prepend a profile-page link.

    Uses per-thread options rewriteregex, rewritelink, and rewritetext
    to manipulate entries before handing off to saveFeed.  If any of the
    three options is missing, falls back to a plain saveFeed and stops.
    (The original fell through after the fallback and crashed on the
    undefined names, and could call saveFeed several times.)
    """
    page, ppage, retrievedLink, threadName = args
    try: search = getConfig().get(threadName, 'rewriteregex')
    except ConfigParser.NoOptionError:
        saveFeed(*args)
        return
    try: baselink = getConfig().get(threadName, 'rewritelink')
    except ConfigParser.NoOptionError:
        saveFeed(*args)
        return
    try: addtext = getConfig().get(threadName, 'rewritetext')
    except ConfigParser.NoOptionError:
        saveFeed(*args)
        return
    for i, entry in enumerate(ppage['entries']):
        # group(1) of rewriteregex is appended to rewritelink; if the
        # regex does not match, fall back to the bare base link.
        try: link = "%s%s" % (baselink, re.search(search, entry['link']).group(1))
        except AttributeError: link = baselink
        text = """<a href="%s">%s</a> """ % (link, addtext)
        ppage['entries'][i]['description'] = "%s %s" %( text, entry['description'])
    saveFeed(page, ppage, retrievedLink, threadName)
def downloadFromSomeSite( directory, filename, rssItemNode, retrievedLink,
downloadDict, threadName ):
"""download a file from an html page.
set two options in your thread configuration
baselink: the baseurl of the site (should include a trailing /
example: http://cnn.com/
urlsearch: a string that will be present in the url for any given download.
e.g. /download/, /torrent/, gettorrent.php, etc.
by default, this is not a regular expression, but a code snippet is provided
in the source if you want to treat it as one.
depends on libxml2dom
assumes you want to grab a torrent file.
Comment out last two lines if you want a non-torrent file
"""
try:
baselink = getConfig().get(threadName, 'baselink')
urlsearch = getConfig().get(threadName, 'urlsearch')
except ConfigParser.NoOptionError:
logging.critical("""To use downloadFromSomeSite function, \
you must provide options baselink and urlsearch in your config""")
return None
global libxml2dom
try: libxml2dom
except NameError: import libxml2dom
try: a = codecs.open( os.path.join( directory, filename ), 'rb' )
except IOError, m:
failedProcedure( u"""%s: could not even open our just written file.leaving \
function..""" % m, directory, filename, threadName, rssItemNode, downloadDict)
return None
p = libxml2dom.parseString( a.read(), html=True)
try: link = "%s%s" % (baselink , [x.getAttribute('href') for x in
p.getElementsByTagName('a') if x.hasAttribute('href') and
x.getAttribute('href').count(urlsearch) ][0] )
# if you want a regex. Then, instead of
# x.getAttribute('href').count(urlsearch) do:
# re.search(urlsearch, x.getAttribute('href'))
except IndexError, m:
failedProcedure( u"""%s: could not find href for downloaded %s item for \
redownload""" % (m, threadName), directory, filename, threadName,
rssItemNode, downloadDict)
return None
try: d = downloader(link)
except (urllib2.HTTPError, urllib2.URLError, httplib.HTTPException), m:
failedProcedure( '%s: could not download torrent from site' % m,
directory, filename, threadName, rssItemNode, downloadDict)
return None
newfilename = getFilenameFromHTTP( d.info(), d.geturl() )
newfilename = writeNewFile( newfilename, directory, d )
# assume we want a torrent file
if ifTorrent( directory, newfilename, rssItemNode, retrievedLink,
downloadDict, threadName):
os.unlink( os.path.join(directory, filename) )
def failedProcedure( message, directory, filename, threadName, rssItemNode,
                     downloadDict ):
    u"""Clean up after a failed download.

    Logs the message, moves the item from saved.downloads onto
    saved.failedDown, removes directory/filename from disk, and drops
    the item from rss.  Meant to be called from the functions in this
    module, not directly from RSSDler."""
    global saved
    logging.critical(unicodeC(message))
    failed = FailedItem( saved.downloads.pop(), threadName, rssItemNode,
                         downloadDict)
    saved.failedDown.append(failed)
    try:
        os.unlink(os.path.join(directory, filename))
    except OSError:
        logging.critical(u"could not remove file from disk: %s" % filename)
    if rss:
        rss.delItem()
def advanceEpisode(downloadDict, threadName, regex=r'^(\D+\d+\D+)(\d+)(\D*)$',
                   regNum=2, regSub=r'\g<1>%02d\g<3>'):
    u"""Advance the episode number implicitly encoded in a download filter.

    The regex pulls the episode number out of localTrue (match group
    regNum), adds one, and substitutes it back via regSub -- so regSub
    should contain exactly ONE interpolation (%d, %02d, ...).  The
    changed filter is pushed into the ConfigParser and written to the
    config file, which will destroy the file's layout and comments
    (blame ConfigParser).
    The defaults will NOT work if the show's name contains digits; they
    expect filters shaped like name.of.the.show.SEASONbreakNumberMaybesomeMore
    e.g. weeds.*4.*01.*hdtv (the '.*hdtv' part is optional).
    Do not register this directly as a postDownloadFunction; wrap it per
    show instead, e.g.
      def weeds(*args):
          if ifTorrent(*args): advanceEpisode( args[4], args[5])
    supplying custom regular expressions as needed."""
    global configFile
    if downloadDict['localTrue'] == None:
        return  # no filter string to work from
    if not getConfig()['threads'][threadName]['downloads'].count(downloadDict):
        return  # download entry is not in the live config
    idx = getConfig()['threads'][threadName]['downloads'].index(downloadDict)
    try:
        nextEp = int(re.search(regex, downloadDict['localTrue']).group(regNum)) + 1
    except (ValueError, IndexError):
        return  # no episode number to advance
    bumped = re.sub(regex, regSub % nextEp, downloadDict['localTrue'])
    getConfig()['threads'][threadName]['downloads'][idx]['localTrue'] = bumped
    getConfig().push()
    getConfig().write(configFile)
    #getConfig(reload=True) # SHOULD be unnecessary