Skip to content

Commit

Permalink
Merge pull request #45 from joaquinpf/batoto_xml_fixes
Browse files Browse the repository at this point in the history
Batoto fixes
  • Loading branch information
jklmli committed May 31, 2014
2 parents 23713ca + 16c1989 commit f8c3e00
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 32 deletions.
48 changes: 38 additions & 10 deletions src/parsers/batoto.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,19 @@ def parseSite(self):
a = soup.find("div", id="comic_search_results")
r = a.tbody.find_all("tr")[1:]
seriesl = []
try:
for i in r:
u = i.td.a['href']
t = i.td.a.img.next_sibling[1:]
for i in r:
try:
e = i.td.findAll('a')[1]
u = e['href']
t = e.img.next_sibling[1:]
seriesl.append((u,t.encode('utf-8')))
except TypeError:
except:
pass

if not seriesl:
# signifies no manga found
raise self.MangaNotFound("Nonexistent.")

manga = self.selectFromResults(seriesl)
if self.verbose_FLAG:
print(manga)
Expand All @@ -62,22 +66,34 @@ def parseSite(self):
cl = t.find_all("tr", class_="lang_English")
self.chapters = [[]]
cnum = self.chapters[0]

for i in cl:
u = i.td.a['href']
t = i.td.a.img.next_sibling[1:]
g = i.find_all("td")[2].get_text().strip()

try:
c = float(re.search("ch([\d.]+)", u).group(1))
c = str(int(c)) if c.is_integer() else str(c)
except AttributeError:
c = 0
tu = (u,t,g,c)
tu = (u,t,c,g)
if len(cnum) == 0 or cnum[0][3] == c:
cnum.append(tu)
else:
self.chapters.append([])
cnum = self.chapters[-1]
cnum.append(tu)

self.chapters.reverse()

#Look for first chapter that should be downloaded in auto mode
lowerRange = 0
if (self.auto):
for i in range(0, len(self.chapters)):
if (self.lastDownloaded == self.chapters[i][0][1]):
lowerRange = i + 1

sc = None
for i in self.chapters:
if len(i) == 1 or sc == None:
Expand All @@ -100,9 +116,21 @@ def parseSite(self):
sc = i[0]
del i[1:]
self.chapters = [i[0] for i in self.chapters]
for n,c in enumerate(self.chapters):
print("{:03d}. {}".format(n+1, c[1].encode('utf-8')))
self.chapters_to_download = self.selectChapters(self.chapters)

upperRange = len(self.chapters)
# which ones do we want?
if (not self.auto):
for n,c in enumerate(self.chapters):
print("{:03d}. {}".format(n+1, c[1].encode('utf-8')))
self.chapters_to_download = self.selectChapters(self.chapters)
# XML component
else:
if ( lowerRange == upperRange):
raise self.NoUpdates

for i in range (lowerRange, upperRange):
self.chapters_to_download.append(i)
return

def downloadChapter(self, downloadThread, max_pages, url, manga_chapter_prefix, current_chapter):
"""We ignore max_pages, because you can't regex-search that under Batoto."""
Expand Down
44 changes: 22 additions & 22 deletions src/xmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,65 +21,65 @@ def downloadManga(self):
print("Parsing XML File...")
if (self.verbose_FLAG):
print("XML Path = %s" % self.xmlfile_path)

dom = minidom.parse(self.xmlfile_path)

threadPool = []
self.options.auto = True

SetOutputPathToName_Flag = False
# Default OutputDir is the ./MangaName
if (self.options.outputDir == 'DEFAULT_VALUE'):
SetOutputPathToName_Flag = True

for node in dom.getElementsByTagName("MangaSeries"):
seriesOptions = self.options
seriesOptions.manga = getText(node.getElementsByTagName('name')[0])
seriesOptions.site = getText(node.getElementsByTagName('HostSite')[0])

try:
lastDownloaded = getText(node.getElementsByTagName('LastChapterDownloaded')[0])
except IndexError:
lastDownloaded = ""

try:
download_path = getText(node.getElementsByTagName('downloadPath')[0])
except IndexError:
download_path = ('./' + fixFormatting(seriesOptions.manga))
download_path = ('./' + fixFormatting(seriesOptions.manga, seriesOptions.spaceToken))

if self.options.downloadPath != 'DEFAULT_VALUE' and not os.path.isabs(download_path):
download_path = os.path.join(self.options.downloadPath, download_path)

seriesOptions.downloadPath = download_path
seriesOptions.lastDownloaded = lastDownloaded
if SetOutputPathToName_Flag:
seriesOptions.outputDir = download_path
# Because the SiteParserThread constructor parses the site to retrieve which chapters to

# Because the SiteParserThread constructor parses the site to retrieve which chapters to
# download the following code would be faster

# thread = SiteParserThread(self.options, dom, node)
# thread.start()
# threadPool.append(thread)
# Need to remove the loop which starts the thread's downloading. The disadvantage is that the
# print statement would intermingle with the progress bar. It would be very difficult to

# Need to remove the loop which starts the thread's downloading. The disadvantage is that the
# print statement would intermingle with the progress bar. It would be very difficult to
# understand what was happening. Do not believe this change is worth it.

threadPool.append(SiteParserThread(seriesOptions, dom, node))
for thread in threadPool:

for thread in threadPool:
thread.start()
thread.join()

#print (dom.toxml())
#Backs up file
backupFileName = self.xmlfile_path + "_bak"
os.rename(self.xmlfile_path, backupFileName)
f = open(self.xmlfile_path, 'w')
outputStr = dom.toxml()

outputStr = '\n'.join([line for line in dom.toprettyxml().split('\n') if line.strip()])
outputStr = outputStr.encode('utf-8')
f.write(outputStr)
f.write(outputStr)

# The file was successfully saved and now remove backup
os.remove(backupFileName)

0 comments on commit f8c3e00

Please sign in to comment.