Skip to content

Commit

Permalink
Merge pull request #45 from joaquinpf/batoto_xml_fixes
Browse files Browse the repository at this point in the history
Batoto fixes
  • Loading branch information
jklmli committed May 31, 2014
2 parents 23713ca + 16c1989 commit f8c3e00
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 32 deletions.
48 changes: 38 additions & 10 deletions src/parsers/batoto.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,19 @@ def parseSite(self):
a = soup.find("div", id="comic_search_results")
r = a.tbody.find_all("tr")[1:]
seriesl = []
try:
for i in r:
u = i.td.a['href']
t = i.td.a.img.next_sibling[1:]
for i in r:
try:
e = i.td.findAll('a')[1]
u = e['href']
t = e.img.next_sibling[1:]
seriesl.append((u,t.encode('utf-8')))
except TypeError:
except:
pass

if not seriesl:
# signifies no manga found
raise self.MangaNotFound("Nonexistent.")

manga = self.selectFromResults(seriesl)
if self.verbose_FLAG:
print(manga)
Expand All @@ -62,22 +66,34 @@ def parseSite(self):
cl = t.find_all("tr", class_="lang_English")
self.chapters = [[]]
cnum = self.chapters[0]

for i in cl:
u = i.td.a['href']
t = i.td.a.img.next_sibling[1:]
g = i.find_all("td")[2].get_text().strip()

try:
c = float(re.search("ch([\d.]+)", u).group(1))
c = str(int(c)) if c.is_integer() else str(c)
except AttributeError:
c = 0
tu = (u,t,g,c)
tu = (u,t,c,g)
if len(cnum) == 0 or cnum[0][3] == c:
cnum.append(tu)
else:
self.chapters.append([])
cnum = self.chapters[-1]
cnum.append(tu)

self.chapters.reverse()

#Look for first chapter that should be downloaded in auto mode
lowerRange = 0
if (self.auto):
for i in range(0, len(self.chapters)):
if (self.lastDownloaded == self.chapters[i][0][1]):
lowerRange = i + 1

sc = None
for i in self.chapters:
if len(i) == 1 or sc == None:
Expand All @@ -100,9 +116,21 @@ def parseSite(self):
sc = i[0]
del i[1:]
self.chapters = [i[0] for i in self.chapters]
for n,c in enumerate(self.chapters):
print("{:03d}. {}".format(n+1, c[1].encode('utf-8')))
self.chapters_to_download = self.selectChapters(self.chapters)

upperRange = len(self.chapters)
# which ones do we want?
if (not self.auto):
for n,c in enumerate(self.chapters):
print("{:03d}. {}".format(n+1, c[1].encode('utf-8')))
self.chapters_to_download = self.selectChapters(self.chapters)
# XML component
else:
if ( lowerRange == upperRange):
raise self.NoUpdates

for i in range (lowerRange, upperRange):
self.chapters_to_download.append(i)
return

def downloadChapter(self, downloadThread, max_pages, url, manga_chapter_prefix, current_chapter):
"""We ignore max_pages, because you can't regex-search that under Batoto."""
Expand Down
44 changes: 22 additions & 22 deletions src/xmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,65 +21,65 @@ def downloadManga(self):
print("Parsing XML File...")
if (self.verbose_FLAG):
print("XML Path = %s" % self.xmlfile_path)

dom = minidom.parse(self.xmlfile_path)

threadPool = []
self.options.auto = True

SetOutputPathToName_Flag = False
# Default OutputDir is the ./MangaName
if (self.options.outputDir == 'DEFAULT_VALUE'):
SetOutputPathToName_Flag = True

for node in dom.getElementsByTagName("MangaSeries"):
seriesOptions = self.options
seriesOptions.manga = getText(node.getElementsByTagName('name')[0])
seriesOptions.site = getText(node.getElementsByTagName('HostSite')[0])

try:
lastDownloaded = getText(node.getElementsByTagName('LastChapterDownloaded')[0])
except IndexError:
lastDownloaded = ""

try:
download_path = getText(node.getElementsByTagName('downloadPath')[0])
except IndexError:
download_path = ('./' + fixFormatting(seriesOptions.manga))
download_path = ('./' + fixFormatting(seriesOptions.manga, seriesOptions.spaceToken))

if self.options.downloadPath != 'DEFAULT_VALUE' and not os.path.isabs(download_path):
download_path = os.path.join(self.options.downloadPath, download_path)

seriesOptions.downloadPath = download_path
seriesOptions.lastDownloaded = lastDownloaded
if SetOutputPathToName_Flag:
seriesOptions.outputDir = download_path
# Because the SiteParserThread constructor parses the site to retrieve which chapters to

# Because the SiteParserThread constructor parses the site to retrieve which chapters to
# download the following code would be faster

# thread = SiteParserThread(self.options, dom, node)
# thread.start()
# threadPool.append(thread)
# Need to remove the loop which starts the thread's downloading. The disadvantage is that the
# print statement would intermingle with the progress bar. It would be very difficult to

# Need to remove the loop which starts the thread's downloading. The disadvantage is that the
# print statement would intermingle with the progress bar. It would be very difficult to
# understand what was happening. Do not believe this change is worth it.

threadPool.append(SiteParserThread(seriesOptions, dom, node))
for thread in threadPool:

for thread in threadPool:
thread.start()
thread.join()

#print (dom.toxml())
#Backs up file
backupFileName = self.xmlfile_path + "_bak"
os.rename(self.xmlfile_path, backupFileName)
f = open(self.xmlfile_path, 'w')
outputStr = dom.toxml()

outputStr = '\n'.join([line for line in dom.toprettyxml().split('\n') if line.strip()])
outputStr = outputStr.encode('utf-8')
f.write(outputStr)
f.write(outputStr)

# The file was successfully saved and now remove backup
os.remove(backupFileName)

0 comments on commit f8c3e00

Please sign in to comment.