Skip to content
This repository has been archived by the owner on Jan 2, 2021. It is now read-only.

Commit

Permalink
FIX: (#181) 2000 AD exception added to csv for multi-volume support, …
Browse files Browse the repository at this point in the history
…FIX: ComicYear was parsed wrong when using multi-volume occasionally, IMP: Turned off console printing for parsing
  • Loading branch information
evilhero authored and claym committed Feb 13, 2013
1 parent ee6334c commit 365d88c
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 21 deletions.
3 changes: 3 additions & 0 deletions exceptions.csv
Expand Up @@ -14,6 +14,8 @@
2,2127,none,1570/7794/11288
#The Boys
1,18033,none,19531/25058
#2000 A.D.
4,19752,none,11289/11295/11294/11292/11293
#--------
#-----Mismatched Names------
99,3092,2605,none
Expand All @@ -27,3 +29,4 @@
99,42821,60934,none
99,42947,61242,none
99,42322,60917,none
99,53920,69871,none
2 changes: 2 additions & 0 deletions mylar/importer.py
Expand Up @@ -343,6 +343,8 @@ def addComictoDB(comicid,mismatch=None,pullupd=None):
#print ("Existing status : " + str(iss_exists['Status']))
newValueDict['Status'] = iss_exists['Status']

#logger.fdebug("newValueDict:" + str(newValueDict))

myDB.upsert("issues", newValueDict, controlValueDict)
n+=1

Expand Down
36 changes: 18 additions & 18 deletions mylar/parseit.py
Expand Up @@ -184,16 +184,16 @@ def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariati
parsed = soup.find("div", {"id" : "series_data"})
subtxt3 = parsed.find("dd", {"id" : "publication_dates"})
resultPublished = subtxt3.findNext(text=True).rstrip()
print ("pubdate:" + str(resultPublished))
#print ("pubdate:" + str(resultPublished))
coverst = soup.find("div", {"id" : "series_cover"})
if coverst < 0:
gcdcover = "None"
else:
subcoverst = coverst('img',src=True)[0]
gcdcover = subcoverst['src']

print ("resultURL:" + str(resultURL))
print ("comicID:" + str(ComicID))
#print ("resultURL:" + str(resultURL))
#print ("comicID:" + str(ComicID))
input2 = 'http://www.comics.org' + str(resultURL) + 'details/'
resp = urllib2.urlopen(input2)
soup = BeautifulSoup(resp)
Expand All @@ -203,18 +203,18 @@ def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariati

type = soup.find(text=' On-sale date ')
if type:
print ("on-sale date detected....adjusting")
#print ("on-sale date detected....adjusting")
datetype = "on-sale"
else:
print ("pub date defaulting")
#print ("pub date defaulting")
datetype = "pub"

cnt1 = len(soup.findAll("tr", {"class" : "row_even_False"}))
cnt2 = len(soup.findAll("tr", {"class" : "row_even_True"}))

cnt = int(cnt1 + cnt2)

print (str(cnt) + " Issues in Total (this may be wrong due to alternate prints, etc")
#print (str(cnt) + " Issues in Total (this may be wrong due to alternate prints, etc")

n_odd = -1
n_even = -1
Expand All @@ -236,7 +236,7 @@ def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariati
fid = parsed('a',href=True)[0]
resultGID = fid['href']
resultID = resultGID[7:-1]
print ( "ID: " + str(resultID) )
#print ( "ID: " + str(resultID) )

if ',' in ParseIssue: ParseIssue = re.sub("\,", "", ParseIssue)
#print ("ParseIssue before : " + str(ParseIssue))
Expand All @@ -247,7 +247,7 @@ def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariati
#however, if ONLY alternate covers exist of an issue it won't work.
#let's use the FIRST record, and ignore all other covers for the given issue.
isschk = ParseIssue[:isslen]
print ("Parse is now: " + str(isschk))
#print ("Parse is now: " + str(isschk))

#check if decimal or '1/2' exists or not, and store decimal results
halfchk = "no"
Expand Down Expand Up @@ -297,24 +297,24 @@ def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariati
#logger.fdebug("adding issue to db : " + str(ParseIssue))
# in order to get the compare right, let's decimialize the string to '.00'.
gcdinfo['ComicIssue'] = ParseIssue
print "Issue: " + str(ParseIssue)
#print "Issue: " + str(ParseIssue)
#^^ will retrieve issue
#if datetype == "on-sale":
subtxt1 = parsed('td')[2]
ParseDate = subtxt1.findNext(text=True)
pdlen = len(ParseDate)
print "sale-date..ParseDate:" + str(ParseDate)
print ("Parsed Date length: " + str(pdlen))
#print "sale-date..ParseDate:" + str(ParseDate)
#print ("Parsed Date length: " + str(pdlen))
if len(ParseDate) < 7:
subtxt3 = parsed('td')[0]
ParseDate = subtxt3.findNext(text=True)
print "pub-date..ParseDate:" + str(ParseDate)
#print "pub-date..ParseDate:" + str(ParseDate)
if ParseDate == ' ':
#default to empty so doesn't error out.
ParseDate = "0000-00-00"
#ParseDate = ParseDate.replace('?','')
ParseDate = ParseDate.replace(' ','')
print "Parse date: " + str(ParseDate)
#print "Parse date: " + str(ParseDate)
gcdinfo['ComicDate'] = ParseDate
#^^ will retrieve date #
if not any(d.get('GCDIssue', None) == str(gcdinfo['ComicIssue']) for d in gcdchoice):
Expand Down Expand Up @@ -461,11 +461,11 @@ def ComChk(ComicName, ComicYear, ComicPublisher, Total, ComicID):
comicis = Total
comicid = ComicID
comicpub = ComicPublisher
print ( "comicname: " + str(comicnm) )
print ( "comicyear: " + str(comicyr) )
print ( "comichave: " + str(comicis) )
print ( "comicpub: " + str(comicpub) )
print ( "comicid: " + str(comicid) )
#print ( "comicname: " + str(comicnm) )
#print ( "comicyear: " + str(comicyr) )
#print ( "comichave: " + str(comicis) )
#print ( "comicpub: " + str(comicpub) )
#print ( "comicid: " + str(comicid) )
# do 3 runs at the comics.org search to get the best results
comicrun = []
# &pub_name=DC
Expand Down
20 changes: 17 additions & 3 deletions mylar/webserve.py
Expand Up @@ -179,7 +179,7 @@ def addComic(self, comicid, comicname=None, comicyear=None, comicimage=None, com

def from_Exceptions(self, comicid, gcdid, comicname=None, comicyear=None, comicissues=None, comicpublisher=None):
mismatch = "yes"
print ("gcdid:" + str(gcdid))
#print ("gcdid:" + str(gcdid))
#write it to the custom_exceptions.csv and reload it so that importer will pick it up and do it's thing :)
#custom_exceptions in this format...
#99, (comicid), (gcdid), none
Expand Down Expand Up @@ -209,6 +209,20 @@ def GCDaddComic(self, comicid, comicname=None, comicyear=None, comicissues=None,
gcomicid = "G" + str(comicid)
comicyear_len = comicyear.find(' ', 2)
comyear = comicyear[comicyear_len+1:comicyear_len+5]
if comyear.isdigit():
logger.fdebug("Series year set to : " + str(comyear))
else:
logger.fdebug("Invalid Series year detected - trying to adjust from " + str(comyear))
#comicyear_len above will trap wrong year if it's 10 October 2010 - etc ( 2000 AD)...
find_comicyear = comicyear.split()
for i in find_comicyear:
if len(i) == 4:
logger.fdebug("Series year detected as : " + str(i))
comyear = str(i)
continue

logger.fdebug("Series year set to: " + str(comyear))

controlValueDict = { 'ComicID': gcomicid }
newValueDict = {'ComicName': comicname,
'ComicYear': comyear,
Expand Down Expand Up @@ -398,7 +412,7 @@ def pullist(self):
#raise cherrypy.HTTPRedirect("home")
else:
return self.manualpull()
return serve_template(templatename="weeklypull.html", title="Weekly Pull", weeklyresults=weeklyresults, pulldate=pulldate['SHIPDATE'],pullfilter=False)
return serve_template(templatename="weeklypull.html", title="Weekly Pull", weeklyresults=weeklyresults, pulldate=pulldate['SHIPDATE'])
pullist.exposed = True

def filterpull(self):
Expand All @@ -407,7 +421,7 @@ def filterpull(self):
pulldate = myDB.action("SELECT * from weekly").fetchone()
if pulldate is None:
raise cherrypy.HTTPRedirect("home")
return serve_template(templatename="weeklypull.html", title="Weekly Pull", weeklyresults=weeklyresults, pulldate=pulldate['SHIPDATE'], pullfilter=True)
return serve_template(templatename="weeklypull.html", title="Weekly Pull", weeklyresults=weeklyresults, pulldate=pulldate['SHIPDATE'])
filterpull.exposed = True

def manualpull(self):
Expand Down

0 comments on commit 365d88c

Please sign in to comment.