# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mylar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.
from bs4 import BeautifulSoup
import urllib2
import re
import helpers
import logger
import datetime
from decimal import Decimal
from HTMLParser import HTMLParseError

def GCDScraper(ComicName, ComicYear, Total, ComicID):
    NOWyr = datetime.date.today().year
    if datetime.date.today().month == 12:
        NOWyr = NOWyr + 1
        logger.fdebug("We're in December - incrementing the search end-year to " + str(NOWyr) + " to broaden the results.")
    comicnm = ComicName
    comicyr = ComicYear
    comicis = Total
    comicid = ComicID
    comicnm = re.sub(' ', '+', comicnm)
    input = 'http://www.comics.org/search/advanced/process/?target=series&method=icontains&logic=False&order2=date&order3=&start_date=' + str(comicyr) + '-01-01&end_date=' + str(NOWyr) + '-12-31&series=' + str(comicnm) + '&is_indexed=None'
    response = urllib2.urlopen(input)
    soup = BeautifulSoup(response)
    cnt1 = len(soup.findAll("tr", {"class": "listing_even"}))
    cnt2 = len(soup.findAll("tr", {"class": "listing_odd"}))
    cnt = int(cnt1 + cnt2)
    resultName = []
    resultID = []
    resultYear = []
    resultIssues = []
    resultURL = None
    n_odd = -1
    n_even = -1
    n = 0
    while (n < cnt):
        # result rows alternate between 'listing_even' and 'listing_odd' classes
        if n % 2 == 0:
            n_even += 1
            resultp = soup.findAll("tr", {"class": "listing_even"})[n_even]
        else:
            n_odd += 1
            resultp = soup.findAll("tr", {"class": "listing_odd"})[n_odd]
        rtp = resultp('a')[1]
        resultName.append(helpers.cleanName(rtp.findNext(text=True)))
        fip = resultp('a', href=True)[1]
        resultID.append(fip['href'])
        subtxt3 = resultp('td')[3]
        resultYear.append(subtxt3.findNext(text=True))
        resultYear[n] = resultYear[n].replace(' ', '')
        subtxt4 = resultp('td')[4]
        resultIssues.append(helpers.cleanName(subtxt4.findNext(text=True)))
        resiss = resultIssues[n].find('issue')
        resiss = int(resiss)
        resultIssues[n] = resultIssues[n][:resiss]
        resultIssues[n] = resultIssues[n].replace(' ', '')
        CleanComicName = re.sub('[\,\.\:\;\'\[\]\(\)\!\@\#\$\%\^\&\*\-\_\+\=\?\/]', '', comicnm)
        CleanComicName = re.sub(' ', '', CleanComicName).lower()
        CleanResultName = re.sub('[\,\.\:\;\'\[\]\(\)\!\@\#\$\%\^\&\*\-\_\+\=\?\/]', '', resultName[n])
        CleanResultName = re.sub(' ', '', CleanResultName).lower()
        if CleanResultName == CleanComicName or CleanResultName[3:] == CleanComicName:
            # A series that began in (say) 2011 may be listed on GCD as 2012 when its
            # publication dates straddle Dec/Jan, so accept a match with a one-year
            # grace period; the first issue's actual publication date is pulled later.
            if resultYear[n] == ComicYear or resultYear[n] == str(int(ComicYear) + 1):
                # Issue counts occasionally differ between GCD and ComicVine - usually
                # CV lagging behind GCD (ie. Witchblade: 159 issues on GCD, 161 on CV) -
                # so allow a variation of one in either direction; anything larger
                # risks an incorrect match.
                if int(resultIssues[n]) == int(Total) or int(resultIssues[n]) == int(Total) + 1 or (int(resultIssues[n]) + 1) == int(Total):
                    if int(resultIssues[n]) == int(Total) + 1:
                        issvariation = "cv"
                    elif int(resultIssues[n]) + 1 == int(Total):
                        issvariation = "gcd"
                    else:
                        issvariation = "no"
                    TotalIssues = resultIssues[n]
                    resultURL = str(resultID[n])
                    rptxt = resultp('td')[6]
                    resultPublished = rptxt.findNext(text=True)
                    break
        n += 1
    # ComicVine sometimes returns a slightly different title than GCD
    # (ie. The Flash-2011 on ComicVine is Flash-2011 on GCD), so retry the
    # search with common variations in spelling and punctuation.
    basnumbs = {'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10, 'eleven': 11, 'twelve': 12}
    if resultURL is None:
        # numbers written out as words get converted to numerals first
        for numbs in basnumbs:
            if numbs in ComicName.lower():
                numconv = basnumbs[numbs]
                ComicNm = re.sub(str(numbs), str(numconv), ComicName.lower())
                return GCDScraper(ComicNm, ComicYear, Total, ComicID)
        if ComicName.lower().startswith('the '):
            ComicName = ComicName[4:]
            return GCDScraper(ComicName, ComicYear, Total, ComicID)
        if ':' in ComicName:
            ComicName = re.sub(':', '', ComicName)
            return GCDScraper(ComicName, ComicYear, Total, ComicID)
        if '-' in ComicName:
            ComicName = re.sub('-', ' ', ComicName)
            return GCDScraper(ComicName, ComicYear, Total, ComicID)
        if 'and' in ComicName.lower():
            ComicName = ComicName.replace('and', '&')
            return GCDScraper(ComicName, ComicYear, Total, ComicID)
        return 'No Match'
    return GCDdetails(comseries=None, resultURL=resultURL, vari_loop=0, ComicID=ComicID, TotalIssues=TotalIssues, issvariation=issvariation, resultPublished=resultPublished)
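
# --- illustrative sketch only; not called anywhere in this module ---
# The matching loop in GCDScraper strips punctuation and spaces from both the
# ComicVine title and the GCD result before comparing them. The same rule is
# pulled out here as a tiny helper so it is easy to see in isolation; the
# example titles below are made up.
def _clean_title_sketch(name):
    # drop punctuation, then spaces, then lowercase - mirrors CleanComicName / CleanResultName above
    stripped = re.sub('[\,\.\:\;\'\[\]\(\)\!\@\#\$\%\^\&\*\-\_\+\=\?\/]', '', name)
    return re.sub(' ', '', stripped).lower()
# e.g. _clean_title_sketch('The Flash: Rebirth!') == 'theflashrebirth', and its
# [3:] slice ('flashrebirth') equals _clean_title_sketch('Flash: Rebirth'),
# which is how the leading 'The' is tolerated by the comparison above.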

def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariation, resultPublished):
    gcdinfo = {}
    gcdchoice = []
    gcount = 0
    gcdcover = "None"  # default until a series cover page has been parsed
    i = 0
    if vari_loop > 1:
        resultPublished = "Unknown"
    if vari_loop == 99:
        vari_loop = 1
    while (i <= vari_loop):
        if vari_loop > 0:
            try:
                boong = comseries['comseries'][i]
            except IndexError:
                break
            resultURL = boong['comseriesID']
            ComicID = boong['comicid']
            TotalIssues += int(boong['comseriesIssues'])
        else:
            resultURL = resultURL
            # if we're here it means the name was mismatched, so pull down the
            # publication date now as it would otherwise be blank.
            inputMIS = 'http://www.comics.org' + str(resultURL)
            resp = urllib2.urlopen(inputMIS)
            soup = BeautifulSoup(resp)
            parsed = soup.find("div", {"id": "series_data"})
            subtxt3 = parsed.find("dd", {"id": "publication_dates"})
            resultPublished = subtxt3.findNext(text=True).rstrip()
            coverst = soup.find("div", {"id": "series_cover"})
            if coverst is None:
                gcdcover = "None"
            else:
                subcoverst = coverst('img', src=True)[0]
                gcdcover = subcoverst['src']
        input2 = 'http://www.comics.org' + str(resultURL) + 'details/'
        resp = urllib2.urlopen(input2)
        soup = BeautifulSoup(resp)
        # newer comics carry a complete on-sale date; older comics only have a
        # publication date, so note which one is available.
        onsale = soup.find(text=' On-sale date ')
        if onsale:
            datetype = "on-sale"
        else:
            datetype = "pub"
        cnt1 = len(soup.findAll("tr", {"class": "row_even_False"}))
        cnt2 = len(soup.findAll("tr", {"class": "row_even_True"}))
        cnt = int(cnt1 + cnt2)
        # cnt counts every row, so it may overstate the issue total when alternate
        # prints or variant covers are listed.
        n_odd = -1
        n_even = -1
        n = 0
        PI = "1.00"
        altcount = 0
        while (n < cnt):
            if n % 2 == 0:
                n_odd += 1
                parsed = soup.findAll("tr", {"class": "row_even_False"})[n_odd]
                ntype = "odd"
            else:
                n_even += 1
                ntype = "even"
                parsed = soup.findAll("tr", {"class": "row_even_True"})[n_even]
            subtxt3 = parsed.find("a")
            ParseIssue = subtxt3.findNext(text=True)
            fid = parsed('a', href=True)[0]
            resultGID = fid['href']
            resultID = resultGID[7:-1]
            if ',' in ParseIssue:
                ParseIssue = re.sub("\,", "", ParseIssue)
            if 'Vol' in ParseIssue or '[' in ParseIssue:
                ParseIssue = re.sub("[^0-9]", "", ParseIssue)
            isslen = ParseIssue.find(' ')
            # if a space is present ('isslen' > 0), the row is an alternate cover.
            # however, if ONLY alternate covers exist for an issue this won't work,
            # so use the FIRST record and ignore all other covers for that issue.
            isschk = ParseIssue[:isslen]
            # check whether a decimal or '1/2' issue exists, and store the decimal part
            halfchk = "no"
            if '.' in isschk:
                isschk_find = isschk.find('.')
                isschk_b4dec = isschk[:isschk_find]
                isschk_decval = isschk[isschk_find + 1:]
            elif '/' in isschk:
                ParseIssue = "0.50"
                isslen = 0
                halfchk = "yes"
            else:
                isschk_decval = ".00"
            if isslen > 0:
                isschk = ParseIssue[:isslen]
                isschk2 = str(isschk) + isschk_decval
                if 'a' in isschk or 'b' in isschk or 'c' in isschk:
                    isschk2 = ParseIssue[:isslen - 1] + isschk_decval
                ParseIssue = str(isschk2)
                if str(PI) == str(isschk2):
                    if altcount == 0:
                        # first occurrence of this issue - mark it and continue
                        ParseIssue = str(isschk2)
                        PI = str(isschk2)
                        altcount = 1
                    else:
                        # only the first record for an issue is kept - ignore further alternate covers
                        ParseIssue = "this is wrong"
                        altcount += 1
                else:
                    # issue number didn't match the previous row
                    altcount = 1
                    ParseIssue = str(isschk) + isschk_decval
            else:
                if halfchk == "yes":
                    pass
                else:
                    ParseIssue = ParseIssue + isschk_decval
                # no alternate cover detected for this issue
                altcount = 1
            if (altcount == 1):
                # decimalize the issue string to '.00' so later comparisons line up.
                gcdinfo['ComicIssue'] = ParseIssue
                subtxt1 = parsed('td')[2]
                ParseDate = subtxt1.findNext(text=True)
                pdlen = len(ParseDate)
                if len(ParseDate) < 7:
                    # no usable on-sale date - fall back to the publication date column
                    subtxt3 = parsed('td')[0]
                    ParseDate = subtxt3.findNext(text=True)
                if ParseDate == ' ':
                    # default to a zeroed date so it doesn't error out.
                    ParseDate = "0000-00-00"
                ParseDate = ParseDate.replace(' ', '')
                gcdinfo['ComicDate'] = ParseDate
                if not any(d.get('GCDIssue', None) == str(gcdinfo['ComicIssue']) for d in gcdchoice):
                    if ComicID[:1] == "G":
                        gcdchoice.append({
                            'GCDid': ComicID,
                            'IssueID': resultID,
                            'GCDIssue': gcdinfo['ComicIssue'],
                            'GCDDate': gcdinfo['ComicDate']
                            })
                        gcount += 1
                    else:
                        gcdchoice.append({
                            'GCDid': ComicID,
                            'GCDIssue': gcdinfo['ComicIssue'],
                            'GCDDate': gcdinfo['ComicDate']
                            })
                    gcdinfo['gcdchoice'] = gcdchoice
                else:
                    # two identical issue numbers can legitimately exist with different
                    # publication dates - try to distinguish them before skipping.
                    logger.fdebug("2 identical issue #'s have been found...determining if it's intentional.")
                    logger.fdebug("Issue #:" + str(gcdinfo['ComicIssue']))
                    logger.fdebug("IssueDate: " + str(gcdinfo['ComicDate']))
                    # pull the conflicting issue from the list already gathered
                    for d in gcdchoice:
                        if str(d['GCDIssue']) == str(gcdinfo['ComicIssue']):
                            logger.fdebug("Issue # already in tuple - checking IssueDate:" + str(d['GCDDate']))
                            if str(d['GCDDate']) == str(gcdinfo['ComicDate']):
                                logger.fdebug("Issue #'s and dates match...skipping.")
                            else:
                                logger.fdebug("Issue #'s match but publication dates differ - not skipping.")
                PI = ParseIssue
            # -- this needs a rework --
            # if an issue only has alternate covers on comics.org it won't match and
            # the script will report that it cannot retrieve the issue; the previous
            # issue should be compared to the current one to help with the alt.cover count.
            n += 1
        i += 1
    gcdinfo['gcdvariation'] = issvariation
    if ComicID[:1] == "G":
        gcdinfo['totalissues'] = gcount
    else:
        gcdinfo['totalissues'] = TotalIssues
    gcdinfo['ComicImage'] = gcdcover
    gcdinfo['resultPublished'] = resultPublished
    return gcdinfo
## -- end (GCD) -- ##
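
# --- shape of the gcdinfo dict returned by GCDdetails (illustrative values only) ---
# The keys come straight from the assignments above; the values below are made up:
#
#   {'ComicIssue': '5.00',           # last issue number parsed (decimalized)
#    'ComicDate': '2011-08-00',      # last issue date parsed
#    'gcdchoice': [{'GCDid': '...', 'GCDIssue': '1.00', 'GCDDate': '2011-04-00'}, ...],
#    'gcdvariation': 'no',           # 'cv' / 'gcd' / 'no' issue-count variation flag
#    'totalissues': 5,               # gcount for 'G'-prefixed ComicIDs, else TotalIssues
#    'ComicImage': 'http://.../cover.jpg',
#    'resultPublished': '2011 - 2012'}
#
# For 'G'-prefixed ComicIDs each gcdchoice entry also carries an 'IssueID' key.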

def GCDAdd(gcdcomicid):
    serieschoice = []
    series = {}
    logger.fdebug("I'm trying to find these GCD comicid's:" + str(gcdcomicid))
    for gcdid in gcdcomicid:
        logger.fdebug("looking at gcdid:" + str(gcdid))
        input2 = 'http://www.comics.org/series/' + str(gcdid)
        logger.fdebug("---url: " + str(input2))
        resp = urllib2.urlopen(input2)
        soup = BeautifulSoup(resp)
        # series name
        logger.fdebug("SeriesName section...")
        parsen = soup.find("span", {"id": "series_name"})
        subpar = parsen('a')[0]
        resultName = subpar.findNext(text=True)
        logger.fdebug("ComicName: " + str(resultName))
        # covers
        logger.fdebug("Covers section...")
        coverst = soup.find("div", {"id": "series_cover"})
        if coverst is None:
            gcdcover = "None"
            logger.fdebug("unable to find any covers - setting to None")
        else:
            subcoverst = coverst('img', src=True)[0]
            gcdcover = subcoverst['src']
        logger.fdebug("Cover: " + str(gcdcover))
        # publisher
        logger.fdebug("Publisher section...")
        try:
            pubst = soup.find("div", {"class": "item_data"})
            catchit = pubst('a')[0]
        except (IndexError, TypeError):
            pubst = soup.findAll("div", {"class": "left"})[1]
            catchit = pubst.find("a")
        publisher = catchit.findNext(text=True)
        logger.fdebug("Publisher: " + str(publisher))
        # publication dates and issue count
        parsed = soup.find("div", {"id": "series_data"})
        subtxt3 = parsed.find("dd", {"id": "publication_dates"})
        pubdate = subtxt3.findNext(text=True).rstrip()
        logger.fdebug("pubdate:" + str(pubdate))
        subtxt4 = parsed.find("dd", {"id": "issues_published"})
        noiss = subtxt4.findNext(text=True)
        lenwho = len(noiss)
        lent = noiss.find(' ', 2)
        lenf = noiss.find('(')
        stringit = noiss[lenf:lenwho]
        stringout = noiss[:lent]
        noissues = stringout.rstrip(' \t\r\n\0')
        numbering = stringit.rstrip(' \t\r\n\0')
        logger.fdebug("noissues:" + str(noissues))
        logger.fdebug("numbering:" + str(numbering))
        serieschoice.append({
            "ComicID": gcdid,
            "ComicName": resultName,
            "ComicYear": pubdate,
            "ComicIssues": noissues,
            "ComicPublisher": publisher,
            "ComicCover": gcdcover
            })
    series['serieschoice'] = serieschoice
    return series
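
# --- illustrative note only ---
# GCDAdd splits the 'issues_published' text purely by position. For a hypothetical
# value such as "12 issues (#1 - 12)" the slicing above yields:
#   noissues  -> "12"          (everything before the first space found past index 2)
#   numbering -> "(#1 - 12)"   (everything from the '(' onward)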

def ComChk(ComicName, ComicYear, ComicPublisher, Total, ComicID):
    comchkchoice = []
    comchoice = {}
    NOWyr = datetime.date.today().year
    if datetime.date.today().month == 12:
        NOWyr = NOWyr + 1
        logger.fdebug("We're in December - incrementing the search end-year to " + str(NOWyr) + " to broaden the results.")
    comicnm = ComicName
    comicyr = ComicYear
    comicis = Total
    comicid = ComicID
    comicpub = ComicPublisher
    # do 3 runs at the comics.org search to get the best results
    comicrun = []
    # the &pub_name= filter needs the short publisher name or it won't match
    # (ie. 'DC Comics' vs 'DC'), so only the first word of the publisher is used,
    # and only for the big publishers where that abbreviation is reliable.
    pubbiggies = ['DC',
                  'Marvel',
                  'Image',
                  'IDW']
    uhuh = "no"
    for pb in pubbiggies:
        if pb in comicpub:
            # keep the publisher in the url if it's one of the biggies.
            uhuh = "yes"
            conv_pub = comicpub.split()[0]
    # 1st run setup - leave the name exactly as it is.
    comicrun.append(comicnm)
    cruncnt = 0
    # 2nd run setup - drop the last word for a broader search (keep the year or else it will blow up)
    if len(str(comicnm).split()) > 2:
        comicrun.append(' '.join(comicnm.split(' ')[:-1]))
        cruncnt += 1
    # 3rd run setup - strip extra punctuation to increase the likelihood of a match
    if re.sub('[\.\,\:]', '', comicnm) != comicnm:
        comicrun.append(re.sub('[\.\,\:]', '', comicnm))
        cruncnt += 1
    totalcount = 0
    cr = 0
    while (cr <= cruncnt):
        comicnm = comicrun[cr]
        # spaces left in the name will break the search - convert them to '+'
        comicnm = re.sub(' ', '+', comicnm)
        if uhuh == "yes":
            publink = "&pub_name=" + str(conv_pub)
        if uhuh == "no":
            publink = "&pub_name="
        input = 'http://www.comics.org/search/advanced/process/?target=series&method=icontains&logic=False&keywords=&order1=series&order2=date&order3=&start_date=' + str(comicyr) + '-01-01&end_date=' + str(NOWyr) + '-12-31' + '&title=&feature=&job_number=&pages=&script=&pencils=&inks=&colors=&letters=&story_editing=&genre=&characters=&synopsis=&reprint_notes=&story_reprinted=None&notes=' + str(publink) + '&pub_notes=&brand=&brand_notes=&indicia_publisher=&is_surrogate=None&ind_pub_notes=&series=' + str(comicnm) + '&series_year_began=&series_notes=&tracking_notes=&issue_count=&is_comics=None&format=&color=&dimensions=&paper_stock=&binding=&publishing_format=&issues=&volume=&issue_title=&variant_name=&issue_date=&indicia_frequency=&price=&issue_pages=&issue_editing=&isbn=&barcode=&issue_notes=&issue_reprinted=None&is_indexed=None'
        response = urllib2.urlopen(input)
        soup = BeautifulSoup(response)
        cnt1 = len(soup.findAll("tr", {"class": "listing_even"}))
        cnt2 = len(soup.findAll("tr", {"class": "listing_odd"}))
        cnt = int(cnt1 + cnt2)
        resultName = []
        resultID = []
        resultYear = []
        resultIssues = []
        resultPublisher = []
        resultURL = None
        n_odd = -1
        n_even = -1
        n = 0
        while (n < cnt):
            if n % 2 == 0:
                n_even += 1
                resultp = soup.findAll("tr", {"class": "listing_even"})[n_even]
            else:
                n_odd += 1
                resultp = soup.findAll("tr", {"class": "listing_odd"})[n_odd]
            rtp = resultp('a')[1]
            resultName.append(helpers.cleanName(rtp.findNext(text=True)))
            pub = resultp('a')[0]
            resultPublisher.append(pub.findNext(text=True))
            fip = resultp('a', href=True)[1]
            resultID.append(fip['href'])
            subtxt3 = resultp('td')[3]
            resultYear.append(subtxt3.findNext(text=True))
            resultYear[n] = resultYear[n].replace(' ', '')
            subtxt4 = resultp('td')[4]
            resultIssues.append(helpers.cleanName(subtxt4.findNext(text=True)))
            resiss = resultIssues[n].find('issue')
            resiss = int(resiss)
            resultIssues[n] = resultIssues[n][:resiss]
            resultIssues[n] = resultIssues[n].replace(' ', '')
            # only add a series once, keyed on its GCD series id
            if not any(d.get('GCDID', None) == str(resultID[n]).split('/')[2] for d in comchkchoice):
                comchkchoice.append({
                    "ComicID": str(comicid),
                    "ComicName": str(resultName[n]),
                    "GCDID": str(resultID[n]).split('/')[2],
                    "ComicYear": str(resultYear[n]),
                    "ComicPublisher": str(resultPublisher[n]),
                    "ComicURL": "http://www.comics.org" + str(resultID[n]),
                    "ComicIssues": str(resultIssues[n])
                    })
            n += 1
        cr += 1
        totalcount = totalcount + cnt
    comchoice['comchkchoice'] = comchkchoice
    return comchoice, totalcount
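
# --- minimal usage sketch, not part of Mylar's own call path ---
# Assumes a working Mylar environment (the helpers and logger modules importable)
# and live network access to comics.org; the series name, year, issue count and
# ComicID below are made-up example values.
if __name__ == '__main__':
    # search comics.org and list every candidate series found
    choices, hits = ComChk('Invincible', '2003', 'Image', 144, '99999')
    for choice in choices['comchkchoice']:
        logger.fdebug(choice['ComicName'] + ' (' + choice['ComicYear'] + ') - GCDID ' + choice['GCDID'])
    # attempt a direct name/year/issue-count match and pull the issue list
    gcd = GCDScraper('Invincible', '2003', 144, '99999')
    if gcd == 'No Match':
        logger.fdebug('no GCD match found')
    else:
        logger.fdebug('matched series published ' + str(gcd['resultPublished']))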