script.xbmc.subtitles/resources/lib/services/Subdivx/service.py

# -*- coding: utf-8 -*-
# Subdivx.com subtitles, based on a mod of Undertext subtitles
# Adaptation: enric_godes@hotmail.com | Please use email address for your comments

import os, sys, re, xbmc, xbmcgui, string, time, urllib, urllib2
from utilities import log
_ = sys.modules[ "__main__" ].__language__


# Base URL of the site; the search, subtitle-page and download URLs used by
# this service are all built by appending to it.
main_url = "http://www.subdivx.com/"
# Prefix placed in front of every message this service passes to log().
debug_pretext = "subdivx"

#====================================================================================================================
# Regular expression patterns
#====================================================================================================================


#Subtitle pattern example:
#<div id="menu_titulo_buscador"><a class="titulo_menu_izq" href="http://www.subdivx.com/X6XMjEzMzIyX-iron-man-2-2010.html">Iron Man 2 (2010)</a></div>
#<img src="img/calif5.gif" class="detalle_calif">
#</div><div id="buscador_detalle">
#<div id="buscador_detalle_sub">Para la versión Iron.Man.2.2010.480p.BRRip.XviD.AC3-EVO, sacados de acá. ¡Disfruten!</div><div id="buscador_detalle_sub_datos"><b>Downloads:</b> 4673 <b>Cds:</b> 1 <b>Comentarios:</b> <a rel="nofollow" href="popcoment.php?idsub=MjEzMzIy" onclick="return hs.htmlExpand(this, { objectType: 'iframe' } )">14</a> <b>Formato:</b> SubRip <b>Subido por:</b> <a class="link1" href="http://www.subdivx.com/X9X303157">TrueSword</a> <img src="http://www.subdivx.com/pais/2.gif" width="16" height="12"> <b>el</b> 06/09/2010  </a></div></div>
#<div id="menu_detalle_buscador">

# One search-result entry, from the title link down to the closing </div></div>.
# Written as adjacent raw strings so the backslash escapes reach the regex
# engine untouched.
#   group(1) = page id used to fetch the subtitle files
#   group(2) = uploader comment, which may contain the release file name
#   group(3) = download count, used to compute the rating
subtitle_pattern = (
    r'<a\sclass="titulo_menu_izq"\shref="http://www.subdivx.com/(.+?).html">'
    r'.+?<div\sid="buscador_detalle_sub">(.*?)</div>'
    r'.+?<b>Downloads:</b>(.+?)<b>Cds:</b>'
    r'.+?</div></div>'
)


#====================================================================================================================
# Functions
#====================================================================================================================


def getallsubs(searchstring, languageshort, languagelong, file_original_path, subtitles_list):
    """Search Subdivx for `searchstring` and append every hit to `subtitles_list`.

    Result pages are requested one after another (pg=1, 2, ...) until a page
    cannot be downloaded, contains no result entry, or contains only entries
    that were already collected.

    Parameters:
        searchstring       -- text to search for.
        languageshort      -- short language code; only "es" is searched, any
                              other value leaves `subtitles_list` untouched.
        languagelong       -- language name stored in every result entry.
        file_original_path -- path of the video file; its base name without
                              extension is looked up in the uploader comment
                              to decide the 'sync' flag.
        subtitles_list     -- list extended in place with one dict per
                              subtitle (keys: rating, filename, sync, id,
                              language_flag, language_name).

    Returns nothing. On return `subtitles_list` is ordered with the entries
    flagged as sync first; apart from that the original order is kept.
    """
    if languageshort != "es":
        # The search URL below is the Spanish one; there is nothing to query
        # for other languages (previously this path failed on an unbound 'url').
        return

    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE | re.UNICODE
    result_re = re.compile(subtitle_pattern, flags)
    # Compiled with the flags: passing them as the 4th positional argument of
    # re.sub() would set 'count' instead and leave the flags unused.
    script_re = re.compile(r'<script.+?script>', flags)
    tag_re = re.compile(r'<[^<]+?>')

    # Name of the video file without its extension, matched literally.
    video_name = os.path.splitext(os.path.basename(file_original_path))[0]
    video_re = None
    if video_name:
        video_re = re.compile(re.escape(video_name))

    # First real word of the search string; empty pieces (search string that
    # starts with punctuation) are skipped because they would match anything.
    title_words = [word for word in re.split(r'[\W]+', searchstring) if word]
    title_re = None
    if title_words:
        title_re = re.compile(re.escape(title_words[0]), flags)

    log( __name__ ,u"%s Getting '%s' subs ..." % (debug_pretext, languageshort))

    seen_ids = set()
    page = 1
    while True:
        url = main_url + "index.php?accion=5&masdesc=&oxdown=1&pg=" + str(page) + "&buscar=" + urllib.quote_plus(searchstring)
        content = geturl(url)
        if not content:
            # download failed (geturl returned None) or the page is empty
            break

        new_on_page = 0
        for matches in result_re.finditer(content):
            sub_id = matches.group(1)
            if sub_id in seen_ids:
                continue
            seen_ids.add(sub_id)
            new_on_page = new_on_page + 1

            # Rating = thousands of downloads, capped at 10. Every non-digit
            # (spaces, thousands separators) is dropped before converting.
            digits = re.sub(r'\D', '', matches.group(3))
            downloads = min(int(digits or 0) // 1000, 10)

            filename = matches.group(2).strip()
            #Remove new lines on the commentaries
            filename = filename.replace('\n', ' ')
            #Remove Google Ads on the commentaries
            filename = script_re.sub('', filename)
            #Remove HTML tags on the commentaries
            filename = tag_re.sub('', filename)

            #Find the video file name in the commentaries to show the sync label
            sync = video_re is not None and video_re.search(filename) is not None

            try:
                log( __name__ ,u"%s Subtitles found: %s (id = %s)" % (debug_pretext, filename, sub_id))
            except Exception:
                # Logging is best effort: the comment is raw page text and may
                # not be convertible for the log message.
                pass

            #Find the release name in the commentaries and put it in front
            if title_re is not None:
                for word in re.split(r'\s', filename):
                    if title_re.search(word):
                        filename = word + " | " + filename
                        break

            subtitles_list.append({'rating': str(downloads), 'filename': filename, 'sync': sync, 'id' : sub_id, 'language_flag': 'flags/' + languageshort + '.gif', 'language_name': languagelong})

        if new_on_page == 0:
            # No result entries, or only entries already collected: stop here
            # so that paging always ends, whatever the site answers for pages
            # past the last one.
            break
        page = page + 1

    # list.sort is stable, so equal entries keep their order; syncs go on top
    subtitles_list.sort(key=lambda sub: sub["sync"], reverse=True)


def geturl(url):
    """Download `url` and return the response body, or None on failure.

    Any error while opening or reading the URL is logged and turned into a
    None return value, so callers must check the result before using it.
    """
    class MyOpener(urllib.FancyURLopener):
        # 'version' is the user-agent string sent by the opener; it is blanked here
        version = ''
    my_urlopener = MyOpener()
    log( __name__ ,u"%s Getting url: %s" % (debug_pretext, url))
    try:
        response = my_urlopener.open(url)
        try:
            content = response.read()
        finally:
            # always release the connection, even when read() fails
            response.close()
    except Exception:
        log( __name__ ,u"%s Failed to get url:%s" % (debug_pretext, url))
        content = None
    return content


def search_subtitles( file_original_path, title, tvshow, year, season, episode, set_temp, rar, lang1, lang2, lang3, stack ): #standard input
    """Build the search string, run the Subdivx search and return the results.

    The search string is `title` when `tvshow` is empty, otherwise the show
    name followed by the season/episode marker. The search itself is always
    run for Spanish; lang1..lang3 only decide whether a warning is returned.
    year, set_temp, rar and stack are not used by this service.

    Returns (subtitles_list, "", msg), where msg is empty when one of the
    three languages is Spanish and a warning text otherwise.
    """
    if len(tvshow) > 0:
        searchstring = "%s S%#02dE%#02d" % (tvshow, int(season), int(episode))
    else:
        searchstring = title
    log( __name__ ,u"%s Search string = %s" % (debug_pretext, searchstring))

    # Evaluated left to right and lazily, like an if/elif chain.
    spanish_selected = (lang1.lower() == "spanish"
                        or lang2.lower() == "spanish"
                        or lang3.lower() == "spanish")

    found = []
    getallsubs(searchstring, "es", "Spanish", file_original_path, found)

    if spanish_selected:
        warning = ""
    else:
        warning = "Won't work, Subdivx is only for Spanish subtitles!"

    return found, "", warning #standard output


def download_subtitles (subtitles_list, pos, zip_subs, tmp_sub_dir, sub_folder, session_id): #standard input
    """Download the subtitle at `subtitles_list[pos]` into `tmp_sub_dir`.

    The subtitle page is fetched to find its 'bajar.php' download link, the
    linked file is saved in `tmp_sub_dir` and, when it is a RAR or ZIP
    archive, unpacked there with the XBMC.Extract builtin.

    Parameters:
        subtitles_list -- result list built by search_subtitles().
        pos            -- index of the chosen entry in `subtitles_list`.
        tmp_sub_dir    -- directory receiving the download and unpacked files.
        zip_subs, sub_folder, session_id -- not used by this service.

    Returns (False, language, subs_file). `subs_file` is the path of the
    subtitle file, or an empty string when the download, the save or the
    extraction failed.
    """
    entry = subtitles_list[pos]
    language = entry[ "language_name" ]
    subs_file = ""

    #get the page with the subtitle link, ie http://www.subdivx.com/X6XMjE2NDM1X-iron-man-2-2010
    url = main_url + str(entry[ "id" ])
    content = geturl(url)
    if content is None:
        return False, language, subs_file #standard output

    match = re.compile(r'bajar.php\?id=(.*?)&u=(.*?)"', re.IGNORECASE | re.DOTALL | re.MULTILINE | re.UNICODE).findall(content)
    if not match:
        log( __name__ ,u"%s Failed to find the download link in '%s'" % (debug_pretext, url))
        return False, language, subs_file #standard output

    url = main_url + "bajar.php?id=" + match[0][0] + "&u=" + match[0][1]
    content = geturl(url)
    if content is None:
        return False, language, subs_file #standard output

    # Identify the download by its leading signature. The ZIP signature is
    # tested with startswith: comparing a 4-character slice with the
    # 2-character 'PK' can never be equal.
    if content.startswith('Rar!'):
        local_tmp_file = os.path.join(tmp_sub_dir, "subdivx.rar")
        packed = True
    elif content.startswith('PK'):
        local_tmp_file = os.path.join(tmp_sub_dir, "subdivx.zip")
        packed = True
    else: # never found/downloaded an unpacked subtitles file, but just to be sure ...
        local_tmp_file = os.path.join(tmp_sub_dir, "subdivx.srt") # assume unpacked sub file is an '.srt'
        packed = False

    log( __name__ ,u"%s Saving subtitles to '%s'" % (debug_pretext, local_tmp_file))
    try:
        local_file_handle = open(local_tmp_file, "wb")
        try:
            local_file_handle.write(content)
        finally:
            local_file_handle.close()
    except (IOError, OSError):
        log( __name__ ,u"%s Failed to save subtitles to '%s'" % (debug_pretext, local_tmp_file))
        # nothing usable on disk, so there is nothing to unpack or return
        return False, language, subs_file #standard output

    if not packed:
        return False, language, local_tmp_file #standard output

    subtitle_exts = ['srt', 'sub', 'txt']
    # NOTE(review): file names are decoded from utf-8 in some places below and
    # used as-is in others; this is kept exactly as it was - confirm which
    # form os.listdir() returns on the target platforms.
    files = os.listdir(tmp_sub_dir)
    init_filecount = len(files)
    log( __name__ ,u"%s subdivx: número de init_filecount %s" % (debug_pretext, init_filecount)) #EGO
    filecount = init_filecount
    max_mtime = 0
    # determine the newest subtitle file already present in tmp_sub_dir
    for name in files:
        if name.split('.')[-1] in subtitle_exts:
            mtime = os.stat(os.path.join(tmp_sub_dir, name)).st_mtime
            if mtime > max_mtime:
                max_mtime = mtime
    init_max_mtime = max_mtime
    time.sleep(2)  # wait 2 seconds so that the unpacked files are at least 1 second newer
    xbmc.executebuiltin("XBMC.Extract(" + local_tmp_file.encode("utf-8") + "," + tmp_sub_dir.encode("utf-8") +")")

    # Poll for up to 20 seconds until the directory shows a new file or a
    # newer subtitle file, which marks that the extraction has produced output.
    waittime = 0
    while (filecount == init_filecount) and (waittime < 20) and (init_max_mtime == max_mtime): # nothing yet extracted
        time.sleep(1)  # wait 1 second to let the builtin function 'XBMC.extract' unpack
        files = os.listdir(tmp_sub_dir)
        filecount = len(files)
        for name in files:
            if name.split('.')[-1] in subtitle_exts:
                mtime = os.stat(os.path.join(tmp_sub_dir, name.decode("utf-8"))).st_mtime
                if mtime > max_mtime:
                    max_mtime = mtime
        waittime = waittime + 1

    # Decide on what was observed rather than on the counter, so that output
    # appearing during the last poll is not reported as a failure.
    if (filecount == init_filecount) and (init_max_mtime == max_mtime):
        log( __name__ ,u"%s Failed to unpack subtitles in '%s'" % (debug_pretext, tmp_sub_dir))
    else:
        log( __name__ ,u"%s Unpacked files in '%s'" % (debug_pretext, tmp_sub_dir))
        for name in files:
            # there could be more subtitle files in tmp_sub_dir, so make sure we get the newly created subtitle file
            if (name.split('.')[-1] in subtitle_exts) and (os.stat(os.path.join(tmp_sub_dir, name)).st_mtime > init_max_mtime): # unpacked file is a newly created subtitle file
                log( __name__ ,u"%s Unpacked subtitles file '%s'" % (debug_pretext, name))
                subs_file = os.path.join(tmp_sub_dir, name.decode("utf-8"))
    return False, language, subs_file #standard output