Initial commit.

jbeluch · Nov 24, 2011 · 6e25342 · 6e25342
commit 6e25342
Show file tree

Hide file tree

Showing 8 changed files with 323 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+*.pyo
+*.swp
diff --git a/addon.py b/addon.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+import re
+from itertools import chain
+from urlparse import urljoin
+from xbmcswift import Plugin, download_page, xbmc, xbmcgui
+from BeautifulSoup import BeautifulSoup as BS, SoupStrainer as SS
+from resources.lib.videohosts import resolve
+from resources.lib.googleforms import report_broken_url
+
+# Human-readable name and XBMC addon id of this plugin; the id is the same
+# one that addon.xml declares.
+__plugin_name__ = 'Documentary Heaven'
+__plugin_id__ = 'plugin.video.documentaryheaven'
+# xbmcswift Plugin object; the views below register their routes on it.
+plugin = Plugin(__plugin_name__, __plugin_id__, __file__)
+# Site root (also the page scraped for the category names) and the page that
+# lists every documentary.
+BASE_URL = 'http://documentaryheaven.com'
+ALL_DOCS_URL = 'http://documentaryheaven.com/documentary-list/'
+
+
+def full_url(path):
+    '''Returns an absolute url for path, resolved against BASE_URL.'''
+    absolute = urljoin(BASE_URL, path)
+    return absolute
+
+
+def htmlify(url):
+    '''Downloads the page at url and returns it parsed by BeautifulSoup.'''
+    page_source = download_page(url)
+    return BS(page_source)
+
+
+@plugin.route('/')
+def show_homepage():
+    '''Lists the two top-level entries of the plugin.'''
+    # (label, name of the view the entry links to)
+    entries = (
+        ('All Documentaries', 'show_all'),
+        ('By Category', 'show_categories'),
+    )
+    items = [{'label': label, 'url': plugin.url_for(endpoint)}
+             for label, endpoint in entries]
+    return plugin.add_items(items)
+
+
+@plugin.route('/all/')
+def show_all():
+    '''Lists every documentary found on ALL_DOCS_URL, sorted by label.
+
+    The (label, url) pairs are de-duplicated before the list items are
+    built, since the page can list the same documentary more than once.
+
+    '''
+    html = htmlify(ALL_DOCS_URL)
+
+    # Tuples are collected first because dicts are mutable (unhashable) and
+    # therefore cannot be placed in a set.
+    label_urls = set()
+    for ul in html.findAll('ul', {'class': 'lcp_catlist'}):
+        for li in ul.findAll('li'):
+            link = li.a
+            # Skip entries without a usable link instead of letting a single
+            # malformed <li> abort the whole listing.
+            if link is None or not link.get('href'):
+                continue
+            label_urls.add((link.string,
+                            plugin.url_for('play', url=link['href'])))
+
+    items = [{'label': label,
+              'url': url,
+              'is_playable': True,
+              'is_folder': False,
+             } for label, url in label_urls]
+    return plugin.add_items(sorted(items, key=lambda item: item['label']))
+
+
+@plugin.route('/categories/')
+def show_categories():
+    '''Lists the categories scraped from the page at BASE_URL.'''
+    def is_category(cls):
+        # Matches any class attribute that contains 'cat-item'.
+        return cls and 'cat-item' in cls
+
+    html = htmlify(BASE_URL)
+    items = []
+    for entry in html.findAll('li', {'class': is_category}):
+        name = entry.a.string
+        items.append({
+            'label': name,
+            'url': plugin.url_for('show_category_videos', category=name),
+        })
+    return plugin.add_items(items)
+
+@plugin.route('/categories/<category>/')
+def show_category_videos(category):
+    '''Lists the documentaries of a single category, sorted by label.
+
+    category is compared with the <h2> heading of every 'catListItem' block
+    on ALL_DOCS_URL; the first block whose heading is equal supplies the
+    videos. If no block matches, the miss is logged and an empty listing is
+    added instead of raising.
+
+    '''
+    html = htmlify(ALL_DOCS_URL)
+
+    videos = []
+    for div in html.findAll('div', id='catListItem'):
+        # div.h2 is None for a block without a heading; skip such blocks
+        # rather than failing with an AttributeError.
+        if div.h2 is not None and div.h2.string == category:
+            videos = div.findAll('li')
+            break
+    else:
+        # An unknown category used to surface as a bare StopIteration.
+        # %r keeps the log message ASCII even for non-ASCII category names.
+        xbmc.log('Category not found: %r' % category)
+
+    # Entries without a usable link are left out of the listing.
+    items = [{'label': video.a.string,
+              'url': plugin.url_for('play', url=video.a['href']),
+              'is_playable': True,
+              'is_folder': False,
+             } for video in videos
+             if video.a is not None and video.a.get('href')]
+    return plugin.add_items(sorted(items, key=lambda item: item['label']))
+
+
+@plugin.route('/play/<url>/')
+def play(url):
+    '''Resolves the documentary page at url to a media url and plays it.
+
+    When no media url can be resolved, the current plugin url is written to
+    the log and the user is asked whether it may be reported to the
+    developer through a google form.
+
+    '''
+    page_source = download_page(url)
+    media_url = resolve(page_source)
+    if media_url:
+        return plugin.set_resolved_url(media_url)
+
+    # Nothing could be resolved: log the url of this plugin call so that the
+    # failure can be looked into.
+    broken_url = plugin._argv0 + '?' + plugin._argv2
+    xbmc.log('REPORT THIS URL: %s' % broken_url)
+
+    question = ('Would you like to report the URL to the'
+                ' developer?')
+    wants_report = xbmcgui.Dialog().yesno(
+        'Documentary Heaven Playback Problem.',
+        'There was an issue playing this video.',
+        question)
+    if wants_report:
+        report_broken_url(broken_url)
+
+
+# Start the xbmcswift plugin only when this file is executed as a script.
+if __name__ == '__main__': 
+    plugin.run()
diff --git a/addon.xml b/addon.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!-- Addon manifest for the Documentary Heaven video plugin. -->
+<addon id="plugin.video.documentaryheaven" name="Documentary Heaven" version="0.1" provider-name="Jonathan Beluch (jbel)">
+  <requires>
+    <import addon="xbmc.python" version="1.0"/>
+    <!-- Third-party modules imported by addon.py. -->
+    <import addon="script.module.beautifulsoup" version="3.0.8"/>
+    <import addon="script.module.xbmcswift" version="0.1"/>
+    <!-- resources/lib/videohosts.py hands playback to these plugins by
+         returning plugin:// urls that point at them. -->
+    <import addon="plugin.video.youtube" version="2.1.2"/>
+    <import addon="plugin.video.vimeo" version="1.1.0"/>
+  </requires>
+  <!-- addon.py is the script that is run for this video plugin. -->
+  <extension point="xbmc.python.pluginsource" library="addon.py">
+    <provides>video</provides>
+  </extension>
+  <extension point="xbmc.addon.metadata">
+    <platform>all</platform>
+    <!-- TODO: summary and description below are still placeholder text. -->
+    <summary>Summary for Documentary Heaven</summary>
+    <description>Description for Documentary Heaven</description>
+  </extension>
+</addon>
diff --git a/resources/__init__.py b/resources/__init__.py
diff --git a/resources/language/English/strings.xml b/resources/language/English/strings.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="utf-8" standalone="yes"?>
+<strings>
+  <!-- Plugin name -->
+  <string id="30000">Documentary Heaven</string>
+</strings>
diff --git a/resources/lib/__init__.py b/resources/lib/__init__.py
diff --git a/resources/lib/googleforms.py b/resources/lib/googleforms.py
@@ -0,0 +1,17 @@
+from urllib import urlopen, urlencode
+
+
+def _post(url, data):
+    '''Makes a POST request to the given url with the data payload.
+
+    data is passed to urlopen unchanged, so it must already be url-encoded.
+    Returns the body of the response.
+
+    '''
+    conn = urlopen(url, data)
+    # Close the connection even when reading the response fails.
+    try:
+        return conn.read()
+    finally:
+        conn.close()
+
+
+def report_broken_url(url):
+    '''Reports url by POSTing it to a google form. Returns the response.'''
+    form_url = 'https://docs.google.com/spreadsheet/formResponse?formkey=dDJWY3hOZEtWVlYwNWJZUUVGUjd3cnc6MQ&ifq'
+    # The broken url goes into the 'entry.0.single' field of the form.
+    payload = urlencode({
+        'entry.0.single': url,
+        'pageNumber': 0,
+        'backupCache': '',
+        'submit': 'Submit',
+    })
+    return _post(form_url, payload)
diff --git a/resources/lib/videohosts.py b/resources/lib/videohosts.py
@@ -0,0 +1,182 @@
+'''This module is used to extract media urls from a page's source. The
+BaseVideoHost classes are not meant to be instantiated directly. Only the
+resolve function is meant to be called directly.
+
+'''
+import re
+import urllib
+from cgi import parse_qs
+from inspect import isclass
+
+
+def _download(url):
+    '''Returns the response from the GET request for a given url.'''
+    conn = urllib.urlopen(url)
+    # Close the connection even when reading the response fails.
+    try:
+        return conn.read()
+    finally:
+        conn.close()
+
+
+# _unhex modeled after python's urllib.unquote
+# Lookup table from two hex digits (all lower or all upper case) to the
+# character with that code.
+_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
+_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+
+
+def _unhex(inp):
+    r'''Returns a new string, unescaping any instances of hex encoded
+    characters. A \x that is not followed by two hex digits found in the
+    lookup table is left in the output unchanged.
+
+    >>> _unhex(r'abc\x20def')
+    'abc def'
+    >>> _unhex(r'abc\xzzdef') == r'abc\xzzdef'
+    True
+
+    '''
+    pieces = inp.split(r'\x')
+    # The text before the first \x never needs decoding.
+    decoded = [pieces[0]]
+    for piece in pieces[1:]:
+        try:
+            decoded.append(_hextochr[piece[:2]] + piece[2:])
+        except KeyError:
+            # Not a valid escape: put back the \x that split() removed. (The
+            # '%' used by urllib.unquote would corrupt the string here.)
+            decoded.append(r'\x' + piece)
+        except UnicodeDecodeError:
+            # Concatenating a non-ASCII byte with unicode text failed; use the
+            # unicode character with the same code instead.
+            decoded.append(unichr(int(piece[:2], 16)) + piece[2:])
+    return ''.join(decoded)
+
+
+class BaseVideoHost(object):
+    '''Interface shared by all video host resolvers.
+
+    Concrete hosts subclass this and provide both match and resolve as
+    @classmethods, so the classes themselves are used and never instantiated.
+
+    '''
+
+    @classmethod
+    def match(cls, src):
+        '''Tells whether cls can resolve a media url for the page source src.
+        Implementations return True or False.
+
+        '''
+        raise NotImplementedError
+
+    @classmethod
+    def resolve(cls, src):
+        '''Computes the media url for the page source src. Implementations
+        return the url, or None when there is none.
+
+        '''
+        raise NotImplementedError
+
+
+class YouTube(BaseVideoHost):
+    '''Media resolver for videos embedded from http://www.youtube.com'''
+    # Each entry is a (marker, regex) pair:
+    #     marker: plain text whose presence in a page indicates an embedded
+    #             youtube video
+    #     regex:  compiled regular expression that captures the youtube
+    #             video id in match.group(1)
+    _patterns = [
+        ('http://www.youtube.com/embed/',
+         re.compile(r'http://www.youtube.com/embed/([^\?"]+)')),
+        ('http://www.youtube.com/p/',
+         re.compile(r'http://www.youtube.com/p/([^&\?"]+)')),
+        ('http://www.youtube.com/v/',
+         re.compile(r'http://www.youtube.com/v/([^&\?"]+)')),
+    ]
+
+    @classmethod
+    def match(cls, src):
+        '''Returns True when src contains any of the known youtube embed
+        markers, False otherwise.
+
+        '''
+        found = [marker for marker, _ in cls._patterns if marker in src]
+        return bool(found)
+
+    @classmethod
+    def resolve(cls, src):
+        '''Returns a playable XBMC media url pointing to the YouTube plugin,
+        or None when no video id can be extracted from src.
+
+        '''
+        url_ptn = 'plugin://plugin.video.youtube/?action=play_video&videoid=%s'
+        for _marker, regex in cls._patterns:
+            found = regex.search(src)
+            if found is not None:
+                return url_ptn % found.group(1)
+        return None
+
+
+class GoogleVideo(BaseVideoHost):
+    '''Media resolver for http://video.google.com'''
+
+    @classmethod
+    def match(cls, src):
+        '''Returns True if a google video url is found in the page.'''
+        return 'http://video.google.com' in src
+
+    @classmethod
+    def resolve(cls, src):
+        '''Returns a media url for a google video found in the provided src.
+        Returns None if the media url cannot be resolved.
+
+        '''
+        # The docid ends at the next '&', quote or whitespace, so it is also
+        # found when it is the last parameter of the embed url.
+        match = re.search(
+            r'http://video.google.com/googleplayer.swf\?docid=([^&"\'\s]+)',
+            src)
+        if match:
+            return cls._get_media_url(
+                'http://video.google.com/videoplay?docid=%s' %
+                match.group(1))
+        return None
+
+    @classmethod
+    def _get_media_url(cls, url):
+        '''Returns the media url for a given google video URL or None.
+
+        The page at url is downloaded and searched for its preview_url; the
+        media url is the videoUrl parameter of that preview url.
+
+        '''
+        flvurl_match = re.search(r'preview_url:\'(.+?)\'', _download(url))
+        if not flvurl_match:
+            return None
+
+        flvurl = _unhex(flvurl_match.group(1))
+        # A preview url without a query string or without a videoUrl
+        # parameter cannot be resolved; report that as None, as documented,
+        # instead of raising IndexError/KeyError.
+        if '?' not in flvurl:
+            return None
+        params = parse_qs(flvurl.split('?', 1)[1])
+        video_urls = params.get('videoUrl')
+        if not video_urls:
+            return None
+        return urllib.unquote_plus(video_urls[0])
+
+
+class Vimeo(BaseVideoHost):
+    '''Resolver for http://vimeo.com'''
+
+    @classmethod
+    def match(cls, src):
+        '''Searches for the vimeo swf URL or finds an embedded iframe url.'''
+        return ('http://vimeo.com/moogaloop.swf' in src or
+                'http://player.vimeo.com/video/' in src)
+
+    @classmethod
+    def resolve(cls, src):
+        '''Extracts a vimeo video id from the source and returns a playable
+        XBMC URL to the Vimeo plugin. Returns None if no id is found.
+
+        '''
+        # The id ends at the first character that cannot belong to it. That
+        # way a clip_id that is the last parameter is still found, and the
+        # query string of an iframe url (?title=0&...) does not end up in
+        # the video id.
+        match = re.search(
+            r'http://vimeo.com/moogaloop.swf\?clip_id=([^&"\'\s]+)', src)
+        if not match:
+            match = re.search(
+                r'http://player.vimeo.com/video/([^?&#"\'\s]+)', src)
+        if match:
+            return ('plugin://plugin.video.vimeo/?action=play_video&videoid=%s'
+                    % match.group(1))
+        return None
+
+
+# Populate the list of available video hosts to match against. Get any class
+# that is a subclass of BaseVideoHost but do not include BaseVideoHost itself!
+# At module level locals() is the module namespace, so every host class
+# defined in this module is picked up without having to be registered.
+# NOTE: the list follows the iteration order of that dict, so the order in
+# which hosts end up in AVAILABLE_HOSTS should not be relied upon.
+AVAILABLE_HOSTS = [attr_value for attr_name, attr_value in locals().items()
+                   if isclass(attr_value) and attr_name != 'BaseVideoHost' and
+                   issubclass(attr_value, BaseVideoHost)]
+
+
+def resolve(src):
+    '''Attempts to return a media url for the given page's source.
+
+    Loops through all available hosts. Every host for which HOST.match(src)
+    returns True is asked to compute the media url with HOST.resolve(src),
+    and the first url obtained is returned.
+
+    A host that matches but cannot produce a url does not end the search:
+    a page may mention one host while embedding a video from another, and
+    the order of AVAILABLE_HOSTS is not fixed. Returns None if no host
+    yields a url.
+
+    '''
+    for host in AVAILABLE_HOSTS:
+        if not host.match(src):
+            continue
+        media_url = host.resolve(src)
+        if media_url:
+            return media_url
+    return None