Chapter 10 distributed web framework examples

jcleblanc · May 26, 2011 · 0752f2b · 0752f2b
1 parent caf7ccc
commit 0752f2b
Show file tree

Hide file tree

Showing 15 changed files with 540 additions and 0 deletions.
diff --git a/chapter_10/opengraph-php-parser/OpenGraph.php b/chapter_10/opengraph-php-parser/OpenGraph.php
@@ -0,0 +1,75 @@
+<?php
+/*******************************************************************************
+ * Class Name: Open Graph Parser
+ * Description: Parses an HTML document to retrieve and store Open Graph
+ *              tags from the meta data
+ * Author: Jonathan LeBlanc (Twitter: @jcleblanc)
+ * License: Creative Commons (http://creativecommons.org/licenses/by-sa/2.0/)
+ * Usage:
+ *   $url = 'http://www.example.com/index.html';
+ *   $graph = new OpenGraph($url);
+ *   print_r($graph->get_one('title'));  //get only title element
+ *   print_r($graph->get_all());         //return all Open Graph tags
+ ******************************************************************************/
+class OpenGraph{
+    //the open graph associative array; held per instance so that parsing a
+    //second URL does not overwrite the tags collected by an earlier object
+    private $og_content = array();
+
+    /***************************************************************************
+     * Function: Class Constructor
+     * Description: Fetches and stores the OG data when a URL is supplied
+     * Params: $url (string) - URL of page to collect OG tags from
+     **************************************************************************/
+    public function __construct($url){
+        if ($url){
+            $this->og_content = $this->get_graph($url);
+        }
+    }
+
+    /***************************************************************************
+     * Function: Get Open Graph
+     * Description: Loads the HTML document at $url and collects every meta
+     *              tag whose property attribute starts with "og:"
+     * Params: $url (string) - URL of page to collect OG tags from
+     * Return: Array - associative array containing the OG data in format
+     *                 property : content, with the "og:" prefix removed
+     **************************************************************************/
+    private function get_graph($url){
+        //fetch html content from web source and filter to meta data; the @
+        //silences the warnings raised while loading the document
+        $dom = new DOMDocument();
+        @$dom->loadHTMLFile($url);
+        $tags = $dom->getElementsByTagName('meta');
+
+        //set open graph search tag and return object
+        $og_pattern = '/^og:/';
+        $graph_content = array();
+
+        //for each open graph tag, store in return object as property : content
+        foreach ($tags as $element){
+            //read the attribute once and reuse it for both match and key
+            $property = $element->getAttribute('property');
+            if (preg_match($og_pattern, $property)){
+                $graph_content[preg_replace($og_pattern, '', $property)] = $element->getAttribute('content');
+            }
+        }
+
+        //hand back all open graph tags
+        return $graph_content;
+    }
+
+    /***************************************************************************
+     * Function: Get One Tag
+     * Description: Fetches the content of one OG tag
+     * Params: $element (string) - tag name without the "og:" prefix
+     * Return: String - the content of the requested OG tag, or null when the
+     *                  page did not contain that tag
+     **************************************************************************/
+    public function get_one($element){
+        //guard the lookup so a missing tag yields null without a PHP notice
+        return isset($this->og_content[$element]) ? $this->og_content[$element] : null;
+    }
+
+    /***************************************************************************
+     * Function: Get All Tags
+     * Description: Fetches every OG tag that was collected
+     * Return: Array - The entire OG associative array
+     **************************************************************************/
+    public function get_all(){
+        return $this->og_content;
+    }
+}
+?>
diff --git a/chapter_10/opengraph-php-parser/og_test.php b/chapter_10/opengraph-php-parser/og_test.php
@@ -0,0 +1,9 @@
+<?php
+require_once 'OpenGraph.php';
+
+//page whose Open Graph tags are read; the constructor performs the fetch
+$page_url = 'http://www.yelp.com/biz/the-restaurant-at-wente-vineyards-livermore-2';
+$og = new OpenGraph($page_url);
+
+//print the title tag first, then the full tag array
+print_r($og->get_one('title'));
+print_r($og->get_all());
+
+?>
diff --git a/chapter_10/opengraph-python-parser/OpenGraph.py b/chapter_10/opengraph-python-parser/OpenGraph.py
@@ -0,0 +1,70 @@
+import urllib
+import re
+from BeautifulSoup import BeautifulSoup
+
+"""
+" Class: Open Graph Parser
+" Description: Parses an HTML document to retrieve and store Open Graph
+"              tags from the meta data
+" Author: Jonathan LeBlanc
+" License: Creative Commons (http://creativecommons.org/licenses/by-sa/2.0/)
+" Usage:
+"    url = 'http://www.nhl.com/ice/player.htm?id=8468482'
+"    og_instance = OpenGraphParser(url)
+"    print og_instance.get_one('title')
+"    print og_instance.get_all()
+"""
+class OpenGraphParser:
+    og_content = {}
+
+    """
+    " Method: Init
+    " Description: Initializes the open graph fetch.  If url was provided,
+    "              og_content will be set to return value of get_graph method
+    " Arguments: url (string) - The URL from which to collect the OG data
+    """
+    def __init__(self, url):
+        if url is not None:
+            self.og_content = self.get_graph(url)
+
+    """
+    " Method: Get Open Graph
+    " Description: Fetches HTML from provided url then filters to only meta tags.
+    "              Goes through all meta tags and any starting with og: get
+    "              stored and returned to the init method.
+    " Arguments: url (string) - The URL from which to collect the OG data
+    " Returns: dictionary - The matching OG tags
+    """
+    def get_graph(self, url):
+        #fetch all meta tags from the source of the url
+        sock = urllib.urlopen(url) 
+        htmlSource = sock.read()                            
+        sock.close()                                        
+        soup = BeautifulSoup(htmlSource)
+        meta = soup.findAll('meta')
+
+        #get all og:* tags from meta data
+        content = {}
+        for tag in meta:
+            if tag.has_key('property'):
+                if re.search('og:', tag['property']) is not None:
+                    content[re.sub('og:', '', tag['property'])] = tag['content']
+
+        return content
+
+    """
+    " Method: Get One Tag
+    " Description: Returns the content of one OG tag
+    " Arguments: tag (string) - The OG tag whose content should be returned
+    " Returns: string - the value of the OG tag
+    """
+    def get_one(self, tag):
+        return self.og_content[tag]
+
+    """
+    " Method: Get All Tags
+    " Description: Returns all found OG tags
+    " Returns: dictionary - All OG tags
+    """  
+    def get_all(self):
+        return self.og_content
diff --git a/chapter_10/opengraph-python-parser/OpenGraph.pyc b/chapter_10/opengraph-python-parser/OpenGraph.pyc
diff --git a/chapter_10/opengraph-python-parser/index.py b/chapter_10/opengraph-python-parser/index.py
@@ -0,0 +1,9 @@
+from OpenGraph import OpenGraphParser
+
+#initialize open graph parser class instance with url
+url = 'http://www.nhl.com/ice/player.htm?id=8468482';
+og_instance = OpenGraphParser(url)
+
+#output since description and entire og tag dictionary
+print og_instance.get_one('description')
+print og_instance.get_all()
diff --git a/chapter_10/pubsubhubbub-publisher-php/publisher.php b/chapter_10/pubsubhubbub-publisher-php/publisher.php
@@ -0,0 +1,66 @@
+<?php
+/* Copyright 2011 Jonathan LeBlanc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Class: PubSubHubbub Publisher
+ * Description: Allows for the publishing of new updates to the hub
+ */
+class Publisher{
+    private $regex_url = '|^https?://|i';       //simple URL string validator
+    private $hub = '';                          //hub URL
+
+    /*
+     * Constructor: validates and stores the hub URL for the publisher
+     * Params: $hub (string) - URL of the hub that receives the publish request
+     * Throws: Exception - if $hub does not start with http:// or https://
+     */
+    public function __construct($hub){
+        if (!preg_match($this->regex_url, $hub)){
+            throw new Exception('Invalid hub URL supplied');
+        }
+        $this->hub = $hub;
+    }
+
+    /*
+     * Function: publish
+     * Description: POSTs a hub.mode=publish request to the hub, listing each
+     *              feed as a hub.url parameter
+     * Params: $feeds (array|string) - feed URLs that have new content; a
+     *                                 single URL string is accepted as well
+     * Return: String - the raw hub response including the response headers,
+     *                  or false if the cURL request itself failed
+     * Throws: Exception - if a feed does not start with http:// or https://
+     */
+    public function publish($feeds){
+        //set up POST string with mode
+        $post_string = 'hub.mode=publish';
+
+        //the cast lets a lone feed URL be passed without wrapping it in an array
+        foreach ((array)$feeds as $feed){
+            //only valid feeds are added to the POST string
+            if (!preg_match($this->regex_url, $feed)){
+                throw new Exception('Invalid feed URL supplied: ' . $feed);
+            }
+            $post_string .= '&hub.url=' . urlencode($feed);
+        }
+
+        //set up cURL request; header and verbose options expose the exchange
+        //with the hub for inspection
+        $ch = curl_init($this->hub);
+        $options = array(
+            CURLOPT_HEADER => true,
+            CURLINFO_HEADER_OUT => true,
+            CURLOPT_VERBOSE => true,
+            CURLOPT_RETURNTRANSFER => true,
+            CURLOPT_POSTFIELDS => $post_string,
+            CURLOPT_CUSTOMREQUEST => 'POST'
+        );
+        curl_setopt_array($ch, $options);
+
+        //make request to hub
+        $response = curl_exec($ch);
+        curl_close($ch);
+
+        //return response
+        return $response;
+    }
+}
+?>
diff --git a/chapter_10/pubsubhubbub-publisher-php/publisher_example.php b/chapter_10/pubsubhubbub-publisher-php/publisher_example.php
@@ -0,0 +1,18 @@
+<?php
+include("publisher.php");
+
+//define hub and feeds
+$hub = 'http://pubsubhubbub.appspot.com/';
+$feeds = array('http://www.example.com/feed1.xml',
+               'http://www.example.com/feed2.xml',
+               'http://www.example.com/feed3.xml');
+
+//create new publisher
+$publisher = new Publisher($hub);
+
+//publish feeds ($feeds is the array defined above)
+$response = $publisher->publish($feeds);
+
+//print response
+var_dump($response);
+?>
diff --git a/chapter_10/pubsubhubbub-publisher-python/app.yaml b/chapter_10/pubsubhubbub-publisher-python/app.yaml
@@ -0,0 +1,9 @@
+# Application configuration for the Python publisher example.
+# NOTE(review): looks like a Google App Engine app.yaml - confirm the target runtime.
+application: publisher-python
+version: 1
+runtime: python
+api_version: 1
+
+handlers:
+
+# the .* pattern matches every request URL, so all requests run publisher_example.py
+- url: .*
+  script: publisher_example.py
diff --git a/chapter_10/pubsubhubbub-publisher-python/publisher.py b/chapter_10/pubsubhubbub-publisher-python/publisher.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+#
+# Copyright 2011 Jonathan LeBlanc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import re
+import urllib
+import urllib2
+
+'''
+' Class: Publishing Error
+' Description: Custom error class for publishing exceptions
+'''
+class PublishError(Exception):
+    def __init__(self, value):
+        self.value = value
+    def __str__(self):
+        return repr(self.value)
+
+'''
+' Class: Publisher
+' Description: Provides ability to publish updates for feeds
+'''
+class Publisher:
+    regex_url = re.compile('^https?://')    #simple URL string validator
+
+    #constructor that stores the hub for the publisher
+    def __init__(self, hub):
+        if self.regex_url.match(hub): self.hub = hub
+        else: raise PublishError('Invalid hub URL supplied')
+
+    #makes request to hub to update feeds
+    def publish(self, feeds):
+        #set the POST string mode
+        post_string = 'hub.mode=publish'
+
+        #add each feed as a URL in the POST string, unless invalid URL
+        for feed in feeds:
+            if self.regex_url.match(feed):
+                post_string += '&hub.url=%s' % (urllib.quote(feed))
+            else:
+                raise PublishError('Invalid feed URL supplied: %s' % (feed))
+
+        try:
+            #make request to hub
+            file = urllib2.urlopen(self.hub, post_string)
+            return True
+        except (IOError, urllib2.HTTPError), e:
+            #process http conditions in 2xx range as valid
+            if hasattr(e, 'code') and str(e.code)[0] == '2':
+                return True
+
+            #process alternative error conditions
+            error = ''
+            if hasattr(e, 'read'):
+                error = e.read()
+            raise PublishError('%s, Response: "%s"' % (e, error))    
diff --git a/chapter_10/pubsubhubbub-publisher-python/publisher_example.py b/chapter_10/pubsubhubbub-publisher-python/publisher_example.py
@@ -0,0 +1,17 @@
+from publisher import *
+
+#define hub and feeds
+hub = 'http://pubsubhubbub.appspot.com/'
+feeds = ['http://www.example.com/feed1.xml', 'http://www.example.com/feed2.xml', 'http://www.example.com/feed3.xml']
+
+#create new publisher
+publisher = Publisher(hub)
+
+#publish feed updates: response == True on success
+response = publisher.publish(feeds)
+
+#print message on success
+if (response == True):
+    print 'Content-Type: text/plain'
+    print ''
+    print 'Update successful'