Skip to content

Commit

Permalink
Chapter 10 distributed web framework examples
Browse files Browse the repository at this point in the history
  • Loading branch information
jcleblanc committed May 26, 2011
1 parent caf7ccc commit 0752f2b
Show file tree
Hide file tree
Showing 15 changed files with 540 additions and 0 deletions.
75 changes: 75 additions & 0 deletions chapter_10/opengraph-php-parser/OpenGraph.php
@@ -0,0 +1,75 @@
<?php
/*******************************************************************************
* Class Name: Open Graph Parser
* Description: Parses an HTML document to retrieve and store Open Graph
* tags from the meta data
* Author: Jonathan LeBlanc (Twitter: @jcleblanc)
* License: Creative Commons (http://creativecommons.org/licenses/by-sa/2.0/)
* Usage:
* $url = 'http://www.example.com/index.html';
* $graph = new OpenGraph($url);
* print_r($graph->get_one('title')); //get only title element
* print_r($graph->get_all()); //return all Open Graph tags
******************************************************************************/
class OpenGraph{
    //the open graph associative array (property => content) of THIS instance;
    //kept per instance so parsing a second page cannot overwrite the tags
    //held by an earlier OpenGraph object
    private $og_content = array();

    /***************************************************************************
    * Function: Class Constructor
    * Description: Initiates the request to fetch OG data. When $url is empty
    *              no request is made and the tag array stays empty.
    * Params: $url (string) - URL (or local path) of page to collect OG tags from
    **************************************************************************/
    public function __construct($url){
        if ($url){
            $this->og_content = $this->get_graph($url);
        }
    }

    /***************************************************************************
    * Function: Get Open Graph
    * Description: Loads the HTML document and collects every <meta> element
    *              whose "property" attribute starts with "og:"
    * Params: $url (string) - URL of page to collect OG tags from
    * Return: Array - associative array containing the OG data in format
    *         property : content, with the "og:" prefix removed from property.
    *         Empty array when the document could not be loaded.
    **************************************************************************/
    private function get_graph($url){
        $graph_content = array();

        //fetch html content from web source; @ silences the warnings that
        //real-world (malformed) markup and failed requests produce
        $dom = new DOMDocument();
        if (!@$dom->loadHTMLFile($url)){
            return $graph_content;
        }

        //only properties that begin with "og:" are open graph tags
        $og_pattern = '/^og:/';

        //for each open graph tag, store in return object as property : content
        foreach ($dom->getElementsByTagName('meta') as $element){
            $property = $element->getAttribute('property');
            if (preg_match($og_pattern, $property)){
                $key = preg_replace($og_pattern, '', $property);
                $graph_content[$key] = $element->getAttribute('content');
            }
        }

        return $graph_content;
    }

    /***************************************************************************
    * Function: Get One Tag
    * Description: Fetches the content of one OG tag
    * Params: $element (string) - tag name without the "og:" prefix, e.g. 'title'
    * Return: String - the content of the requested OG tag, or null when the
    *         page did not define that tag
    **************************************************************************/
    public function get_one($element){
        return isset($this->og_content[$element]) ? $this->og_content[$element] : null;
    }

    /***************************************************************************
    * Function: Get All Tags
    * Description: Fetches the content of every OG tag that was found
    * Return: Array - The entire OG associative array (empty if none found)
    **************************************************************************/
    public function get_all(){
        return $this->og_content;
    }
}
?>
9 changes: 9 additions & 0 deletions chapter_10/opengraph-php-parser/og_test.php
@@ -0,0 +1,9 @@
<?php
require_once 'OpenGraph.php';

//page whose Open Graph tags are fetched
$page_url = 'http://www.yelp.com/biz/the-restaurant-at-wente-vineyards-livermore-2';
$parser = new OpenGraph($page_url);

//dump the single "title" tag first, then the complete tag array
$title = $parser->get_one('title');
print_r($title);
$all_tags = $parser->get_all();
print_r($all_tags);

?>
70 changes: 70 additions & 0 deletions chapter_10/opengraph-python-parser/OpenGraph.py
@@ -0,0 +1,70 @@
import urllib
import re
from BeautifulSoup import BeautifulSoup

"""
" Class: Open Graph Parser
" Description: Parses an HTML document to retrieve and store Open Graph
" tags from the meta data
" Author: Jonathan LeBlanc
" License: Creative Commons (http://creativecommons.org/licenses/by-sa/2.0/)
" Useage:
" url = 'http://www.nhl.com/ice/player.htm?id=8468482';
" og_instance = OpenGraphParser(url)
" print og_instance.get_one('og:title')
" print og_instance.get_all()
"""
class OpenGraphParser:
    """Collect the Open Graph (og:*) meta tags of an HTML page.

    Tags are stored with the "og:" prefix removed, so the title is read
    with get_one('title'), not get_one('og:title').
    """

    # Class-level default kept for code that reads the attribute on the class;
    # __init__ always gives each instance its own dictionary so instances
    # never share (and accidentally mutate) this one.
    og_content = {}

    def __init__(self, url):
        """Fetch and store the OG tags of `url`.

        Arguments: url (string) - The URL from which to collect the OG data.
                   When None, no request is made and the tag dictionary
                   stays empty.
        """
        self.og_content = {}
        if url is not None:
            self.og_content = self.get_graph(url)

    def get_graph(self, url):
        """Fetch the HTML at `url` and return its OG tags.

        Arguments: url (string) - The URL from which to collect the OG data
        Returns: dictionary - property (without "og:") mapped to content;
                 a tag with no content attribute maps to ''
        """
        # fetch the page source; close the socket even if the read fails
        sock = urllib.urlopen(url)
        try:
            html_source = sock.read()
        finally:
            sock.close()
        soup = BeautifulSoup(html_source)

        # keep only meta tags whose property *starts* with "og:"
        prefix = 'og:'
        content = {}
        for tag in soup.findAll('meta'):
            prop = tag.get('property')
            if not prop or not prop.startswith(prefix):
                continue
            content[prop[len(prefix):]] = tag.get('content', '')

        return content

    def get_one(self, tag):
        """Return the content of one OG tag.

        Arguments: tag (string) - tag name without the "og:" prefix
        Returns: string - the value of the OG tag
        Raises: KeyError - when the page did not define that tag
        """
        return self.og_content[tag]

    def get_all(self):
        """Return the dictionary of all OG tags that were found."""
        return self.og_content
Binary file added chapter_10/opengraph-python-parser/OpenGraph.pyc
Binary file not shown.
9 changes: 9 additions & 0 deletions chapter_10/opengraph-python-parser/index.py
@@ -0,0 +1,9 @@
from OpenGraph import OpenGraphParser

# page whose Open Graph tags are fetched by the parser
page_url = 'http://www.nhl.com/ice/player.htm?id=8468482'
parser = OpenGraphParser(page_url)

# output the single "description" tag, then the entire og tag dictionary
description = parser.get_one('description')
print(description)
all_tags = parser.get_all()
print(all_tags)
66 changes: 66 additions & 0 deletions chapter_10/pubsubhubbub-publisher-php/publisher.php
@@ -0,0 +1,66 @@
<?php
/* Copyright 2011 Jonathan LeBlanc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*
* Class: PubSubHubbub Publisher
* Description: Allows for the publishing of new updates to the hub
*/
class Publisher{
    private $regex_url = '|^https?://|i'; //simple URL string validator
    private $hub = '';                    //hub URL

    /*
     * Stores the hub URL that publish notifications are sent to.
     * Params: $hub (string) - http(s) URL of the hub
     * Throws: Exception when $hub is not an http(s) URL string
     */
    public function __construct($hub){
        if (!is_string($hub) || !preg_match($this->regex_url, $hub)){
            throw new Exception('Invalid hub URL supplied');
        }
        $this->hub = $hub;
    }

    /*
     * Notifies the hub that the given feeds have new content.
     * Params: $feeds (array|string) - feed URLs to publish; a single URL
     *         string is accepted and treated as a one-element list
     * Return: the raw response (headers + body) from the hub as a string,
     *         or false when the cURL request itself failed
     * Throws: Exception when any feed is not an http(s) URL string; nothing
     *         is sent to the hub in that case
     */
    public function publish($feeds){
        //allow a lone feed URL instead of an array
        if (!is_array($feeds)){
            $feeds = array($feeds);
        }

        //set up POST string with mode
        $post_string = 'hub.mode=publish';

        //validate every feed before anything is sent, adding each valid one
        //to the POST string
        foreach ($feeds as $feed){
            if (!is_string($feed) || !preg_match($this->regex_url, $feed)){
                throw new Exception('Invalid feed URL supplied');
            }
            $post_string .= '&hub.url=' . urlencode($feed);
        }

        //set up cURL request
        $ch = curl_init($this->hub);
        $options = array(
            CURLOPT_HEADER => true,
            CURLINFO_HEADER_OUT => true,
            CURLOPT_VERBOSE => true,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_POSTFIELDS => $post_string,
            CURLOPT_CUSTOMREQUEST => 'POST'
        );
        curl_setopt_array($ch, $options);

        //make request to hub
        $response = curl_exec($ch);
        curl_close($ch);

        //return response
        return $response;
    }
}
?>
18 changes: 18 additions & 0 deletions chapter_10/pubsubhubbub-publisher-php/publisher_example.php
@@ -0,0 +1,18 @@
<?php
include("publisher.php");

//define hub and feeds
$hub = 'http://pubsubhubbub.appspot.com/';
$feeds = array('http://www.example.com/feed1.xml',
               'http://www.example.com/feed2.xml',
               'http://www.example.com/feed3.xml');

//create new publisher
$publisher = new Publisher($hub);

//publish feeds ($feeds is the array defined above)
$response = $publisher->publish($feeds);

//print response
var_dump($response);
?>
9 changes: 9 additions & 0 deletions chapter_10/pubsubhubbub-publisher-python/app.yaml
@@ -0,0 +1,9 @@
# Application configuration for the PubSubHubbub publisher example
# (presumably a Google App Engine app.yaml - confirm against the deployment).
application: publisher-python
version: 1
runtime: python
api_version: 1

# Request routing
handlers:

# Every request path (pattern .*) is handled by publisher_example.py
- url: .*
script: publisher_example.py
69 changes: 69 additions & 0 deletions chapter_10/pubsubhubbub-publisher-python/publisher.py
@@ -0,0 +1,69 @@
#!/usr/bin/env python
#
# Copyright 2011 Jonathan LeBlanc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import re
import urllib
import urllib2

'''
' Class: Publishing Error
' Description: Custom error class for publishing exceptions
'''
class PublishError(Exception):
    """Error type for publishing failures; the detail is kept in `value`."""

    def __init__(self, value):
        # keep the raw detail so handlers can inspect it directly
        self.value = value

    def __str__(self):
        # the string form is the repr of the stored detail
        return '%r' % (self.value,)

'''
' Class: Publisher
' Description: Provides ability to publish updates for feeds
'''
class Publisher:
regex_url = re.compile('^https?://') #simple URL string validator

#constructor that stores the hub for the publisher
def __init__(self, hub):
if self.regex_url.match(hub): self.hub = hub
else: raise PublishError('Invalid hub URL supplied')

#makes request to hub to update feeds
def publish(self, feeds):
#set the POST string mode
post_string = 'hub.mode=publish'

#add each feed as a URL in the POST string, unless invalid URL
for feed in feeds:
if self.regex_url.match(feed):
post_string += '&hub.url=%s' % (urllib.quote(feed))
else:
raise PublishError('Invalid feed URL supplied: %s' % (feed))

try:
#make request to hub
file = urllib2.urlopen(self.hub, post_string)
return True
except (IOError, urllib2.HTTPError), e:
#process http conditions in 2xx range as valid
if hasattr(e, 'code') and str(e.code)[0] == '2':
return True

#process alternative error conditions
error = ''
if hasattr(e, 'read'):
error = e.read()
raise PublishError('%s, Response: "%s"' % (e, error))
17 changes: 17 additions & 0 deletions chapter_10/pubsubhubbub-publisher-python/publisher_example.py
@@ -0,0 +1,17 @@
from publisher import *

#define hub and feeds
hub = 'http://pubsubhubbub.appspot.com/'
feeds = ['http://www.example.com/feed1.xml', 'http://www.example.com/feed2.xml', 'http://www.example.com/feed3.xml']

#create new publisher
publisher = Publisher(hub)

#publish feed updates: response == True on success
response = publisher.publish(feeds)

#print message on success
if (response == True):
print 'Content-Type: text/plain'
print ''
print 'Update successful'

0 comments on commit 0752f2b

Please sign in to comment.