Permalink
Browse files

Initial version of Real-time Facebook Graph API Proxy

  • Loading branch information...
1 parent e40b6e9 commit 576ce86558ba6ee50fbc8322487c4ef00c557b33 ypisetsky committed Jun 7, 2010
Showing with 1,205 additions and 0 deletions.
  1. +44 −0 README
  2. +57 −0 config.sample
  3. +1 −0 fbproxy/__init__.py
  4. +111 −0 fbproxy/apps.py
  5. +193 −0 fbproxy/cache.py
  6. +28 −0 fbproxy/config.py
  7. +65 −0 fbproxy/hashdict.py
  8. +66 −0 fbproxy/launcher.py
  9. +120 −0 fbproxy/lru.py
  10. +228 −0 fbproxy/requesthandler.py
  11. +154 −0 fbproxy/rtendpoint.py
  12. +89 −0 fbproxy/rturegister.py
  13. +14 −0 setup.py
  14. +35 −0 start_proxy
View
44 README
@@ -0,0 +1,44 @@
+=== Facebook Graph Proxy with Realtime Invalidation ===
+The Facebook Graph Proxy is a simple proxy for graph.facebook.com that takes
+advantage of the Realtime Updates feature to invalidate stale data. It can be
+used by replacing references to graph.facebook.com with references to the
+proxy.
+
+== Installation ==
+To install:
+
+ python setup.py build
+ sudo python setup.py install
+
+The Facebook Graph Proxy also requires the CherryPy WSGI server to function.
+This may be installed using:
+
+ sudo easy_install cherrypy
+
+== Configuration ==
+To start, copy config.sample into config.txt. From here, you will need to
+update the following values:
+
+ proxy_port: the port on which the server will listen for requests which
+ need to be proxied.
+ realtime_port: the port on which the server will listen for Realtime
+ Updates from Facebook.
+ cache_entries: The number of entries that should be stored in the cache
+ before dropping the least-recently-used entry
+ public_hostname: The publicly-visible hostname that Facebook should use
+ to reach the Realtime Update endpoint of this server
+
+In addition, you will need to enter information about the Facebook
+Applications which will be using this server. See the config file for details.
+
+== Security Considerations ==
+In order to avoid leaking data about users, the proxy port must only be
+accessible by your own servers.
+
+== Execution instructions ==
+To run the proxy:
+
+ start_proxy [config_file]
+
+If config_file is not passed, then the proxy will default to using config.txt.
+
View
@@ -0,0 +1,57 @@
+
+# proxy endpoint settings
+# this endpoint must NOT be visible from untrusted sources, or an attacker
+# could retrieve data from the cache without valid authentication
+proxy_port = 14567
+proxy_interface = '0.0.0.0'
+
+# realtime-update endpoint settings
+# this endpoint must be visible from Facebook.
+realtime_port = 14568
+realtime_interface = '0.0.0.0'
+public_hostname = "server.domain.com"
+
+# cache settings
+cache_entries = 10000
+
+
+# application settings: Each application should be specified
+# in a format similar to the following example:
+# apps[] = { 'app_id': 12345, 'property': value }
+#
+# Currently supported properties
+# app_id - the Application ID
+# app_cred - an access_token (without the 'access_token=' prefix) which
+# is a valid client_cred access token for the application.
+# This is used to fetch subscription information on startup.
+# app_secret - the application secret. This is used only if app_cred
+# is not provided to fetch subscription information on startup
+#
+# blacklist_fields - Fields which will force a request to bypass the cache
+# if present in a ?fields= query string param. This should be an array
+# of strings (i.e. ['hometown', 'last_name'])
+# whitelist_fields - If present, only requests consisting solely of
+# these fields are considered eligible for caching. It is recommended that
+# you provide exactly one of app_cred, app_secret, and whitelist_fields.
+# Note: these apply only to fetches of a user (i.e. /userid?fields=name,link)
+# Note 2: if one of app_cred or app_secret is specified, the realtime
+# subscription for the app will be updated to point to our endpoint.
+#
+# blacklist_connections - Connections for which requests will be forced to
+# bypass the cache. (for instance /userid/friends)
+# whitelist_connections - If present, will only consider connections on this
+# list as eligible for caching. The notes for whitelist_fields apply here too.
+
+
# Example application entry: replace the id and credential with your own.
# See the property descriptions above for the meaning of each key.
app_1 = {
    'app_id': '123456789012345',
    'app_cred': '123456789012345|REPLACEWITHYOURCRED.',
    # Profile fields eligible for caching on /userid?fields=... requests.
    'whitelist_fields': [
        'first_name', 'last_name', 'name', 'about', 'bio',
        'relationship_status', 'email', 'significant_other',
        'hometown', 'location', 'work', 'education', 'gender',
    ],
    # Connections eligible for caching on /userid/<connection> requests.
    'whitelist_connections': [
        'friends', 'feed', 'movies', 'books',
        'family', 'activities', 'interests', 'music', 'television',
        'statuses', 'links', 'picture',
    ],
}


# The list of configured application entries.
apps = [app_1]
View
@@ -0,0 +1 @@
+""" A realtime-api-invalidated cache for the Facebook Graph API."""
View
@@ -0,0 +1,111 @@
+#
+# Copyright 2010 Facebook
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+""" A container for app-specific data and functionality."""
+import threading
+import logging
+
+
class App(object):
    """Manage Facebook application-specific settings and caching policy.

    This class serves two purposes. First, it is a repository of information
    about an application (the users we have seen for it and its configuration
    settings). Second, it exposes two methods used for cache-eligibility
    decisions in ProxyRequestHandler: check_user and check_request.

    check_user adds the requestor to the app's set of seen users and then
    reports whether the user whose data is being requested has been seen
    before (only users who we are sure have added an app will be updated by
    realtime updates, so we only cache requests for those users' data).
    check_request ensures that the request is only for data which is
    whitelisted for the app and not blacklisted.
    """

    def __init__(self, config):
        """Build an App from one application entry of the configuration.

        config is a dict. 'app_id' is required; 'app_cred', 'app_secret',
        'blacklist_fields', 'blacklist_connections', 'whitelist_fields' and
        'whitelist_connections' are optional. A missing (or None) list is
        treated as empty.
        """
        self.id = config['app_id']
        self.cred = config.get('app_cred')
        self.secret = config.get('app_secret')
        self.users = set()
        self.lock = threading.Lock()  # guards self.users
        self.bad_fields = set(config.get('blacklist_fields') or ())
        self.bad_conns = set(config.get('blacklist_connections') or ())
        # A blacklist entry always wins over a whitelist entry.
        self.good_fields = (set(config.get('whitelist_fields') or ())
                            - self.bad_fields)
        self.good_conns = (set(config.get('whitelist_connections') or ())
                           - self.bad_conns)

    def check_user(self, requestor, requestee, default=None):
        """Record the requestor and report whether the requestee is known.

        Adds the requestor to the known users for the app and returns True
        if the requestee is a known user of the app. If default is another
        App instance, the requestor is recorded there as well, since we will
        get updates for them through it too.
        """
        # Use the lock as a context manager so it is released even if
        # adding the requestor raises (e.g. an unhashable value); a lock
        # that is never released would block every later request.
        with self.lock:
            self.users.add(requestor)
            known = requestee in self.users

        # If this isn't the default app, also add the user to the default app.
        if default is not None and default is not self:
            default.check_user(requestor, requestee)

        return known

    def check_request(self, pathparts, fields=None):
        """Return whether a request is cacheable.

        pathparts is the sequence of path components of the request and
        fields is the optional collection of requested profile fields.
        A one-part path is a request for direct profile fields and is
        cacheable only if every requested field is whitelisted. A two-part
        path is a request for a connection and is cacheable only if the
        connection is whitelisted. Anything else is not cacheable.
        """
        if len(pathparts) == 1:  # request for direct profile fields
            if set(fields or ()) <= self.good_fields:
                return True
            logging.info('got fields %r but only %r is ok',
                         fields, self.good_fields)
            return False
        if len(pathparts) == 2:  # request for a connection
            return pathparts[1] in self.good_conns
        # Safety: if we're not certain about it, fall back to passthrough
        # behavior.
        return False
+
+
def init(configapps):
    """Build the mapping of app ids to App objects from the configuration.

    configapps is an iterable of per-application config dicts, each with at
    least an 'app_id' key. Keys of the returned dict are the app ids
    converted to strings.

    If no application with the id 'default' is configured, a default App is
    added whose whitelisted fields and connections are the intersection of
    the whitelists of all configured apps. With no configured apps the
    default app's whitelists are empty, so nothing is considered cacheable.
    """
    apps = dict((str(cfg['app_id']), App(cfg)) for cfg in configapps)
    if 'default' not in apps:
        # Add the default app if settings haven't been defined for it already.
        default_app = App({'app_id': 'default'})
        configured = list(apps.values())
        if configured:
            # set.intersection always returns a new set, so the default app
            # never shares a set object with a configured app.
            default_app.good_fields = set.intersection(
                *[app.good_fields for app in configured])
            default_app.good_conns = set.intersection(
                *[app.good_conns for app in configured])
        apps['default'] = default_app
    return apps
+
+
def get_app(app_id, app_set):
    """Look up the given app in app_set, using the default if needed.

    app_set is a mapping of app id to app object. The id is tried as given
    first and then in its string form, because init() keys the mapping by
    str(app_id); without this an integer id would silently resolve to the
    default app. Returns the 'default' entry when the id is unknown, or None
    when there is no default entry either.
    """
    for key in (app_id, str(app_id)):
        if key in app_set:
            return app_set[key]
    return app_set.get('default')
Oops, something went wrong.

0 comments on commit 576ce86

Please sign in to comment.