Permalink
Browse files

Nested Comments. Refactoring.

  • Loading branch information...
glebpopoff committed Oct 23, 2011
1 parent de318c5 commit e6cc99d8b9ecac6afdd952bcfe513146b1fc0e8f
View
@@ -11,7 +11,6 @@
import urllib
import AppConfig
from google.appengine.api import urlfetch
-import DataCache
from BeautifulSoup import BeautifulSoup
from django.utils import simplejson
from structured import list2xml
@@ -22,243 +21,40 @@ def removeHtmlTags(data):
def removeNonAscii(s): return "" . join(filter(lambda x: ord(x)<128, s))
-#get cached content
-def getCache(pageId,format):
- logging.debug('getCache: %s' % pageId)
- dbData = DataCache.getData(pageId,format)
- if (dbData):
- if (DataCache.hasExpired(dbData)):
- #data has expired, remove it
- dbData[0].delete()
- return None
- else:
- logging.debug('getCache: got cached data for id %s' % id)
- return dbData[0].rec_xml
-
-#parse HN's submissions by user
-def getHackerNewsSubmittedContent(user, format='json', url='', referer='', remote_addr=''):
- #only cache homepage data
- apiURL = "%s/submitted?id=%s" % (AppConfig.hackerNewsURL, user)
- apiURLBackup = "%s/submitted?id=%s" % (AppConfig.hackerNewsURLBackup, user)
- id = '/submitted/%s' % (user)
- cachedData = getCache(id,format)
- if (cachedData):
- return cachedData
+#call urlfetch to get remote data
+def fetchRemoteData(urlStr, deadline):
+ result = urlfetch.fetch(url=urlStr, deadline=deadline)
+ if result.status_code == 200:
+ return result
else:
- hnData = parsePageContent(apiURL,apiURLBackup, '/submitted', None,format)
- if (hnData):
- logging.debug('getHackerNewsSubmittedContent: storing cached value for id %s' % id)
- DataCache.putData(id, format,removeNonAscii(hnData), url, referer, remote_addr)
- return hnData
- else:
- logging.warning('getHackerNewsSubmittedContent: unable to retrieve data for id %s' % id)
- return ''
-
-#parse HN's comments by story id
-def getHackerNewsComments(articleId, format='json', url='', referer='', remote_addr=''):
- #only cache homepage data
- apiURL = "%s/item?id=%s" % (AppConfig.hackerNewsURL, articleId)
- apiURLBackup = "%s/item?id=%s" % (AppConfig.hackerNewsURLBackup, articleId)
- id = '/comments/%s' % (articleId)
- cachedData = getCache(id,format)
- if (cachedData):
- return cachedData
- else:
- hnData = parseCommentsContent(apiURL, apiURLBackup, '/comments', None,format)
- if (hnData):
- logging.debug('getHackerNewsComments: storing cached value for id %s' % id)
- DataCache.putData(id, format,removeNonAscii(hnData), url, referer, remote_addr)
- return hnData
- else:
- logging.warning('getHackerNewsComments: unable to retrieve data for id %s' % id)
- return ''
-
-#parse HN's comments by story id
-def getHackerNewsNestedComments(articleId, format='json', url='', referer='', remote_addr=''):
- #only cache homepage data
- apiURL = "%s/item?id=%s" % (AppConfig.hackerNewsURL, articleId)
- apiURLBackup = "%s/item?id=%s" % (AppConfig.hackerNewsURLBackup, articleId)
- id = '/nestedcomments/%s' % (articleId)
- cachedData = getCache(id,format)
- if (cachedData):
- return cachedData
- else:
- hnData = parseNestedCommentsContent(apiURL, apiURLBackup, '/nestedcomments', None,format)
- if (hnData):
- logging.debug('getHackerNewsComments: storing cached value for id %s' % id)
- DataCache.putData(id, format,removeNonAscii(hnData), url, referer, remote_addr)
- return hnData
- else:
- logging.warning('getHackerNewsComments: unable to retrieve data for id %s' % id)
- return ''
-
-#parse HN's ask content
-def getHackerNewsAskContent(page='', format='json', url='', referer='', remote_addr=''):
- #only cache homepage data
- if (page):
- return parsePageContent(AppConfig.hackerNewsAskURL, AppConfig.hackerNewsAskURLBackup, '/ask', page,format)
- else:
- id = '/ask/'
- cachedData = getCache(id,format)
- if (cachedData):
- return cachedData
- else:
- hnData = parsePageContent(AppConfig.hackerNewsAskURL, AppConfig.hackerNewsAskURLBackup, '/ask', page,format)
- if (hnData):
- logging.debug('getCache: storing cached value for id %s' % id)
- DataCache.putData(id, format,removeNonAscii(hnData), url, referer, remote_addr)
- return hnData
- else:
- logging.warning('getCache: unable to retrieve data for id %s' % id)
- return ''
-
-#parse HN's best content
-def getHackerNewsBestContent(page='', format='json', url='', referer='', remote_addr=''):
- #only cache homepage data
- if (page):
- return parsePageContent(AppConfig.hackerNewsBestURL, AppConfig.hackerNewsBestURLBackup, '/best', page,format)
- else:
- id = '/best/'
- cachedData = getCache(id,format)
- if (cachedData):
- return cachedData
- else:
- hnData = parsePageContent(AppConfig.hackerNewsBestURL, AppConfig.hackerNewsBestURLBackup, '/best', page,format)
- if (hnData):
- logging.debug('getCache: storing cached value for id %s' % id)
- DataCache.putData(id, format,removeNonAscii(hnData), url, referer, remote_addr)
- return hnData
- else:
- logging.warning('getCache: unable to retrieve data for id %s' % id)
- return ''
-
-#parse HN's newest content
-def getHackerNewsNewestContent(page='', format='json', url='', referer='', remote_addr=''):
- #only cache homepage data
- if (page):
- return parsePageContent(AppConfig.hackerNewsNewestURL, AppConfig.hackerNewsNewestURLBackup,'/newest', page,format)
- else:
- id = '/newest/'
- cachedData = getCache(id,format)
- if (cachedData):
- return cachedData
- else:
- hnData = parsePageContent(AppConfig.hackerNewsNewestURL, AppConfig.hackerNewsNewestURLBackup,'/newest', page,format)
- if (hnData):
- logging.debug('getCache: storing cached value for id %s' % id)
- DataCache.putData(id, format,removeNonAscii(hnData), url, referer, remote_addr)
- return hnData
- else:
- logging.warning('getCache: unable to retrieve data for id %s' % id)
- return ''
-
-
-#get homepage second page stories
-def getHackerNewsSecondPageContent(page='', format='json', url='', referer='', remote_addr=''):
- #only cache homepage data
- if (page):
- return parsePageContent(AppConfig.hackerNewsPage2URL, AppConfig.hackerNewsPage2URLBackup,'/news2', page,format)
- else:
- id = '/news2'
- cachedData = getCache(id,format)
- if (cachedData):
- return cachedData
- else:
- hnData = parsePageContent(AppConfig.hackerNewsPage2URL, AppConfig.hackerNewsPage2URLBackup, '/news2', page,format)
- if (hnData):
- logging.debug('getCache: storing cached value for id %s' % id)
- DataCache.putData(id, format,removeNonAscii(hnData), url, referer, remote_addr)
- return hnData
- else:
- logging.warning('getCache: unable to retrieve data for id %s' % id)
- return ''
-
-#get latest homepage stories
-def getHackerNewsLatestContent(page='', format='json', url='', referer='', remote_addr='', limit=1):
- #only cache homepage data
- limit = int(limit)
- if (page):
- return parsePageContent(AppConfig.hackerNewsURL, AppConfig.hackerNewsURLBackup, '/latest', page,format,limit)
- else:
- id = '/latest/%s' % limit
- cachedData = getCache(id,format)
- if (cachedData):
- return cachedData
- else:
- hnData = parsePageContent(AppConfig.hackerNewsURL,AppConfig.hackerNewsURLBackup, '/latest', page,format,limit)
- if (hnData):
- logging.debug('getCache: storing cached value for id %s' % id)
- DataCache.putData(id, format,removeNonAscii(hnData), url, referer, remote_addr)
- return hnData
- else:
- logging.warning('getCache: unable to retrieve data for id %s' % id)
- return ''
-
-#parse HN's homepage
-def getHackerNewsPageContent(page='', format='json', url='', referer='', remote_addr=''):
- #only cache homepage data
- if (page):
- return parsePageContent(AppConfig.hackerNewsURL, AppConfig.hackerNewsURLBackup, '/news', page,format)
- else:
- id = '/news/'
- cachedData = getCache(id,format)
- if (cachedData):
- return cachedData
- else:
- hnData = parsePageContent(AppConfig.hackerNewsURL, AppConfig.hackerNewsURLBackup, '/news', page,format)
- if (hnData):
- logging.debug('getCache: storing cached value for id %s' % id)
- DataCache.putData(id, format,removeNonAscii(hnData), url, referer, remote_addr)
- return hnData
- else:
- logging.warning('getCache: unable to retrieve data for id %s' % id)
- return ''
+ logging.error('fetchRemoteData: unable to get remote data: %s' % urlStr)
+ raise Exception("fetchRemoteData: failed")
#Call the remote server to get data, retrying on failure because urlfetch
#on GAE times out frequently.  Alternates between the primary and backup
#URLs: primary, backup, primary, backup (4 attempts total).
#Returns the urlfetch result object on success, or None if every attempt
#failed.
def getRemoteData(urlStr, backupUrl):
    attemptNum = 0
    #alternate primary/backup so a dead primary host does not consume all retries
    for targetUrl in (urlStr, backupUrl, urlStr, backupUrl):
        attemptNum += 1
        try:
            logging.debug('getRemoteData: Attempt #%s: %s' % (attemptNum, targetUrl))
            return fetchRemoteData(targetUrl, 30)
        except Exception:
            #narrowed from a bare except; still catches fetchRemoteData's
            #deliberate raise on non-200 responses as well as urlfetch errors
            logging.error('getRemoteData: Attempt #%s failed: %s' % (attemptNum, targetUrl))
    return None
#parse content using Beautiful Soup
def parsePageContent(hnAPIUrl,hnBackupAPIUrl, apiURL, page='',format='json',limit=0):
View
@@ -16,7 +16,7 @@
import AppConfig
import GAHelper
from xml.sax.saxutils import escape
-import APIUtils
+import APIContent
import GAHelper
from BeautifulSoup import BeautifulSoup
@@ -31,11 +31,10 @@ def get(self,format='json',page=''):
if ('HTTP_REFERER' in os.environ):
referer = os.environ['HTTP_REFERER']
- returnData = MutableString()
- returnData = APIUtils.getHackerNewsAskContent(page,format,self.request.url, referer, self.request.remote_addr)
+ returnData = APIContent.getHackerNewsAskContent(page,format,self.request.url, referer, self.request.remote_addr)
if (not returnData or returnData == None or returnData == '' or returnData == 'None'):
#call the service again this time without the pageID
- returnData = APIUtils.getHackerNewsAskContent('',format,self.request.url, referer, self.request.remote_addr)
+ returnData = APIContent.getHackerNewsAskContent('',format,self.request.url, referer, self.request.remote_addr)
#track this request
GAHelper.trackGARequests('/ask', self.request.remote_addr, referer)
View
@@ -16,7 +16,7 @@
import AppConfig
import GAHelper
from xml.sax.saxutils import escape
-import APIUtils
+import APIContent
import GAHelper
from BeautifulSoup import BeautifulSoup
@@ -31,11 +31,10 @@ def get(self,format='json',page=''):
if ('HTTP_REFERER' in os.environ):
referer = os.environ['HTTP_REFERER']
- returnData = MutableString()
- returnData = APIUtils.getHackerNewsBestContent(page,format,self.request.url, referer, self.request.remote_addr)
+ returnData = APIContent.getHackerNewsBestContent(page,format,self.request.url, referer, self.request.remote_addr)
if (not returnData or returnData == None or returnData == '' or returnData == 'None'):
#call the service again this time without the pageID
- returnData = APIUtils.getHackerNewsBestContent('',format,self.request.url, referer, self.request.remote_addr)
+ returnData = APIContent.getHackerNewsBestContent('',format,self.request.url, referer, self.request.remote_addr)
#track this request
GAHelper.trackGARequests('/best', self.request.remote_addr, referer)
View
@@ -18,7 +18,7 @@
import AppConfig
import GAHelper
from xml.sax.saxutils import escape
-import APIUtils
+import APIContent
import GAHelper
from BeautifulSoup import BeautifulSoup
@@ -33,9 +33,8 @@ def get(self,format,id):
if ('HTTP_REFERER' in os.environ):
referer = os.environ['HTTP_REFERER']
- returnData = MutableString()
- returnData = APIUtils.getHackerNewsComments(id,format,self.request.url, referer, self.request.remote_addr)
-
+ returnData = APIContent.getHackerNewsComments(id,format,self.request.url, referer, self.request.remote_addr)
+
#track this request
GAHelper.trackGARequests('/comments/%s' % (id), self.request.remote_addr, referer)
View
@@ -18,7 +18,7 @@
import AppConfig
import GAHelper
from xml.sax.saxutils import escape
-import APIUtils
+import APIContent
import GAHelper
class HackerNewsLatestPageHandler(webapp.RequestHandler):
@@ -32,8 +32,7 @@ def get(self,format='json',limit=1):
if ('HTTP_REFERER' in os.environ):
referer = os.environ['HTTP_REFERER']
- returnData = MutableString()
- returnData = APIUtils.getHackerNewsLatestContent('',format,self.request.url, referer, self.request.remote_addr, limit)
+ returnData = APIContent.getHackerNewsLatestContent('',format,self.request.url, referer, self.request.remote_addr, limit)
#track this request
GAHelper.trackGARequests('/latest', self.request.remote_addr, referer)
@@ -17,7 +17,7 @@
import AppConfig
import GAHelper
from xml.sax.saxutils import escape
-import APIUtils
+import APIContent
import GAHelper
from BeautifulSoup import BeautifulSoup
@@ -32,8 +32,7 @@ def get(self,format,id):
if ('HTTP_REFERER' in os.environ):
referer = os.environ['HTTP_REFERER']
- returnData = MutableString()
- returnData = APIUtils.getHackerNewsNestedComments(id,format,self.request.url, referer, self.request.remote_addr)
+ returnData = APIContent.getHackerNewsNestedComments(id,format,self.request.url, referer, self.request.remote_addr)
#track this request
GAHelper.trackGARequests('/nestedcomments/%s' % (id), self.request.remote_addr, referer)
View
@@ -16,7 +16,7 @@
import AppConfig
import GAHelper
from xml.sax.saxutils import escape
-import APIUtils
+import APIContent
import GAHelper
from BeautifulSoup import BeautifulSoup
@@ -31,11 +31,10 @@ def get(self,format='json',page=''):
if ('HTTP_REFERER' in os.environ):
referer = os.environ['HTTP_REFERER']
- returnData = MutableString()
- returnData = APIUtils.getHackerNewsNewestContent(page,format,self.request.url, referer, self.request.remote_addr)
+ returnData = APIContent.getHackerNewsNewestContent(page,format,self.request.url, referer, self.request.remote_addr)
if (not returnData or returnData == None or returnData == '' or returnData == 'None'):
#call the service again this time without the pageID
- returnData = APIUtils.getHackerNewsNewestContent('',format,self.request.url, referer, self.request.remote_addr)
+ returnData = APIContent.getHackerNewsNewestContent('',format,self.request.url, referer, self.request.remote_addr)
#track this request
GAHelper.trackGARequests('/newest', self.request.remote_addr, referer)
Oops, something went wrong.

0 comments on commit e6cc99d

Please sign in to comment.