Skip to content

Commit

Permalink
Nested Comments. Refactoring.
Browse files Browse the repository at this point in the history
  • Loading branch information
glebpopoff committed Oct 23, 2011
1 parent de318c5 commit e6cc99d
Show file tree
Hide file tree
Showing 14 changed files with 109 additions and 287 deletions.
228 changes: 12 additions & 216 deletions APIUtils.py
Expand Up @@ -11,7 +11,6 @@
import urllib
import AppConfig
from google.appengine.api import urlfetch
import DataCache
from BeautifulSoup import BeautifulSoup
from django.utils import simplejson
from structured import list2xml
Expand All @@ -22,243 +21,40 @@ def removeHtmlTags(data):

def removeNonAscii(s):
    """Return *s* with every non-ASCII character (ord >= 128) dropped."""
    return "".join(ch for ch in s if ord(ch) < 128)

#get cached content
def getCache(pageId, format):
    """Return the cached payload for pageId in the given format, or None.

    Looks the record up via DataCache.getData; an expired record is deleted
    and treated as a miss.  Returns None on any miss so callers can fall
    through to re-fetching the remote page.
    """
    logging.debug('getCache: %s' % pageId)
    dbData = DataCache.getData(pageId, format)
    if dbData:
        if DataCache.hasExpired(dbData):
            #data has expired: remove it and report a cache miss
            dbData[0].delete()
            return None
        else:
            #BUG FIX: original logged the builtin `id` function here instead
            #of the page id being looked up
            logging.debug('getCache: got cached data for id %s' % pageId)
            return dbData[0].rec_xml
    return None

#call urlfetch to get remote data; raise on any non-200 response
#NOTE(review): this region was reconstructed from interleaved diff lines —
#the helper's error path appeared detached from its definition; confirm
#against the full file.
def fetchRemoteData(urlStr, deadline):
    """Fetch urlStr with the given urlfetch deadline.

    Returns the urlfetch result object on HTTP 200; logs and raises on
    anything else so callers can retry.
    """
    result = urlfetch.fetch(url=urlStr, deadline=deadline)
    if result.status_code == 200:
        return result
    logging.error('fetchRemoteData: unable to get remote data: %s' % urlStr)
    raise Exception("fetchRemoteData: failed")

#parse HN's submissions by user
def getHackerNewsSubmittedContent(user, format='json', url='', referer='', remote_addr=''):
    """Return the submissions listing for *user*, serving from cache when possible.

    On a cache miss the page is parsed via parsePageContent and, if
    non-empty, stored back into DataCache.  Returns '' when no data could
    be retrieved.
    """
    apiURL = "%s/submitted?id=%s" % (AppConfig.hackerNewsURL, user)
    apiURLBackup = "%s/submitted?id=%s" % (AppConfig.hackerNewsURLBackup, user)
    cacheId = '/submitted/%s' % user  # renamed from `id` (shadowed builtin)
    cachedData = getCache(cacheId, format)
    if cachedData:
        return cachedData
    hnData = parsePageContent(apiURL, apiURLBackup, '/submitted', None, format)
    if hnData:
        logging.debug('getHackerNewsSubmittedContent: storing cached value for id %s' % cacheId)
        DataCache.putData(cacheId, format, removeNonAscii(hnData), url, referer, remote_addr)
        return hnData
    logging.warning('getHackerNewsSubmittedContent: unable to retrieve data for id %s' % cacheId)
    return ''

#parse HN's comments by story id
def getHackerNewsComments(articleId, format='json', url='', referer='', remote_addr=''):
    """Return the flat comment listing for story *articleId*, cached when possible.

    On a cache miss the comments are parsed via parseCommentsContent and,
    if non-empty, stored back into DataCache.  Returns '' on failure.
    """
    apiURL = "%s/item?id=%s" % (AppConfig.hackerNewsURL, articleId)
    apiURLBackup = "%s/item?id=%s" % (AppConfig.hackerNewsURLBackup, articleId)
    cacheId = '/comments/%s' % articleId  # renamed from `id` (shadowed builtin)
    cachedData = getCache(cacheId, format)
    if cachedData:
        return cachedData
    hnData = parseCommentsContent(apiURL, apiURLBackup, '/comments', None, format)
    if hnData:
        logging.debug('getHackerNewsComments: storing cached value for id %s' % cacheId)
        DataCache.putData(cacheId, format, removeNonAscii(hnData), url, referer, remote_addr)
        return hnData
    logging.warning('getHackerNewsComments: unable to retrieve data for id %s' % cacheId)
    return ''

#parse HN's nested comments by story id
def getHackerNewsNestedComments(articleId, format='json', url='', referer='', remote_addr=''):
    """Return the nested comment tree for story *articleId*, cached when possible.

    On a cache miss the comments are parsed via parseNestedCommentsContent
    and, if non-empty, stored back into DataCache.  Returns '' on failure.
    """
    apiURL = "%s/item?id=%s" % (AppConfig.hackerNewsURL, articleId)
    apiURLBackup = "%s/item?id=%s" % (AppConfig.hackerNewsURLBackup, articleId)
    cacheId = '/nestedcomments/%s' % articleId  # renamed from `id` (shadowed builtin)
    cachedData = getCache(cacheId, format)
    if cachedData:
        return cachedData
    hnData = parseNestedCommentsContent(apiURL, apiURLBackup, '/nestedcomments', None, format)
    if hnData:
        #BUG FIX: log messages originally said 'getHackerNewsComments' (copy-paste)
        logging.debug('getHackerNewsNestedComments: storing cached value for id %s' % cacheId)
        DataCache.putData(cacheId, format, removeNonAscii(hnData), url, referer, remote_addr)
        return hnData
    logging.warning('getHackerNewsNestedComments: unable to retrieve data for id %s' % cacheId)
    return ''

#parse HN's ask content
def getHackerNewsAskContent(page='', format='json', url='', referer='', remote_addr=''):
    """Return the Ask HN listing; only the first (pageless) listing is cached.

    Explicit pages are always fetched fresh via parsePageContent.  Returns
    '' when no data could be retrieved.
    """
    if page:
        #only cache homepage data — paged requests bypass the cache
        return parsePageContent(AppConfig.hackerNewsAskURL, AppConfig.hackerNewsAskURLBackup, '/ask', page, format)
    cacheId = '/ask/'  # renamed from `id` (shadowed builtin)
    cachedData = getCache(cacheId, format)
    if cachedData:
        return cachedData
    hnData = parsePageContent(AppConfig.hackerNewsAskURL, AppConfig.hackerNewsAskURLBackup, '/ask', page, format)
    if hnData:
        #BUG FIX: log messages originally carried the 'getCache:' tag (copy-paste)
        logging.debug('getHackerNewsAskContent: storing cached value for id %s' % cacheId)
        DataCache.putData(cacheId, format, removeNonAscii(hnData), url, referer, remote_addr)
        return hnData
    logging.warning('getHackerNewsAskContent: unable to retrieve data for id %s' % cacheId)
    return ''

#parse HN's best content
def getHackerNewsBestContent(page='', format='json', url='', referer='', remote_addr=''):
    """Return the /best listing; only the first (pageless) listing is cached.

    Explicit pages are always fetched fresh via parsePageContent.  Returns
    '' when no data could be retrieved.
    """
    if page:
        #only cache homepage data — paged requests bypass the cache
        return parsePageContent(AppConfig.hackerNewsBestURL, AppConfig.hackerNewsBestURLBackup, '/best', page, format)
    cacheId = '/best/'  # renamed from `id` (shadowed builtin)
    cachedData = getCache(cacheId, format)
    if cachedData:
        return cachedData
    hnData = parsePageContent(AppConfig.hackerNewsBestURL, AppConfig.hackerNewsBestURLBackup, '/best', page, format)
    if hnData:
        #BUG FIX: log messages originally carried the 'getCache:' tag (copy-paste)
        logging.debug('getHackerNewsBestContent: storing cached value for id %s' % cacheId)
        DataCache.putData(cacheId, format, removeNonAscii(hnData), url, referer, remote_addr)
        return hnData
    logging.warning('getHackerNewsBestContent: unable to retrieve data for id %s' % cacheId)
    return ''

#parse HN's newest content
def getHackerNewsNewestContent(page='', format='json', url='', referer='', remote_addr=''):
    """Return the /newest listing; only the first (pageless) listing is cached.

    Explicit pages are always fetched fresh via parsePageContent.  Returns
    '' when no data could be retrieved.
    """
    if page:
        #only cache homepage data — paged requests bypass the cache
        return parsePageContent(AppConfig.hackerNewsNewestURL, AppConfig.hackerNewsNewestURLBackup, '/newest', page, format)
    cacheId = '/newest/'  # renamed from `id` (shadowed builtin)
    cachedData = getCache(cacheId, format)
    if cachedData:
        return cachedData
    hnData = parsePageContent(AppConfig.hackerNewsNewestURL, AppConfig.hackerNewsNewestURLBackup, '/newest', page, format)
    if hnData:
        #BUG FIX: log messages originally carried the 'getCache:' tag (copy-paste)
        logging.debug('getHackerNewsNewestContent: storing cached value for id %s' % cacheId)
        DataCache.putData(cacheId, format, removeNonAscii(hnData), url, referer, remote_addr)
        return hnData
    logging.warning('getHackerNewsNewestContent: unable to retrieve data for id %s' % cacheId)
    return ''


#get homepage second page stories
def getHackerNewsSecondPageContent(page='', format='json', url='', referer='', remote_addr=''):
    """Return the /news2 listing; only the first (pageless) listing is cached.

    Explicit pages are always fetched fresh via parsePageContent.  Returns
    '' when no data could be retrieved.
    """
    if page:
        #only cache homepage data — paged requests bypass the cache
        return parsePageContent(AppConfig.hackerNewsPage2URL, AppConfig.hackerNewsPage2URLBackup, '/news2', page, format)
    cacheId = '/news2'  # renamed from `id` (shadowed builtin); no trailing slash, kept as-is
    cachedData = getCache(cacheId, format)
    if cachedData:
        return cachedData
    hnData = parsePageContent(AppConfig.hackerNewsPage2URL, AppConfig.hackerNewsPage2URLBackup, '/news2', page, format)
    if hnData:
        #BUG FIX: log messages originally carried the 'getCache:' tag (copy-paste)
        logging.debug('getHackerNewsSecondPageContent: storing cached value for id %s' % cacheId)
        DataCache.putData(cacheId, format, removeNonAscii(hnData), url, referer, remote_addr)
        return hnData
    logging.warning('getHackerNewsSecondPageContent: unable to retrieve data for id %s' % cacheId)
    return ''

#get latest homepage stories
def getHackerNewsLatestContent(page='', format='json', url='', referer='', remote_addr='', limit=1):
    """Return the newest *limit* homepage stories; the pageless form is cached per limit.

    *limit* may arrive as a string from the request path and is coerced to
    int.  Explicit pages are always fetched fresh.  Returns '' on failure.
    """
    limit = int(limit)
    if page:
        #only cache homepage data — paged requests bypass the cache
        return parsePageContent(AppConfig.hackerNewsURL, AppConfig.hackerNewsURLBackup, '/latest', page, format, limit)
    cacheId = '/latest/%s' % limit  # renamed from `id` (shadowed builtin)
    cachedData = getCache(cacheId, format)
    if cachedData:
        return cachedData
    hnData = parsePageContent(AppConfig.hackerNewsURL, AppConfig.hackerNewsURLBackup, '/latest', page, format, limit)
    if hnData:
        #BUG FIX: log messages originally carried the 'getCache:' tag (copy-paste)
        logging.debug('getHackerNewsLatestContent: storing cached value for id %s' % cacheId)
        DataCache.putData(cacheId, format, removeNonAscii(hnData), url, referer, remote_addr)
        return hnData
    logging.warning('getHackerNewsLatestContent: unable to retrieve data for id %s' % cacheId)
    return ''

#parse HN's homepage
def getHackerNewsPageContent(page='', format='json', url='', referer='', remote_addr=''):
    """Return the homepage listing; only the first (pageless) listing is cached.

    Explicit pages are always fetched fresh via parsePageContent.  Returns
    '' when no data could be retrieved.
    """
    if page:
        #only cache homepage data — paged requests bypass the cache
        return parsePageContent(AppConfig.hackerNewsURL, AppConfig.hackerNewsURLBackup, '/news', page, format)
    cacheId = '/news/'  # renamed from `id` (shadowed builtin)
    cachedData = getCache(cacheId, format)
    if cachedData:
        return cachedData
    hnData = parsePageContent(AppConfig.hackerNewsURL, AppConfig.hackerNewsURLBackup, '/news', page, format)
    if hnData:
        #BUG FIX: log messages originally carried the 'getCache:' tag (copy-paste)
        logging.debug('getHackerNewsPageContent: storing cached value for id %s' % cacheId)
        DataCache.putData(cacheId, format, removeNonAscii(hnData), url, referer, remote_addr)
        return hnData
    logging.warning('getHackerNewsPageContent: unable to retrieve data for id %s' % cacheId)
    return ''
#call remote server to get data. If failed (timeout), retry — 4 attempts total,
#alternating primary and backup URLs, because urlfetch on GAE is unreliable.
#NOTE(review): reconstructed from diff residue — the original span interleaved
#the old inline-urlfetch pyramid with the new fetchRemoteData calls, leaving
#unreachable statements after raise/return and an orphan `return jsonData`
#(undefined name).  Confirm against the full file.
def getRemoteData(urlStr, backupUrl):
    """Fetch urlStr, falling back to backupUrl, up to 4 attempts in total.

    Each attempt uses fetchRemoteData with a 30s deadline; attempts
    alternate primary, backup, primary, backup.  Returns the urlfetch
    result on success, or None when every attempt fails.
    """
    attemptNo = 0
    for attemptUrl in (urlStr, backupUrl, urlStr, backupUrl):
        attemptNo += 1
        try:
            logging.debug('getRemoteData: Attempt #%s: %s' % (attemptNo, attemptUrl))
            return fetchRemoteData(attemptUrl, 30)
        except Exception:
            logging.error('getRemoteData: unable to get remote data...Attempt #%s: %s' % (attemptNo, attemptUrl))
    return None

#parse content using Beautiful Soup
def parsePageContent(hnAPIUrl,hnBackupAPIUrl, apiURL, page='',format='json',limit=0):
Expand Down
7 changes: 3 additions & 4 deletions GetHNAskHandler.py
Expand Up @@ -16,7 +16,7 @@
import AppConfig
import GAHelper
from xml.sax.saxutils import escape
import APIUtils
import APIContent
import GAHelper
from BeautifulSoup import BeautifulSoup

Expand All @@ -31,11 +31,10 @@ def get(self,format='json',page=''):
if ('HTTP_REFERER' in os.environ):
referer = os.environ['HTTP_REFERER']

returnData = MutableString()
returnData = APIUtils.getHackerNewsAskContent(page,format,self.request.url, referer, self.request.remote_addr)
returnData = APIContent.getHackerNewsAskContent(page,format,self.request.url, referer, self.request.remote_addr)
if (not returnData or returnData == None or returnData == '' or returnData == 'None'):
#call the service again this time without the pageID
returnData = APIUtils.getHackerNewsAskContent('',format,self.request.url, referer, self.request.remote_addr)
returnData = APIContent.getHackerNewsAskContent('',format,self.request.url, referer, self.request.remote_addr)

#track this request
GAHelper.trackGARequests('/ask', self.request.remote_addr, referer)
Expand Down
7 changes: 3 additions & 4 deletions GetHNBestHandler.py
Expand Up @@ -16,7 +16,7 @@
import AppConfig
import GAHelper
from xml.sax.saxutils import escape
import APIUtils
import APIContent
import GAHelper
from BeautifulSoup import BeautifulSoup

Expand All @@ -31,11 +31,10 @@ def get(self,format='json',page=''):
if ('HTTP_REFERER' in os.environ):
referer = os.environ['HTTP_REFERER']

returnData = MutableString()
returnData = APIUtils.getHackerNewsBestContent(page,format,self.request.url, referer, self.request.remote_addr)
returnData = APIContent.getHackerNewsBestContent(page,format,self.request.url, referer, self.request.remote_addr)
if (not returnData or returnData == None or returnData == '' or returnData == 'None'):
#call the service again this time without the pageID
returnData = APIUtils.getHackerNewsBestContent('',format,self.request.url, referer, self.request.remote_addr)
returnData = APIContent.getHackerNewsBestContent('',format,self.request.url, referer, self.request.remote_addr)

#track this request
GAHelper.trackGARequests('/best', self.request.remote_addr, referer)
Expand Down
7 changes: 3 additions & 4 deletions GetHNCommentsHandler.py
Expand Up @@ -18,7 +18,7 @@
import AppConfig
import GAHelper
from xml.sax.saxutils import escape
import APIUtils
import APIContent
import GAHelper
from BeautifulSoup import BeautifulSoup

Expand All @@ -33,9 +33,8 @@ def get(self,format,id):
if ('HTTP_REFERER' in os.environ):
referer = os.environ['HTTP_REFERER']

returnData = MutableString()
returnData = APIUtils.getHackerNewsComments(id,format,self.request.url, referer, self.request.remote_addr)

returnData = APIContent.getHackerNewsComments(id,format,self.request.url, referer, self.request.remote_addr)

#track this request
GAHelper.trackGARequests('/comments/%s' % (id), self.request.remote_addr, referer)

Expand Down
5 changes: 2 additions & 3 deletions GetHNLatestHandler.py
Expand Up @@ -18,7 +18,7 @@
import AppConfig
import GAHelper
from xml.sax.saxutils import escape
import APIUtils
import APIContent
import GAHelper

class HackerNewsLatestPageHandler(webapp.RequestHandler):
Expand All @@ -32,8 +32,7 @@ def get(self,format='json',limit=1):
if ('HTTP_REFERER' in os.environ):
referer = os.environ['HTTP_REFERER']

returnData = MutableString()
returnData = APIUtils.getHackerNewsLatestContent('',format,self.request.url, referer, self.request.remote_addr, limit)
returnData = APIContent.getHackerNewsLatestContent('',format,self.request.url, referer, self.request.remote_addr, limit)

#track this request
GAHelper.trackGARequests('/latest', self.request.remote_addr, referer)
Expand Down
5 changes: 2 additions & 3 deletions GetHNNestedCommentsHandler.py
Expand Up @@ -17,7 +17,7 @@
import AppConfig
import GAHelper
from xml.sax.saxutils import escape
import APIUtils
import APIContent
import GAHelper
from BeautifulSoup import BeautifulSoup

Expand All @@ -32,8 +32,7 @@ def get(self,format,id):
if ('HTTP_REFERER' in os.environ):
referer = os.environ['HTTP_REFERER']

returnData = MutableString()
returnData = APIUtils.getHackerNewsNestedComments(id,format,self.request.url, referer, self.request.remote_addr)
returnData = APIContent.getHackerNewsNestedComments(id,format,self.request.url, referer, self.request.remote_addr)

#track this request
GAHelper.trackGARequests('/nestedcomments/%s' % (id), self.request.remote_addr, referer)
Expand Down
7 changes: 3 additions & 4 deletions GetHNNewestHandler.py
Expand Up @@ -16,7 +16,7 @@
import AppConfig
import GAHelper
from xml.sax.saxutils import escape
import APIUtils
import APIContent
import GAHelper
from BeautifulSoup import BeautifulSoup

Expand All @@ -31,11 +31,10 @@ def get(self,format='json',page=''):
if ('HTTP_REFERER' in os.environ):
referer = os.environ['HTTP_REFERER']

returnData = MutableString()
returnData = APIUtils.getHackerNewsNewestContent(page,format,self.request.url, referer, self.request.remote_addr)
returnData = APIContent.getHackerNewsNewestContent(page,format,self.request.url, referer, self.request.remote_addr)
if (not returnData or returnData == None or returnData == '' or returnData == 'None'):
#call the service again this time without the pageID
returnData = APIUtils.getHackerNewsNewestContent('',format,self.request.url, referer, self.request.remote_addr)
returnData = APIContent.getHackerNewsNewestContent('',format,self.request.url, referer, self.request.remote_addr)

#track this request
GAHelper.trackGARequests('/newest', self.request.remote_addr, referer)
Expand Down

0 comments on commit e6cc99d

Please sign in to comment.