Skip to content

Commit

Permalink
Add dynamic sitemap generation route and caching mechanism
Browse files Browse the repository at this point in the history
- Added a route to dynamically generate sitemap.xml on the first request.
- Implemented caching mechanism to serve the generated sitemap.xml
  from the directory on subsequent requests.
- Updated the Flask app to include the generate_sitemap function.
- Ensured the sitemap is generated during the initial deployment
  and reused afterward.

This change improves the SEO functionality by ensuring the sitemap
is always available and up-to-date, with minimal performance overhead.
  • Loading branch information
zahraaalizadeh committed May 23, 2024
1 parent d0b1f72 commit 6c1dc5e
Showing 1 changed file with 52 additions and 0 deletions.
52 changes: 52 additions & 0 deletions gcp/appengine/frontend_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import source_mapper
import utils
from werkzeug import exceptions
from xml.etree.ElementTree import Element, SubElement, ElementTree

# Flask blueprint on which this module's routes (for example /sitemap.xml)
# are registered.
blueprint = Blueprint('frontend_handlers', __name__)

Expand All @@ -50,6 +51,9 @@
# Blog names and vulnerability ids share the same validation pattern
# (presumably a regular-expression fragment defined earlier in this file —
# TODO confirm).
_VALID_BLOG_NAME = _WORD_CHARACTERS_OR_DASH
_VALID_VULN_ID = _WORD_CHARACTERS_OR_DASH
_BLOG_CONTENTS_DIR = 'blog'
# Location of the generated sitemap. The path is relative, so it is resolved
# against the process's current working directory; generate_sitemap() writes
# it and the /sitemap.xml route serves it from there.
_SITEMAP_FILE_PATH = './sitemap.xml'
# Default `page_size` argument of fetch_vulnerabilities().
# TODO: Determine whether this page size is appropriate based on performance
# considerations.
_ECOSYSTEM_VULNERABILITIES_PAGE_SIZE = 100

if utils.is_prod():
redis_host = os.environ.get('REDISHOST', 'localhost')
Expand Down Expand Up @@ -603,3 +607,51 @@ def list_packages(vuln_affected: list[dict]):
def not_found_error(error: exceptions.HTTPException):
  """Log the path that was requested and answer with the 404 page.

  Args:
    error: The HTTP exception being handled; it is only written to the log.

  Returns:
    A `(body, status)` tuple: the rendered '404.html' template and 404.
  """
  attempted_path = request.path
  logging.info('Handled %s - Path attempted: %s', error, attempted_path)
  body = render_template('404.html')
  return body, 404


def fetch_vulnerabilities(
    ecosystem: str,
    page_size: int = _ECOSYSTEM_VULNERABILITIES_PAGE_SIZE) -> list:
  """Collect every vulnerability osv_query() returns for one ecosystem.

  Pages are requested in order, starting at page 1, until osv_query() returns
  a page whose 'items' list is empty.

  Args:
    ecosystem: Ecosystem name forwarded to osv_query().
    page_size: Retained for backward compatibility only. It is never
      forwarded to osv_query(), so it cannot be assumed to equal the size of
      the pages that come back and is not used to detect the last page.

  Returns:
    The 'items' of every page, concatenated in the order they were returned.
  """
  del page_size  # Unused; see the docstring.

  vulnerabilities = []
  page = 1
  # A `len(items) < page_size` shortcut would stop after the first page
  # whenever osv_query() pages in chunks smaller than `page_size`, silently
  # truncating the result. An empty page is the only reliable terminator here,
  # at the cost of one extra query per ecosystem.
  # NOTE(review): this assumes osv_query() returns no items for a page past
  # the end rather than clamping the page number — confirm.
  while True:
    results = osv_query(
        search_string=None, page=page, affected_only=False, ecosystem=ecosystem)
    items = results['items']
    if not items:
      break
    vulnerabilities.extend(items)
    page += 1
  return vulnerabilities


def generate_sitemap() -> None:
  """Write a sitemap of the vulnerability pages to _SITEMAP_FILE_PATH.

  The output is a single <urlset> whose direct children are <url><loc>
  entries, one per distinct vulnerability id, as the sitemap protocol
  requires. Each <loc> holds an absolute URL built with url_for().

  url_for() needs an active Flask request context (or an application context
  with SERVER_NAME configured) to build absolute URLs.
  """
  urlset = Element(
      'urlset', xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")

  # Only walk the base ecosystems (names without ':'); otherwise the sitemap
  # would contain duplicated vulnerabilities. Sorting makes the generated file
  # deterministic, since set iteration order is not.
  ecosystems = sorted(
      {ecosystem for ecosystem in osv_get_ecosystems() if ':' not in ecosystem})

  # Guards against the same id being returned for more than one ecosystem.
  seen_ids = set()
  for ecosystem in ecosystems:
    for vulnerability in fetch_vulnerabilities(ecosystem):
      vuln_id = vulnerability['id']
      if vuln_id in seen_ids:
        continue
      seen_ids.add(vuln_id)

      # <url> must hang directly off <urlset>; the protocol defines no
      # intermediate grouping element.
      url = SubElement(urlset, 'url')
      loc = SubElement(url, 'loc')
      # The protocol requires fully-qualified URLs, hence _external=True.
      loc.text = url_for(
          'frontend_handlers.vulnerability', vuln_id=vuln_id, _external=True)

  tree = ElementTree(urlset)
  tree.write(_SITEMAP_FILE_PATH, encoding='utf-8', xml_declaration=True)


@blueprint.route('/sitemap.xml', methods=['GET'])
def sitemap():
  """Serve sitemap.xml, generating the file first if it is not on disk yet.

  Returns:
    The response produced by send_from_directory() for the sitemap file,
    looked up relative to the current working directory.
  """
  already_generated = os.path.exists(_SITEMAP_FILE_PATH)
  if not already_generated:
    generate_sitemap()

  serving_dir = os.getcwd()
  return send_from_directory(serving_dir, _SITEMAP_FILE_PATH)

0 comments on commit 6c1dc5e

Please sign in to comment.