Skip to content

Commit

Permalink
Handle period in ecosystem name
Browse files Browse the repository at this point in the history
  • Loading branch information
zahraaalizadeh committed Jun 4, 2024
1 parent 648c27f commit 0329ec4
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
33 changes: 29 additions & 4 deletions gcp/appengine/frontend_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,28 @@ def _sanitize_input(text: str):
Sanitize the user-provided filename to prevent directory traversal attacks.
"""
return "".join(
c if c.isalnum() or c in ('-', '_') else '_' for c in text).strip()
c if c.isalnum() or c in ('-', '_', '.') else '_' for c in text).strip()


def _ecosystem_to_sitemap_filename(text: str, reverse: bool = False) -> str:
"""Convert an ecosystem name to a sitemap filename or vice versa.
This function replaces spaces with underscores and periods with double
underscores to create a valid filename for a sitemap. If the `reverse`
flag is set to True, it converts the filename back to the original
ecosystem name by replacing underscores with spaces and double underscores
with periods.
Args:
text (str): The ecosystem name or sitemap filename to convert.
reverse (bool): If True, converts a sitemap filename back to an ecosystem name.
If False, converts an ecosystem name to a sitemap filename.
Returns:
str: The converted ecosystem name or sitemap filename."""
if reverse:
return text.replace('__', '.').replace('_', ' ')
return text.replace('.', '__').replace(' ', '_')


@blueprint.route('/sitemap/index.xml', methods=['GET'])
Expand All @@ -699,17 +720,21 @@ def sitemap(ecosystem):
missing"""
sanitized_ecosystem = _sanitize_input(ecosystem)
try:
# Verify the ecosystem is valid.
# Verify the ecosystem is valid. Since the ecosystem name may include
# spaces or dots, and these characters are not preferable in sitemap
# filenames, we need to convert these special characters to safer options
# (e.g. space to '_' and dot to '__').
# NOTE: We need to pass the ecosystem from the list of valid ecosystems
# to prevent the security linter from complaining.
ecosystems = osv_get_ecosystems()
idx = ecosystems.index(sanitized_ecosystem.replace('_', ' '))
idx = ecosystems.index(
_ecosystem_to_sitemap_filename(sanitized_ecosystem, reverse=True))
cleaned_ecosystem = ecosystems[idx]
except ValueError:
abort(404)
return None

filename = f'{cleaned_ecosystem.replace(" ","_")}.xml'
filename = f'{_ecosystem_to_sitemap_filename(cleaned_ecosystem)}.xml'
sitemaps_directory = get_sitemaps_directory()
sitemap_path = f"{sitemaps_directory}/{filename}"
if not os.path.exists(sitemap_path):
Expand Down
4 changes: 2 additions & 2 deletions gcp/appengine/generate_sitemap.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ def osv_get_ecosystems():


def get_sitemap_filename_for_ecosystem(ecosystem: str) -> str:
    """Return the sitemap file path for the given ecosystem.

    Spaces in the ecosystem name become '_' and periods become '__'; the
    result is then stripped of surrounding whitespace and placed under
    `_SITEMAPS_DIRECTORY` with an `.xml` extension.
    """
    # Single-pass substitution; equivalent to chaining the two replaces
    # because neither replacement emits a ' ' or a '.'.
    stem = ecosystem.translate({ord(' '): '_', ord('.'): '__'}).strip()
    return f'{_SITEMAPS_DIRECTORY}/{stem}.xml'


def get_sitemap_url_for_ecosystem(ecosystem: str, base_url: str) -> str:
    """Return the public sitemap URL for the given ecosystem.

    Spaces in the ecosystem name become '_' and periods become '__'; the
    result is then stripped of surrounding whitespace and appended to
    `{base_url}/sitemap/` with an `.xml` extension.
    """
    # Single-pass substitution; equivalent to chaining the two replaces
    # because neither replacement emits a ' ' or a '.'.
    stem = ecosystem.translate({ord(' '): '_', ord('.'): '__'}).strip()
    return f'{base_url}/sitemap/{stem}.xml'


Expand Down

0 comments on commit 0329ec4

Please sign in to comment.