Skip to content

Commit

Permalink
Fix HTMLParser.unescape error in Python 3.9 and above
Browse files Browse the repository at this point in the history
  • Loading branch information
csyezheng committed Oct 4, 2023
1 parent fe867af commit 05da242
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions edx_dl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

# This module contains generic functions, ideally useful to any other module
from six.moves.urllib.request import urlopen, Request
from six.moves import html_parser

import sys
import errno
import json
import logging
Expand All @@ -16,6 +16,13 @@
from tqdm.auto import tqdm


# Bind the name ``html`` to an object exposing ``unescape(text)`` so the rest
# of the module can call ``html.unescape(...)`` on every supported Python.
#
# ``HTMLParser.unescape`` no longer exists on Python 3.9 and above, while the
# module-level ``html.unescape`` function is available from Python 3.4 on.
if sys.version_info[:2] >= (3, 4):
    import html
else:
    # Older interpreters: fall back to an HTMLParser instance, whose
    # ``unescape`` method is called the same way as ``html.unescape``.
    from six.moves import html_parser
    html = html_parser.HTMLParser()


def get_filename_from_prefix(target_dir, filename_prefix):
"""
Return the basename for the corresponding filename_prefix.
Expand Down Expand Up @@ -122,8 +129,7 @@ def clean_filename(s, minimal_change=False):
"""

# First, deal with URL encoded strings
h = html_parser.HTMLParser()
s = h.unescape(s)
s = html.unescape(s)

# strip paren portions which contain trailing time length (...)
s = (
Expand Down

0 comments on commit 05da242

Please sign in to comment.