Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updating from main repo #2

Merged
merged 14 commits into from
Jul 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
language: python
python:
- "2.7"
- "3.3"
- "3.4"
- "3.5"
- "3.6"
- "3.7"
- "pypy"
matrix:
allow_failures:
- python: "3.3"
- python: "3.7"
- python: "pypy"
# command to install dependencies
install:
Expand All @@ -26,3 +26,4 @@ notifications:
- iemejia@gmail.com
- kidsshk3@gmail.com
- rbrito@ime.usp.br
- baltazar.bz@gmail.com
2 changes: 1 addition & 1 deletion edx_dl/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.1.9'
__version__ = '0.1.10'
9 changes: 8 additions & 1 deletion edx_dl/edx_dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,7 @@ def download_url(url, filename, headers, args):
download_youtube_url(url, filename, headers, args)
else:
import ssl
import requests
# FIXME: Ugly hack for coping with broken SSL sites:
# https://www.cs.duke.edu/~angl/papers/imc10-cloudcmp.pdf
#
Expand All @@ -715,7 +716,13 @@ def download_url(url, filename, headers, args):
# order) is due to different behaviors in different Python versions
# (e.g., 2.7 vs. 3.4).
try:
urlretrieve(url, filename)
# mitxpro zip archives are still fetched with urlretrieve; every other URL is downloaded via requests so the request headers are sent
if 'zip' in url and 'mitxpro' in url:
urlretrieve(url, filename)
else:
r = requests.get(url, headers=headers)
with open(filename, 'wb') as fp:
fp.write(r.content)
except Exception as e:
logging.warn('Got SSL/Connection error: %s', e)
if not args.ignore_errors:
Expand Down
11 changes: 7 additions & 4 deletions edx_dl/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from .common import Course, Section, SubSection, Unit, Video


# Force use of bs4 with html5lib
BeautifulSoup = lambda page: BeautifulSoup_(page, 'html5lib')
# Force use of bs4 with html.parser
BeautifulSoup = lambda page: BeautifulSoup_(page, 'html.parser')


def edx_json2srt(o):
Expand Down Expand Up @@ -388,7 +388,7 @@ def _make_subsections(section_soup):
# FIXME correct extraction of subsection.name (unicode)
subsections = [SubSection(position=i,
url=s.a['href'],
name=s.a.div.span.string.strip())
name=s.a.div.div.string.strip())
for i, s in enumerate(subsections_soup, 1)]

return subsections
Expand All @@ -412,7 +412,10 @@ def get_page_extractor(url):
"""
factory method for page extractors
"""
if url.startswith('https://courses.edx.org'):
if (
url.startswith('https://courses.edx.org') or
url.startswith('https://mitxpro.mit.edu')
):
return NewEdXPageExtractor()
elif (
url.startswith('https://edge.edx.org') or
Expand Down
9 changes: 5 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
beautifulsoup4>=4.1.3
html5lib>=1.0b2
six>=1.5.0
youtube_dl>=2015.05.20
beautifulsoup4>=4.6.0
html5lib>=1.0.1
six>=1.11.0
youtube_dl>=2018.06.18
requests>=2.18.4
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,12 @@ def read_file(filename, alt=None):
'License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)',
'Operating System :: OS Independent',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy',
'Programming Language :: Python',
Expand All @@ -87,8 +87,8 @@ def read_file(filename, alt=None):
setup(
name='edx-dl',
version=__version__,
maintainer='Ismaël Mejía, Rogério Theodoro de Brito',
maintainer_email='iemejia@gmail.com, rbrito@ime.usp.br',
maintainer='Ismaël Mejía, Rogério Theodoro de Brito, Yuri Bochkarev',
maintainer_email='iemejia@gmail.com, rbrito@ime.usp.br, baltazar.bz@gmail.com',

license='LGPL',
url='https://github.com/coursera-dl/edx-dl',
Expand Down