Added Tkinter based wrapper GUI #108

Open · wants to merge 17 commits into base: master
10 changes: 10 additions & 0 deletions CHANGELOG.txt
@@ -1,3 +1,13 @@
* 2.0.1 - Tue 11 Mar 2014
- fix mppl behavior

* 2.0.0 - Fri 7 Mar 2014
- replace dependency on mechanize by requests & six
- make compatible with python 3
- properly respect mppl
- also re-download files if the size on disk is larger
- all round bug fixes and small cleanups

* 1.5.1 - Sat 5 Oct
- commandline parameter for selecting weeks
- filename fix following coursera update
2 changes: 1 addition & 1 deletion README.md
@@ -2,7 +2,7 @@ coursera-dl
===========

A python package for archiving content from coursera.org (videos,
lecture notes, quizzes, …) for offline reference. Originally forked from
lecture notes, ...) for offline reference. Originally forked from
[https://github.com/abhirama/coursera-download][] but significantly
cleaned up and enhanced.

2 changes: 1 addition & 1 deletion courseradownloader/_version.py
@@ -1,2 +1,2 @@
__version_info__ = (1,5,1)
__version_info__ = (2,0,1)
__version__ = '.'.join(map(str, __version_info__))
29 changes: 19 additions & 10 deletions courseradownloader/courseradownloader.py
@@ -11,6 +11,7 @@
import sys
import tarfile
import time
import math
from bs4 import BeautifulSoup
from os import path
from six import print_
@@ -123,7 +124,7 @@ def lecture_url_from_name(self, course_name):
def trim_path_part(self, s):
mppl = self.max_path_part_len
if mppl and len(s) > mppl:
return s[:mppl - 3] + "..."
return s[:mppl]
else:
return s
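
For illustration, a minimal standalone sketch of the new trimming rule (the mppl value here is an assumption for the example; the old code cut to mppl - 3 and appended "...", the new code is a hard cut at mppl characters):

    # hypothetical standalone version of trim_path_part
    def trim_path_part(s, mppl=10):
        # hard cut at mppl characters; previously: s[:mppl - 3] + "..."
        if mppl and len(s) > mppl:
            return s[:mppl]
        return s

    print(trim_path_part("introduction-to-databases"))  # -> introducti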

@@ -295,31 +296,37 @@ def download(self, url, target_dir=".", target_fname=None):
headers) or filename_from_url(url)

# split off the extension
_, ext = path.splitext(fname)
basename, ext = path.splitext(fname)

# ensure it respects mppl
fname = self.trim_path_part(basename) + ext

# check if we should skip it (remember to remove the leading .)
if ext and ext[1:] in self.ignorefiles:
print_(' - skipping "%s" (extension ignored)' % fname)
return

filepath = path.join(target_dir, fname)

dl = True
if path.exists(filepath):
if clen > 0:
fs = path.getsize(filepath)
delta = clen - fs
delta = math.fabs(clen - fs)

# all we know is that the current filesize may be shorter than it should be and the content length may be incorrect
# overwrite the file if the reported content length is bigger
# than what we have already by at least k bytes (arbitrary)
# there are cases when a file was not completely downloaded or
# something went wrong that meant the file on disk is
# unreadable. The file on disk may be smaller or larger (!) than
# the reported content length in those cases.
# Hence we overwrite the file if the reported content length
# differs from what we have already by at least k bytes (arbitrary)

# TODO this is still not foolproof, as the fundamental problem is
# that the content length cannot be trusted, so this really needs
# to be avoided and replaced by something else, e.g., explicitly
# storing what was downloaded correctly
if delta > 2:
if delta > 10:
print_(
' - "%s" seems incomplete, downloading again' % fname)
' - "%s" seems corrupt, downloading again' % fname)
else:
print_(' - "%s" already exists, skipping' % fname)
dl = False
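
Condensed into a self-contained helper, the decision above amounts to the following (a sketch; the helper name and the 10-byte tolerance default are illustrative, mirroring the arbitrary k from the comment):

    import math
    from os import path

    def should_redownload(filepath, clen, tolerance=10):
        # no file on disk yet: download
        if not path.exists(filepath):
            return True
        # no usable content length to compare against: keep what we have
        if clen <= 0:
            return False
        # re-download when the on-disk size differs from the reported
        # content length by more than `tolerance` bytes, in either direction
        return math.fabs(clen - path.getsize(filepath)) > tolerance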
@@ -345,7 +352,7 @@ def download(self, url, target_dir=".", target_fname=None):
slice_size = 524288 # 512KB buffer
last_time = time.time()
with open(filepath, 'wb') as f:
for data in response.iter_content(slice_size):
for data in response.iter_content(chunk_size=slice_size):
f.write(data)
try:
percent = int(float(done_size) / full_size * 100)
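
The write loop itself is the standard requests streaming pattern; a minimal sketch for context (url and filepath are placeholders, and stream=True on the requests.get call is an assumption, since that call is outside this hunk but is required for iter_content to avoid buffering the whole body):

    import requests

    def fetch(url, filepath, slice_size=524288):  # 512KB buffer
        response = requests.get(url, stream=True)
        response.raise_for_status()
        with open(filepath, 'wb') as f:
            # iter_content yields chunks of at most slice_size bytes
            for chunk in response.iter_content(chunk_size=slice_size):
                f.write(chunk)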
@@ -460,6 +467,7 @@ def download_course(self, cname, dest_dir=".", reverse_sections=False, gzip_cour

# ensure the class dir exists
clsdir = path.join(wkdir, clsdirname)

if not path.exists(clsdir):
os.makedirs(clsdir)

@@ -472,6 +480,7 @@ def download_course(self, cname, dest_dir=".", reverse_sections=False, gzip_cour
classResource, target_dir=clsdir, target_fname=tfname)
except Exception as e:
print_(" - failed: ", classResource, e)

if gzip_courses:
tar_file_name = cname + ".tar.gz"
print_("Compressing and storing as " + tar_file_name)