Added Tkinter based wrapper GUI #108

Open · wants to merge 17 commits into base: master
10 changes: 10 additions & 0 deletions CHANGELOG.txt
@@ -1,3 +1,13 @@
* 2.0.1 - Tue 11 Mar 2014
- fix mppl behavior

* 2.0.0 - Fri 7 Mar 2014
- replace dependency on mechanize by requests & six
- make compatible with python 3
- properly respect mppl
- also re-download files if the size on disk is larger
- all round bug fixes and small cleanups

* 1.5.1 - Sat 5 Oct
- commandline parameter for selecting weeks
- filename fix following coursera update
2 changes: 1 addition & 1 deletion README.md
@@ -2,7 +2,7 @@ coursera-dl
===========

A python package for archiving content from coursera.org (videos,
lecture notes, quizzes, …) for offline reference. Originally forked from
lecture notes, ...) for offline reference. Originally forked from
[https://github.com/abhirama/coursera-download][] but significantly
cleaned up and enhanced.

2 changes: 1 addition & 1 deletion courseradownloader/_version.py
@@ -1,2 +1,2 @@
__version_info__ = (1,5,1)
__version_info__ = (2,0,1)
__version__ = '.'.join(map(str, __version_info__))
29 changes: 19 additions & 10 deletions courseradownloader/courseradownloader.py
@@ -11,6 +11,7 @@
import sys
import tarfile
import time
import math
from bs4 import BeautifulSoup
from os import path
from six import print_
@@ -123,7 +124,7 @@ def lecture_url_from_name(self, course_name):
def trim_path_part(self, s):
mppl = self.max_path_part_len
if mppl and len(s) > mppl:
return s[:mppl - 3] + "..."
return s[:mppl]
else:
return s
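
For illustration, a minimal standalone sketch of the new trimming rule (the mppl value here is an assumption for the example; the old code cut to mppl - 3 and appended "...", the new code is a hard cut at mppl characters):

    # hypothetical standalone version of trim_path_part
    def trim_path_part(s, mppl=10):
        # hard cut at mppl characters; previously: s[:mppl - 3] + "..."
        if mppl and len(s) > mppl:
            return s[:mppl]
        return s

    print(trim_path_part("introduction-to-databases"))  # -> introducti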

@@ -295,31 +296,37 @@ def download(self, url, target_dir=".", target_fname=None):
headers) or filename_from_url(url)

# split off the extension
_, ext = path.splitext(fname)
basename, ext = path.splitext(fname)

# ensure it respects mppl
fname = self.trim_path_part(basename) + ext

# check if we should skip it (remember to remove the leading .)
if ext and ext[1:] in self.ignorefiles:
print_(' - skipping "%s" (extension ignored)' % fname)
return

filepath = path.join(target_dir, fname)

dl = True
if path.exists(filepath):
if clen > 0:
fs = path.getsize(filepath)
delta = clen - fs
delta = math.fabs(clen - fs)

# all we know is that the current filesize may be shorter than it should be and the content length may be incorrect
# overwrite the file if the reported content length is bigger
# than what we have already by at least k bytes (arbitrary)
# there are cases when a file was not completely downloaded or
# something went wrong that meant the file on disk is
# unreadable. The file on disk may be smaller or larger (!) than
# the reported content length in those cases.
# Hence we overwrite the file if the reported content length
# differs from what we have already by at least k bytes (arbitrary)

# TODO this is still not foolproof, as the fundamental problem is
# that the content length cannot be trusted, so this really needs
# to be avoided and replaced by something else, e.g., explicitly
# storing what was downloaded correctly
if delta > 2:
if delta > 10:
print_(
' - "%s" seems incomplete, downloading again' % fname)
' - "%s" seems corrupt, downloading again' % fname)
else:
print_(' - "%s" already exists, skipping' % fname)
dl = False
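
Condensed into a self-contained helper, the decision above amounts to the following (a sketch; the helper name and the 10-byte tolerance default are illustrative, mirroring the arbitrary k from the comment):

    import math
    from os import path

    def should_redownload(filepath, clen, tolerance=10):
        # no file on disk yet: download
        if not path.exists(filepath):
            return True
        # no usable content length to compare against: keep what we have
        if clen <= 0:
            return False
        # re-download when the on-disk size differs from the reported
        # content length by more than `tolerance` bytes, in either direction
        return math.fabs(clen - path.getsize(filepath)) > tolerance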
@@ -345,7 +352,7 @@ def download(self, url, target_dir=".", target_fname=None):
slice_size = 524288 # 512KB buffer
last_time = time.time()
with open(filepath, 'wb') as f:
for data in response.iter_content(slice_size):
for data in response.iter_content(chunk_size=slice_size):
f.write(data)
try:
percent = int(float(done_size) / full_size * 100)
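
The write loop itself is the standard requests streaming pattern; a minimal sketch for context (url and filepath are placeholders, and stream=True on the requests.get call is an assumption, since that call is outside this hunk but is required for iter_content to avoid buffering the whole body):

    import requests

    def fetch(url, filepath, slice_size=524288):  # 512KB buffer
        response = requests.get(url, stream=True)
        response.raise_for_status()
        with open(filepath, 'wb') as f:
            # iter_content yields chunks of at most slice_size bytes
            for chunk in response.iter_content(chunk_size=slice_size):
                f.write(chunk)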
@@ -460,6 +467,7 @@ def download_course(self, cname, dest_dir=".", reverse_sections=False, gzip_cour

# ensure the class dir exists
clsdir = path.join(wkdir, clsdirname)

if not path.exists(clsdir):
os.makedirs(clsdir)

@@ -472,6 +480,7 @@ def download_course(self, cname, dest_dir=".", reverse_sections=False, gzip_cour
classResource, target_dir=clsdir, target_fname=tfname)
except Exception as e:
print_(" - failed: ", classResource, e)

if gzip_courses:
tar_file_name = cname + ".tar.gz"
print_("Compressing and storing as " + tar_file_name)