In [62]:
import sys
import os
import urllib.request
import tarfile
import zipfile

In [63]:
def _print_download_progress(count, block_size, total_size):
    """
    Print the download progress on a single line that overwrites itself.
    Used as the reporthook call-back of urllib.request.urlretrieve()
    in maybe_download_and_extract().
    :param count:
        Number of blocks transferred so far.
    :param block_size:
        Size of a single block in bytes.
    :param total_size:
        Total size of the file in bytes. A value of zero or less means
        the size is unknown (urlretrieve reports -1 in that case).
    :return:
        Nothing.
    """

    # Number of bytes accounted for so far (an upper bound, see below).
    downloaded = count * block_size

    if total_size > 0:
        # The last block is usually only partly filled, so
        # count * block_size can exceed total_size. Cap at 100%.
        pct_complete = min(1.0, float(downloaded) / total_size)

        # Status-message. Note the \r which means the line should overwrite itself.
        msg = "\r- Download progress: {0:.1%}".format(pct_complete)
    else:
        # Unknown total size: a percentage cannot be computed (and dividing
        # by zero would raise), so report the byte count instead.
        msg = "\r- Download progress: {0} bytes".format(downloaded)

    # Print it.
    sys.stdout.write(msg)
    sys.stdout.flush()

In [64]:
def maybe_download_and_extract(url, download_dir):
    """
    Download and extract the data if it doesn't already exist.
    Supports zip-files (.zip) and gzip-compressed tar-balls
    (.tar.gz and .tgz). Other file types are downloaded but not unpacked.
    :param url:
        Internet URL for the archive to download.
        Example: "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    :param download_dir:
        Directory where the downloaded file is saved and unpacked.
        Example: "data/CIFAR-10/"
    :return:
        Nothing.
    :raises ValueError:
        If no file name can be derived from the url.
    """

    # Imported locally so this function brings the name into scope itself.
    from urllib.parse import urlsplit

    # Filename for saving the file downloaded from the internet.
    # Use only the path component of the URL so that a query string or
    # fragment ("...file.tgz?dl=1", "...file.tgz#x") neither ends up in the
    # file name nor hides the archive extension.
    filename = urlsplit(url).path.rsplit('/', 1)[-1]
    if not filename:
        raise ValueError("Cannot derive a file name from url: {0!r}".format(url))
    file_path = os.path.join(download_dir, filename)

    # Check if the file already exists.
    # If it exists then we assume it has also been extracted.
    if os.path.exists(file_path):
        print("Data has apparently already been downloaded and unpacked.")
        return

    # Create the download directory if needed.
    os.makedirs(download_dir, exist_ok=True)

    # Download the file from the internet.
    try:
        file_path, _ = urllib.request.urlretrieve(url=url,
                                                  filename=file_path,
                                                  reporthook=_print_download_progress)
    except BaseException:
        # A failed or interrupted download (including Ctrl-C / kernel
        # interrupt) must not leave a partial archive behind, because the
        # next call would take its existence as "already unpacked".
        if os.path.exists(file_path):
            try:
                os.remove(file_path)
            except OSError:
                # Best effort only: keep the original error visible.
                pass
        raise

    print()
    print("Download finished. Extracting files.")

    # Compare extensions case-insensitively (".ZIP", ".TGZ", ...).
    archive_name = file_path.lower()

    if archive_name.endswith(".zip"):
        # Unpack the zip-file; the context manager closes the handle.
        with zipfile.ZipFile(file=file_path, mode="r") as archive:
            archive.extractall(download_dir)
    elif archive_name.endswith((".tar.gz", ".tgz")):
        # Unpack the tar-ball; the context manager closes the handle.
        with tarfile.open(name=file_path, mode="r:gz") as archive:
            if hasattr(tarfile, "data_filter"):
                # The archive comes from the internet: use the safe
                # extraction filter where this Python version provides it.
                archive.extractall(download_dir, filter="data")
            else:
                archive.extractall(download_dir)
    else:
        print("- Unrecognized archive type, nothing was extracted: " + file_path)

    print("Done.")


In [65]:
# First VoxForge sample archive. A later cell rebinds `url`, so whichever
# assignment was executed last is the one the download call uses.
url = "http://www.repository.voxforge1.org/downloads/pt/Trunk/Audio/Main/16kHz_16bit/Antonio-20090724-uip.tgz"

In [76]:
# Archive to fetch: a gzip-compressed tar-ball (.tgz) from the VoxForge repository.
url = "http://www.repository.voxforge1.org/downloads/pt/Trunk/Audio/Main/16kHz_16bit/ErivanCerqueira-20100222-mwm.tgz"

In [77]:
# Target folder for the downloaded archive and its extracted contents.
# NOTE(review): machine-specific absolute path; adjust it before running elsewhere.
download_dir = "C:/Users/eborges/teste/temp/"

In [78]:
# Fetch the archive named by `url` into `download_dir` and unpack it there
# (skipped when the archive file is already present).
maybe_download_and_extract(url=url, download_dir=download_dir)

- Download progress: 100.2%
Download finished. Extracting files.
Done.


In [82]:
import os
import sys

# Directory whose contents are listed.
path = "C:\\Users\\eborges\\teste\\temp"
# Remote location kept disabled: os.listdir() only accepts local filesystem paths.
#path ="http://www.repository.voxforge1.org/downloads/pt/Trunk/Audio/Main/16kHz_16bit/"
dirs = os.listdir(path)

# Print every entry (files and sub-directories alike), one name per line,
# in the order os.listdir() returned them.
for file in dirs:
    print(file)

ErivanCerqueira-20100222-mwm
ErivanCerqueira-20100222-mwm.tgz
etc
LICENSE
teste.txt
teste2.txt
wav
