# Getting Started

Let's start by downloading the training, validation, and test images we will need.

In [None]:
import os
import zipfile
import requests
import shutil


def download_file_from_url(url, local_filename):
    """Stream the resource at *url* to the file *local_filename*.

    Args:
        url: Address of the resource to fetch.
        local_filename: Path of the file to (over)write with the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    # The timeout bounds the connect phase and each wait for more bytes, not
    # the total transfer time, so large downloads still work while a stalled
    # server no longer hangs forever.
    with requests.get(url, stream=True, timeout=60) as r:
        # Fail before touching the disk so an error page is never saved as if
        # it were the requested file.
        r.raise_for_status()
        # The raw stream yields bytes exactly as sent on the wire; ask urllib3
        # to undo any gzip/deflate Content-Encoding while copying.
        r.raw.decode_content = True
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)


def download_dataset(url, directory='data/'):
    """Download the dataset archive at *url* into *directory* once.

    The archive keeps the file name it has in the URL. If a file with that
    name already exists in *directory*, nothing is downloaded. ``.zip``
    archives are also extracted into *directory*.

    Args:
        url: Address of the dataset file; its base name (without extension)
            is used as the dataset name in progress messages.
        directory: Folder that receives the archive and its extracted
            contents. A trailing path separator is optional.
    """
    dataset_name, file_ext = os.path.splitext(os.path.basename(url))
    # os.path.join inserts the separator only when it is missing, so both
    # 'data/' and 'data/train' place the archive inside the directory.
    file_name = os.path.join(directory, dataset_name + file_ext)
    if os.path.exists(file_name):
        print("{} dataset already downloaded".format(dataset_name))
        return

    if directory:
        os.makedirs(directory, exist_ok=True)

    print("Downloading {} dataset...".format(dataset_name), end='', flush=True)
    # Work on a temporary name and publish the final name only after the
    # download (and extraction) succeeded; otherwise an interrupted run would
    # leave a truncated archive that later runs mistake for a finished one.
    partial_name = file_name + ".part"
    try:
        download_file_from_url(url, partial_name)

        if file_ext == '.zip':
            with zipfile.ZipFile(partial_name, "r") as zip_ref:
                zip_ref.extractall(directory)
        os.replace(partial_name, file_name)
    finally:
        # After a successful os.replace the partial file is gone, so this
        # only cleans up after a failure.
        if os.path.exists(partial_name):
            os.remove(partial_name)
    print("DONE")


# Fetch the train, validation and test archives, one directory per split.
for dataset in ("train", "valid", "test"):
    # Create the split's directory if it does not exist yet.
    directory = "data/{}".format(dataset)
    os.makedirs(directory, exist_ok=True)

    archive_url = "https://s3-us-west-1.amazonaws.com/udacity-dlnfd/datasets/skin-cancer/{}.zip".format(dataset)
    download_dataset(url=archive_url, directory=directory)