In [1]:
import os
import sys
import logging
# Make INFO-level messages visible by lowering the root logger's threshold.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Put the parent of the current working directory on the import path so the
# `utils` package imported below can be found; skipped when the entry is
# already present so re-running the cell does not add duplicates.
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
    
import utils.downloading
import utils.files



In [2]:
def log_progress(sequence, every=None, size=None, name='Items'):
    """Yield the items of ``sequence`` while driving a notebook progress widget.

    A ``VBox`` holding an ``HTML`` label and an ``IntProgress`` bar is
    displayed once, then refreshed on the first item and on every
    ``every``-th item after that.

    Args:
        sequence: Iterable to walk through. When ``len()`` is not supported
            (and ``size`` is not given) it is treated as an iterator of
            unknown length and the label shows ``?`` as the total.
        every: Refresh the widget each time the running count is a multiple
            of this value. When omitted and the size is known, it is 1 for
            sizes up to 200 and ``int(size / 200)`` (about every 0.5%)
            otherwise. Must be supplied when the size is unknown.
        size: Total number of items; taken from ``len(sequence)`` if omitted.
        name: Text placed in front of the counter in the label.

    Yields:
        Each item of ``sequence``, unchanged and in order.

    Raises:
        AssertionError: If the size is unknown and ``every`` was not given.
    """
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display

    # Work out the total; a TypeError from len() means "unknown length".
    unknown_length = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            unknown_length = True

    # Settle the refresh interval before any widget is created.
    if size is None:
        assert every is not None, 'sequence is iterator, set every'
    elif every is None:
        every = 1 if size <= 200 else int(size / 200)     # every 0.5%

    if unknown_length:
        # No total to measure against: show a full bar in the 'info' style.
        bar = IntProgress(min=0, max=1, value=1)
        bar.bar_style = 'info'
    else:
        bar = IntProgress(min=0, max=size, value=0)
    caption = HTML()
    display(VBox(children=[caption, bar]))

    count = 0
    try:
        for count, item in enumerate(sequence, 1):
            refresh_due = count == 1 or count % every == 0
            if refresh_due and unknown_length:
                caption.value = '{name}: {index} / ?'.format(
                    name=name, index=count)
            elif refresh_due:
                bar.value = count
                caption.value = u'{name}: {index} / {size}'.format(
                    name=name, index=count, size=size)
            yield item
    except BaseException:
        # Catches everything, including the GeneratorExit raised when the
        # consumer stops early; the bar turns red and the error propagates.
        bar.bar_style = 'danger'
        raise
    else:
        bar.bar_style = 'success'
        bar.value = count
        # A count of 0 (nothing was yielded) is rendered as '?'.
        caption.value = "{name}: {index}".format(
            name=name, index=str(count or '?'))

In [3]:
# Calendar years handed to download_data() in the final cell of this notebook.
years_to_download = [2017, 2018]

In [4]:
def download_data(years_to_download):
    """Download and store the historical data for each requested year.

    Years are processed one at a time, with a progress widget provided by
    ``log_progress``: the whole year is loaded first via ``load_year_data``,
    then the target folder is prepared and the data is written with
    ``save_year_data``.

    Args:
        years_to_download: Iterable of years to fetch.
    """
    logging.info('Starting to download historical data')
    for year in log_progress(years_to_download):
        # Fetch everything for the year before touching the file system.
        logging.info('Requesting data for {}'.format(year))
        months_of_year = load_year_data(year)

        logging.info('Saving data for {}'.format(year))
        target_folder = get_folder_for_year(year)
        # Presumably creates the folder when missing -- confirm in utils.files.
        utils.files.ensure_directory(target_folder)
        save_year_data(target_folder, year, months_of_year)

In [5]:
def get_folder_for_year(year):
    """Return the relative folder path under which a year's files are kept.

    Args:
        year: Year to build the path for.

    Returns:
        The string ``'historical_data/<year>'``.
    """
    folder_template = 'historical_data/{}'
    return folder_template.format(year)

In [6]:
def save_year_data(save_folder, year, year_data):
    """Write each month's data of one year to its own CSV-named file.

    Files are named ``<year>-<n>.csv``, where ``n`` is the 1-based position
    of the entry in ``year_data`` (no zero padding).

    Args:
        save_folder: Folder passed through to ``utils.files.save_text_file``.
        year: Year used as the file-name prefix.
        year_data: Iterable of per-month payloads, in month order.
    """
    for month_number, month_payload in enumerate(year_data, 1):
        file_name = '{}-{}.csv'.format(year, month_number)
        utils.files.save_text_file(month_payload, file_name, save_folder)

In [7]:
def load_year_data(year):
    """Load the data of all twelve months of ``year``.

    Args:
        year: Year to load.

    Returns:
        A list of twelve items, the results of ``load_month_data`` for
        months 1 through 12, in that order.
    """
    return [load_month_data(year, month) for month in range(1, 13)]

In [8]:
def load_month_data(year, month):
    """Fetch the flight history for a single month.

    Logs the request at INFO level, then delegates to a freshly created
    ``utils.downloading.FlightDataDownloader``.

    Args:
        year: Year of the requested month.
        month: Month number within ``year``.

    Returns:
        Whatever ``FlightDataDownloader.download_fights_history`` returns
        for ``(year, month)``.
    """
    request_message = 'Requesting data for {}-{}'.format(year, month)
    logging.info(request_message)
    # A new downloader instance is built for every call.
    return utils.downloading.FlightDataDownloader().download_fights_history(year, month)

In [None]:
# Run the full download: for every year in years_to_download,
# load_month_data is called once per month (12 calls per year) before
# that year's files are saved.
download_data(years_to_download)

INFO:root:Starting to download historical data


VBox(children=(HTML(value=''), IntProgress(value=0, max=2)))

INFO:root:Requesting data for 2017
INFO:root:Requesting data for 2017-1
