## Download script for OMI data

Example of how to download monthly summaries (asc.gz files) from the temis.nl server.

In [None]:
import os
import wget
import pandas as pd
import requests
from tqdm import *

In [None]:
def make_url(year, month):
    """
    Build the download URL of one monthly NO2 file on the temis.nl server.

    year and month are strings ('YYYY' and zero-padded 'MM'). The result
    has the form:
    http://temis.nl/airpollution/no2col/data/omi/data_v2/2006/06/no2_200606.asc.gz
    """
    root = "http://temis.nl/airpollution/no2col/data/omi/data_v2/"
    # Files are grouped on the server in <year>/<month>/ sub-folders.
    folder = year + '/' + month + '/'
    filename = "no2_" + year + month + ".asc.gz"
    return root + folder + filename

def make_target(year, month):
    """Return the local path 'Data/<year><month>.asc.gz' for a download."""
    return "Data/" + year + month + ".asc.gz"

def download_file(year, month):
    """
    Download one monthly file from the server into the local Data/ folder.

    year, month: strings, 'YYYY' and zero-padded 'MM' (e.g. '2010', '06').

    The Data/ folder is created if it does not exist. The URL is probed
    first, and the file is only fetched with wget when the server answers
    with HTTP status 200.

    Raises IOError if the server does not answer with status 200.
    """
    if not os.path.isdir("Data"):
        os.makedirs("Data")
    url = make_url(year, month)
    # stream=True defers fetching the body, so this probe does not pull
    # the whole file over the network before wget downloads it again.
    r = requests.get(url, stream=True)
    try:
        status = r.status_code
    finally:
        # Release the connection; the body is never read here.
        r.close()
    # Compare by value: `is` tests object identity, not numeric equality.
    if status != 200:
        raise IOError("File not found")
    wget.download(url, out=make_target(year, month))

In [None]:
# Example of downloading a single year/month file.
# Both arguments are strings; the month is zero-padded ('06', not '6').

download_file('2010', '06')

If there is no Data/ folder in the working directory, one will be created.
If the no2_[year][month].asc.gz file doesn't exist on the server,
no download will be attempted; instead an IOError will be raised.
(This is so you can use a try: except: syntax to run a loop.)

### Download multiple files

To download multiple files you will need to iterate over a range of date-time objects.
Each element of the list can then be used to get a month / year pair, used as inputs to the downloader.

In [None]:
def download_batch(start, end):
    """
    Download every monthly file between a start and an end date.

    A local Data folder will be created if none exists.
    All files present in temis.nl/airpollution/no2col/data/omi/data_v2/
    for the requested months will be downloaded there.
    (Even though dates are given to days, the time steps are monthly:
    every month from the month of `start` to the month of `end`,
    both included, is attempted.)

    start = date string 'YYYYMMDD'
    end =  date string 'YYYYMMDD'

    Returns a list of (year, month) string tuples for the months whose
    download failed.

    e.g.
    >>> download_batch(start='20041001', end='20161001')
    """
    # Monthly periods cover each calendar month exactly once, whatever the
    # day of month given. A month-end date_range would silently skip the
    # final month unless `end` fell on its last day.
    months = pd.period_range(start=start, end=end, freq='M')
    missing = []
    for period in tqdm(months):
        month = "{0:02d}".format(period.month)
        year = str(period.year)
        try:
            download_file(year, month)
        except IOError:
            # download_file raises IOError for an unavailable file. A bare
            # except would also swallow KeyboardInterrupt, making a long
            # batch impossible to stop.
            missing.append((year, month))
    return missing

In [None]:
# Display the (year, month) pairs that could not be downloaded.
# NOTE(review): `missing` is only assigned inside download_batch in the
# visible cells -- presumably an earlier run did
# `missing = download_batch(...)`; confirm before relying on this cell.
missing

In [None]:
import glob

# Delete every *.tmp file in the Data folder. os.remove takes one literal
# path and does not expand wildcards, so the pattern is expanded with glob.
for tmp_path in glob.glob("Data/*.tmp"):
    os.remove(tmp_path)

In [None]:
# List any *.tmp files still present in the Data folder
# (IPython shell shortcut, not plain Python).
ls Data/*.tmp

In [None]:
#TODO

# Make a nice report of missing data

# Clean any duplicate or temporary files from Data folder