In [1]:
import os
import urllib
import urllib.request
import zipfile

import numpy as np
import pandas as pd

# Name of the CSV file that load_data() reads from the data folder.
FILENAME = 'flux.csv'
# Remote location of the zipped dataset fetched by fetch_data().
DOWNLOAD_URL = 'https://github.com/SpikeLab-CL/challenge_watershed/raw/main/flux.csv.zip'
# Local data folder, resolved relative to the current working directory.
DATA_PATH = os.path.join('..', 'data')

def fetch_data(data_path : str = DATA_PATH, download_url : str = DOWNLOAD_URL) -> None:
    """
    Download the zipped dataset and extract it into the specified folder.

    The archive is saved as ``flux.csv.zip`` inside `data_path`, and all of
    its members are then extracted into that same folder. The folder is
    created first when it does not exist yet. Any existing archive or
    extracted files with the same names are overwritten.

    Parameters
    ----------
    data_path : string
        Folder path. Created (including parents) if missing.
    download_url : string
        File URL.

    Returns
    -------
    None

    Notes
    -----
    Errors raised by the download (``urllib.request.urlretrieve``) or by
    opening/extracting the archive (``zipfile.ZipFile``) are not caught and
    propagate to the caller.
    """
    # urlretrieve does not create missing directories, so a fresh checkout
    # without the data folder would otherwise fail with FileNotFoundError.
    # An empty path means "current directory" and needs no creation.
    if data_path:
        os.makedirs(data_path, exist_ok=True)

    zip_path = os.path.join(data_path, 'flux.csv.zip')
    urllib.request.urlretrieve(download_url, zip_path)
    with zipfile.ZipFile(zip_path) as zfile:
        zfile.extractall(data_path)
        
def load_data(data_path : str = DATA_PATH, filename : str = FILENAME) -> pd.DataFrame:
    """
    Read a CSV file into a DataFrame ordered chronologically by ``date``.

    The ``date`` column is converted to datetimes using the ``%Y-%m-%d``
    format, the rows are sorted by that column and the index is rebuilt
    as a fresh 0..n-1 range.

    Parameters
    ----------
    data_path : string
        Folder path.
    filename : string
        Filename.

    Returns
    -------
    pd.DataFrame
        File contents with a datetime ``date`` column, sorted by date.
    """
    csv_path = os.path.join(data_path, filename)
    raw = pd.read_csv(csv_path)

    # errors='coerce' turns values that do not match the format into NaT
    # instead of raising.
    parsed_dates = pd.to_datetime(raw['date'], format='%Y-%m-%d', errors='coerce')

    ordered = raw.assign(date=parsed_dates).sort_values(by=['date'])
    return ordered.reset_index(drop=True)

In [2]:
# Download the dataset archive and extract it into the default data folder (DATA_PATH).
fetch_data()

In [None]:
# Load the CSV from the default data folder with dates parsed and rows sorted by date.
df = load_data()