In [None]:
import json
import os
from pathlib import Path
import zipfile

# from logger_api import get_logger
import logging

# Module-level logger shared by every helper in this notebook. The commented-out
# line is the disabled project-specific alternative (see the commented import above).
logger = logging.getLogger(__name__)
# logger = get_logger(__name__)

In [None]:
# Root folder for downloads; each dataset lands in DATA_PATH / <entry "path">.
DATA_PATH = Path("data")
# Kaggle credentials file inside the current user's home directory.
KAGGLE_JSON = Path.home() / ".kaggle" / "kaggle.json"
# Evaluated once, when this cell runs; re-run the cell if kaggle.json is created later.
IS_KAGGLE_KEY = KAGGLE_JSON.exists()
# Cache for the authenticated Kaggle client; filled in by get_authenticated_kaggle_api().
KAGGLE_API = None

# Registry of downloadable datasets, keyed by display name. Per-entry fields:
#   "source":   label of the hosting service.
#   "name":     identifier handed to the Kaggle API (Kaggle-hosted entries only).
#   "url":      direct download link (entries that are not hosted on Kaggle).
#   "path":     sub-folder of DATA_PATH that receives the files.
#   "filename": archive file name inside that sub-folder (read by _unzip after a
#               competition download).
# NOTE(review): "source" is spelled both "kaggle" and "Kaggle" below. Nothing in
# this cell compares the value, but normalise the case before adding such a check.
DATASETS = {
    "1C Sales Dataset": {
        "source": "kaggle",
        "name": "competitive-data-science-predict-future-sales",
        "path": "1c_sales_dataset",
        "filename": "competitive-data-science-predict-future-sales.zip",
    },
    "Montreal Bixi Bike Data": {
        "source": "kaggle",
        "name": "supercooler8/bixi-bike-montreal",
        "path": "bixi_bike_data",
        "filename": "bixi-bike-montreal.zip",
    },
    "Turkish Retail Sales": {
        "source": "Kaggle",
        "name": "berkayalan/retail-sales-data",
        "path": "turkish_retail_sales",
        "filename": "retail-sales-data.zip",
    },
    "Sunspot": {
        "source": "Monash Forecasting Repository",
        "url": "https://zenodo.org/record/4654773/files/sunspot_dataset_with_missing_values.zip?download=1",
        "path": "sunspot",
        "filename": "sunspot.zip",
    },
    "Electricity Demand": {
        "source": "Monash Forecasting Repository",
        "url": "https://zenodo.org/record/4656069/files/elecdemand_dataset.zip?download=1",
        "path": "electricity_demand",
        "filename": "electricity_demand.zip",
    },
    "Dominick Sales": {
        "source": "Monash Forecasting Repository",
        "url": "https://zenodo.org/record/4654802/files/dominick_dataset.zip?download=1",
        "path": "dominick_sales",
        "filename": "dominick_sales.zip",
    },
    # Disabled Monash-hosted variant of "London Smart Meters"; the Kaggle-hosted
    # entry with the same key right below is the one in use.
    # "London Smart Meters": {
    #     "source": "Monash Forecasting Repository",
    #     "url": "https://zenodo.org/record/4656072/files/london_smart_meters_dataset_with_missing_values.zip?download=1",
    #     "path": "london_smart_meters",
    #     "filename": "london_smart_meters.zip",
    # },
    "London Smart Meters": {
        "source": "Kaggle",
        "name": "jeanmidev/smart-meters-in-london",
        "path": "london_smart_meters",
        "filename": "smart-meters-in-london.zip",
    },
    "Tourism": {
        "source": "Monash Forecasting Repository",
        "url": "https://zenodo.org/record/4656096/files/tourism_monthly_dataset.zip?download=1",
        "path": "tourism",
        "filename": "tourism.zip",
    },
}


def get_kaggle_username_key(username=None, key=None):
    """Resolve Kaggle credentials from the first source that provides them.

    Sources are tried in this order:

    1. The ``KAGGLE_USERNAME`` and ``KAGGLE_KEY`` environment variables.
    2. The ``username`` and ``key`` arguments (both must be given).
    3. The ``kaggle.json`` file at ``KAGGLE_JSON`` (only when ``IS_KAGGLE_KEY``
       was true at the time the config cell ran).

    Args:
        username: Optional Kaggle username.
        key: Optional Kaggle API key.

    Returns:
        A ``(username, key, authenticate)`` tuple. ``authenticate`` is ``True``
        when one of the sources above supplied credentials, in which case
        ``username`` and ``key`` are both non-``None``. When nothing was found
        the arguments are returned unchanged together with ``False``.

    Raises:
        KeyError: ``kaggle.json`` exists but lacks ``"username"`` or ``"key"``.
    """
    _authenticate_api = False
    if ("KAGGLE_USERNAME" in os.environ) and ("KAGGLE_KEY" in os.environ):
        logger.info("Kaggle Username and Key already set as environment variables")
        if (username is None) or (key is None):
            # Hand back the environment pair instead of None: the caller writes
            # the returned values into os.environ, which only accepts strings.
            # A complete explicit pair is still returned untouched.
            username = os.environ["KAGGLE_USERNAME"]
            key = os.environ["KAGGLE_KEY"]
        _authenticate_api = True
    elif (username is not None) and (key is not None):
        logger.info("Kaggle Username and Key retrieved from parameters")
        _authenticate_api = True
    elif IS_KAGGLE_KEY:
        with open(KAGGLE_JSON, "r") as f:
            kaggle_dict = json.load(f)
        username = kaggle_dict["username"]
        key = kaggle_dict["key"]
        logger.info("Kaggle Username and Key retrieved from kaggle.json.")
        _authenticate_api = True
    else:
        # Report the location that is actually probed (KAGGLE_JSON).
        logger.warning(
            "kaggle.json not found at %s, username and key were not passed as "
            "parameters, and the KAGGLE_USERNAME / KAGGLE_KEY environment "
            "variables are not set",
            KAGGLE_JSON,
        )
    return username, key, _authenticate_api


def get_authenticated_kaggle_api(username=None, key=None):
    """Return the shared, authenticated Kaggle client, creating it on first use.

    The client is cached in the module-level ``KAGGLE_API``. Once a client has
    been cached it is returned as-is; credentials passed to later calls are
    not used to re-authenticate.

    Args:
        username: Optional Kaggle username, forwarded to
            ``get_kaggle_username_key``.
        key: Optional Kaggle API key, forwarded to ``get_kaggle_username_key``.

    Returns:
        The authenticated ``KaggleApi`` instance, or ``None`` when no
        credentials could be resolved.
    """
    global KAGGLE_API
    if KAGGLE_API is not None:
        # Already authenticated: skip re-resolving credentials.
        return KAGGLE_API
    username, key, _authenticate_api = get_kaggle_username_key(username, key)
    if _authenticate_api:
        # os.environ rejects None with a TypeError. A missing value here means
        # the credentials already live in the environment, so leave it alone.
        if username is not None:
            os.environ["KAGGLE_USERNAME"] = username
        if key is not None:
            os.environ["KAGGLE_KEY"] = key
        # Imported only after the environment variables are in place.
        # NOTE(review): presumably because importing the kaggle package
        # triggers authentication at import time - confirm against the
        # installed kaggle version.
        from kaggle.api.kaggle_api_extended import KaggleApi

        api = KaggleApi()
        api.authenticate()
        # Cache only after authenticate() succeeded, so a failed attempt does
        # not leave an unauthenticated client behind for later calls.
        KAGGLE_API = api
    return KAGGLE_API

def _download_competition_dataset(api, dataset_details):
    """Download a Kaggle competition's files into the dataset's folder.

    Args:
        api: Client object exposing ``competition_download_files``.
        dataset_details: Mapping with the competition identifier under
            ``"name"`` and the target sub-folder of ``DATA_PATH`` under
            ``"path"``.
    """
    competition = dataset_details["name"]
    target_dir = DATA_PATH / dataset_details["path"]
    api.competition_download_files(competition, path=target_dir, quiet=False)


def _download_dataset(api, dataset_details):
    """Download a Kaggle dataset into its folder, asking the client to unzip it.

    Args:
        api: Client object exposing ``dataset_download_files``.
        dataset_details: Mapping with the dataset identifier under ``"name"``
            and the target sub-folder of ``DATA_PATH`` under ``"path"``.
    """
    dataset = dataset_details["name"]
    target_dir = DATA_PATH / dataset_details["path"]
    api.dataset_download_files(dataset, path=target_dir, quiet=False, unzip=True)


def _unzip(path, filename, delete_zip=True):
    """Extract an archive in place and optionally remove it afterwards.

    Args:
        path: Sub-folder of ``DATA_PATH`` that holds the archive and receives
            the extracted files.
        filename: Archive file name inside that sub-folder.
        delete_zip: When true, delete the archive once extraction has finished.
    """
    target_dir = DATA_PATH / path
    archive = target_dir / filename
    with zipfile.ZipFile(str(archive), "r") as zip_ref:
        zip_ref.extractall(target_dir)
    # The archive handle is closed by now, so removing the file is safe.
    if delete_zip:
        archive.unlink()


def download_kaggle_dataset(
    dataset_details, username=None, key=None, competition=False
):
    """Download one Kaggle-hosted dataset described by ``dataset_details``.

    Args:
        dataset_details: Mapping with ``"name"`` and ``"path"``; ``"filename"``
            is also read when ``competition`` is true.
        username: Optional Kaggle username, forwarded to
            ``get_authenticated_kaggle_api``.
        key: Optional Kaggle API key, forwarded to
            ``get_authenticated_kaggle_api``.
        competition: When true, download via the competition helper and then
            extract (and delete) the archive named by ``"filename"``. When
            false, download via the dataset helper, which requests unzipping
            from the client itself.

    Raises:
        ValueError: No authenticated Kaggle client could be obtained.
    """
    api = get_authenticated_kaggle_api(username, key)
    if api is None:
        raise ValueError(
            "Kaggle API wasn't able to authenticate. Please provide username and key. Refer to README for instructions on how to do that."
        )
    if not competition:
        _download_dataset(api, dataset_details)
        return
    _download_competition_dataset(api, dataset_details)
    # Spelling of the log message corrected ("Donwload" -> "Download").
    logger.info("Download completed. Unzipping..")
    _unzip(dataset_details["path"], dataset_details["filename"], delete_zip=True)


def download_london_smart_meters(username=None, key=None):
    """Download the "London Smart Meters" entry of ``DATASETS`` from Kaggle.

    Args:
        username: Optional Kaggle username, forwarded to
            ``download_kaggle_dataset``.
        key: Optional Kaggle API key, forwarded to ``download_kaggle_dataset``.
    """
    logger.info("Downloading London Smart Meters Dataset...")
    download_kaggle_dataset(
        DATASETS["London Smart Meters"],
        username,
        key,
        competition=False,
    )

In [None]:
# Run the download. Kaggle credentials must be resolvable (environment variables
# or ~/.kaggle/kaggle.json); otherwise download_kaggle_dataset raises ValueError.
download_london_smart_meters()