# core

> Some utility functions for working with the Kaggle API.

In [None]:
#| default_exp core

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from pathlib import Path  # For working with file paths
import json

In [None]:
#| export
def save_kaggle_creds(username:str, # The Kaggle API username.
                      key:str, # The Kaggle API key.
                      overwrite=False): # Overwrite existing credentials.
    """
    Save the Kaggle API credentials to `~/.kaggle/kaggle.json`.

    Nothing is written (a message is printed instead) when `username` or `key`
    is empty or `None`, or when the credentials file already exists and
    `overwrite` is false. The file is restricted to owner read/write (0o600)
    before the key is written to it.
    """
    import os  # Local import: only needed for the permission-restricted open below

    # Reject missing values (empty string or None) instead of writing an unusable file
    if not username:
        print("Empty username.")
        return
    if not key:
        print("Empty key.")
        return

    # Set the path to the kaggle.json file
    cred_path = Path('~/.kaggle/kaggle.json').expanduser()

    # Leave existing credentials alone unless the caller explicitly asks to replace them
    if cred_path.exists() and not overwrite:
        print("Credentials already present. Set `overwrite=True` to replace them.")
        return

    # Create the directory (and any missing parents) if it does not exist
    cred_path.parent.mkdir(parents=True, exist_ok=True)

    # Create the file with owner-only permissions from the start, so the key is
    # never written to a file that is readable under the default umask
    fd = os.open(cred_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as write_file:
        # The mode passed to os.open only applies to newly created files, so
        # tighten a pre-existing file before any secret is written to it
        cred_path.chmod(0o600)
        json.dump({"username": username, "key": key}, write_file)

In [None]:
# Placeholder credentials, for demonstration only
username = "name"
key = "12345"
# With `overwrite=False`, an existing credentials file is left untouched
save_kaggle_creds(username=username, key=key, overwrite=False)

Credentials already present. Set `overwrite=True` to replace them.


In [None]:
#| export
from cjm_psl_utils.core import file_extract

In [None]:
#| export
def dl_kaggle(kaggle_dataset, # The Kaggle dataset id in the format `'<username>/<dataset_name>'`.
              archive_path, # The path to save the archive file.
              dataset_path, # The path to save the extracted dataset.
              delete_archive=True): # Delete the archive after extraction.
    """
    Download the Kaggle dataset and extract it to the specified directory.

    `archive_path` and `dataset_path` may be `Path` objects or strings. If
    `dataset_path` already exists, nothing is downloaded and a message is
    printed. Otherwise the dataset is downloaded into `archive_path.parent`,
    the archive at `archive_path` is extracted into `dataset_path.parent`, and
    the archive is removed when `delete_archive` is true.
    """

    # Accept plain strings as well as Path objects
    archive_path, dataset_path = Path(archive_path), Path(dataset_path)

    # Nothing to do if the dataset already exists in the specified directory.
    if dataset_path.exists():
        # Inform the user that the dataset already exists in the specified directory.
        print("Dataset already downloaded")
        return

    # Import the API module from the kaggle package only when a download is
    # actually needed, so the "already downloaded" path works without it.
    # NOTE(review): importing `kaggle` appears to authenticate on import and
    # to fail without credentials -- confirm against the installed version.
    from kaggle import api

    # NOTE(review): this assumes the download is saved as `archive_path.name`
    # inside `archive_path.parent` -- confirm against the Kaggle API.
    api.dataset_download_cli(kaggle_dataset, path=archive_path.parent)

    # NOTE(review): extraction targets the parent directory, so the archive is
    # presumably expected to contain a top-level folder named like
    # `dataset_path.name` -- verify for the dataset in use.
    file_extract(fname=archive_path, dest=dataset_path.parent)

    # Delete the archive if specified
    if delete_archive: archive_path.unlink()

In [None]:
# Directory that holds the extracted datasets (created if missing)
dataset_dir = Path("./Datasets/")
dataset_dir.mkdir(exist_ok=True, parents=True)
print(f"Dataset Directory: {dataset_dir}")

# Directory for the downloaded archives, located next to the dataset directory
archive_dir = dataset_dir.joinpath('../Archive')
archive_dir.mkdir(exist_ok=True, parents=True)
print(f"Archive Directory: {archive_dir}")

Dataset Directory: Datasets
Archive Directory: Datasets/../Archive


In [None]:
# Name of the dataset on Kaggle (also used below for the archive and folder names)
dataset_name = 'style-image-samples'

# Full Kaggle dataset id in the form '<username>/<dataset_name>'
kaggle_dataset = f'innominate817/{dataset_name}'

In [None]:
# Location of the zip archive for the dataset inside the archive directory
archive_path = archive_dir / f'{dataset_name}.zip'
print(f"Archive Path: {archive_path}")

# Location of the extracted dataset inside the dataset directory
dataset_path = dataset_dir / dataset_name
print(f"Dataset Path: {dataset_path}")

Archive Path: Datasets/../Archive/style-image-samples.zip
Dataset Path: Datasets/style-image-samples


In [None]:
# Download and extract the dataset; the archive is deleted afterwards
# because `delete_archive` defaults to True
dl_kaggle(kaggle_dataset=kaggle_dataset,
          archive_path=archive_path,
          dataset_path=dataset_path)

Downloading style-image-samples.zip to Datasets/../Archive


100%|██████████████████████████████████████████████████████████████████████████████████████████████| 16.2M/16.2M [00:00<00:00, 50.8MB/s]





In [None]:
# List the contents of the dataset directory using the `ls` shell command
# (IPython `!` shell escape with `{dataset_path}` interpolated from Python)
!ls {dataset_path}

images


In [None]:
#| hide
# Run nbdev's export step for this project
import nbdev; nbdev.nbdev_export()