# Work with Kaggle repo in Colab

In [8]:
# Mount Google Drive at /content/drive; later cells change into directories below this mount point.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# GitPython supplies the Repo and Git classes imported below.
# NOTE(review): no version is pinned here, so the installed release may differ between runs.
%pip install GitPython

from git import Repo, Git



## Set up the repo

In [None]:
# Work from the Drive notebooks folder: the relative "kaggle" path used below is resolved against it.
%cd /content/drive/MyDrive/Colab\ Notebooks

from pathlib import Path
from getpass import getpass

# Clone the repository only when no "kaggle" entry exists in the current directory yet;
# otherwise the existing checkout is reused as-is.
kaggle_checkout = Path("kaggle")
if not kaggle_checkout.exists():
    # The token is typed interactively so that it never appears in the notebook source.
    # NOTE(review): it is embedded in the clone URL, which git presumably records as the
    # remote URL inside the checkout on Drive - confirm, and consider a credential helper.
    github_PAT = getpass("Enter GitHub PAT:")
    Git(".").clone(f"https://{github_PAT}@github.com/Witalia008/kaggle.git")

# `repo` is the object-level handle; `git` is the command wrapper used by the later cells.
repo = Repo("kaggle")
git = repo.git

/content/drive/MyDrive/Colab Notebooks


In [None]:
# Store core.filemode = false in the repository configuration, then read the value back
# and print it as a confirmation.
with repo.config_writer() as cfg_writer:
    cfg_writer.set_value("core", "filemode", "false")

stored_filemode = repo.config_reader().get_value("core", "filemode")
print(f"Set core.filemode to {stored_filemode}")

Set core.filemode to False


## Check out a branch

In [None]:
import ipywidgets as widgets

# Fetch from origin (with pruning enabled) before listing its remote-tracking refs.
repo.remotes.origin.fetch(prune=True)
remote_branches = [r.name for r in repo.remotes.origin.refs]

# Leave out the "origin/HEAD" entry by name instead of by position: slicing off the
# first element silently drops a real branch (and keeps "origin/HEAD") whenever that
# entry is not listed first or is not present at all.
selectable_branches = [name for name in remote_branches if name != "origin/HEAD"]

branch_select = widgets.Dropdown(
    options=selectable_branches,
    description="Select a branch to checkout"
)
display(branch_select)

Dropdown(description='Select a branch to checkout', options=('origin/master', 'origin/vinbigdata-chest-xray-ab…

In [None]:
# The dropdown holds names of the form "<remote>/<branch>"; everything after the first
# "/" is used as the local branch name (the whole value when there is no "/").
branch_name = branch_select.value
branch_name_local = branch_name.split("/", 1)[-1]

existing_local_branches = [head.name for head in repo.branches]
if branch_name_local not in existing_local_branches:
    # No local branch of that name yet: create it from the selected remote branch.
    git.checkout(branch_name, b=branch_name_local)
else:
    git.checkout(branch_name_local)

# Bring the checked-out branch up to date and show the local branch list.
git.pull()
print(git.branch())

  master
* vinbigdata-chest-xray-abnormalities-detection


## Set up Kaggle directories

In [None]:
# Switch into the repository checkout so that setup_colab.py, written and imported by the
# following cells via relative names, lives inside it.
%cd /content/drive/MyDrive/Colab\ Notebooks/kaggle

/content/drive/MyDrive/Colab Notebooks/kaggle


In [9]:
%%writefile setup_colab.py
import json
from pathlib import Path
import shutil

# Standard Kaggle directory layout, expressed relative to a single root.
_KAGGLE_ROOT = Path("/kaggle")
INPUT_FOLDER = _KAGGLE_ROOT / "input"
OUTPUT_FOLDER = _KAGGLE_ROOT / "output"
WORK_FOLDER = _KAGGLE_ROOT / "working"


def dump_dataset_metadata(user_name, dataset_name, folder_path):
    """Write a ``dataset-metadata.json`` file into ``folder_path``.

    The file holds the dataset title (``dataset_name``), its id in the form
    ``"<user_name>/<dataset_name>"`` and a single ``CC0-1.0`` license entry,
    serialized as JSON with a four-space indent. An existing file of that
    name is overwritten.
    """
    metadata = {
        "title": dataset_name,
        "id": f"{user_name}/{dataset_name}",
        "licenses": [{"name": "CC0-1.0"}],
    }
    metadata_file = Path(folder_path) / "dataset-metadata.json"
    with metadata_file.open("w") as handle:
        json.dump(metadata, handle, indent=4)


def is_running_in_colab(check_env=True):
    """Report whether the code appears to be running inside Google Colab.

    Args:
        check_env: When False, detection is skipped and True is returned
            immediately (callers use this after having checked once already).

    Returns:
        True when ``"google.colab"`` occurs in the string form of the active
        IPython shell, otherwise False. When no ``get_ipython`` name is
        available (e.g. a plain Python interpreter) the result is False
        instead of an unhandled ``NameError``.
    """
    if not check_env:
        return True

    try:
        # `get_ipython` is not imported in this module; it is only resolvable when
        # the surrounding interpreter provides it.
        shell = get_ipython()  # noqa: F821
    except NameError:
        shell = None

    running_in_colab = "google.colab" in str(shell)
    print(f"Running in Colab: {running_in_colab}")
    return running_in_colab


def setup_colab_drive_for_kaggle(check_env=True):
    """Mount Google Drive at ``/content/drive`` when running in Colab.

    Returns True when the Colab check passed (and the mount was requested),
    False when the check failed and nothing was done.
    """
    if is_running_in_colab(check_env):
        # Imported lazily: the package is only needed on the Colab branch.
        from google.colab import drive as colab_drive

        colab_drive.mount("/content/drive")
        return True  # Is Colab

    return False


def setup_colab_secrets_for_kaggle(check_env=True):
    """Link the ``kaggle.json`` stored in Drive into ``~/.kaggle``.

    When ``/content/drive/MyDrive/Colab Notebooks/kaggle/kaggle.json`` exists,
    any existing ``~/.kaggle`` directory is removed and recreated, and a
    ``kaggle.json`` symlink pointing at the Drive file is placed inside it.
    When the Drive file is absent nothing is changed.

    Returns True when the Colab check passed, False otherwise.
    """
    if not is_running_in_colab(check_env):
        return False

    drive_credentials = Path("/content/drive/MyDrive/Colab Notebooks/kaggle") / "kaggle.json"
    if not drive_credentials.exists():
        return True  # Is Colab, but there is nothing to link.

    # Start from an empty config directory so that stale content is discarded.
    config_dir = Path.home() / ".kaggle"
    if config_dir.exists():
        shutil.rmtree(config_dir)
    config_dir.mkdir()

    config_dir.joinpath("kaggle.json").symlink_to(drive_credentials)

    config_listing = [str(entry) for entry in config_dir.iterdir()]
    print(f"Content of Kaggle config dir ({config_dir}): {config_listing}")

    return True  # Is Colab


def setup_colab_directories_for_kaggle(check_env=True, local_working=False):
    """Recreate ``/kaggle`` with ``input``/``output``/``working`` subdirectories.

    ``input`` and ``output`` are always symlinks into
    ``/content/drive/MyDrive/kaggle`` (the target directories are created there
    when missing). ``working`` is a symlink into Drive as well, unless
    ``local_working`` is True, in which case it is a plain local directory.
    The content of every directory involved is printed along the way.

    Args:
        check_env: Forwarded to ``is_running_in_colab``.
        local_working: Keep ``/kaggle/working`` local instead of mapping it to Drive.

    Returns:
        True when the Colab check passed, False otherwise (nothing is touched then).
    """
    if not is_running_in_colab(check_env):
        return False

    # Names that live in Drive; "working" joins them only when it should be mapped there.
    drive_backed = ["input", "output"]
    if not local_working:
        drive_backed.append("working")

    # Make sure the Drive-side directories are present.
    drive_root = Path("/content/drive/MyDrive/kaggle")
    drive_root.mkdir(exist_ok=True)
    for name in drive_backed:
        drive_root.joinpath(name).mkdir(exist_ok=True)
    drive_listing = [str(entry) for entry in drive_root.iterdir()]
    print(f"Content of Drive Kaggle data dir ({drive_root}): {drive_listing}")

    # NOTE(review): an existing /kaggle tree is removed wholesale, which includes a
    # local working directory created by an earlier call with local_working=True.
    kaggle_root = Path("/kaggle")
    if kaggle_root.exists():
        shutil.rmtree(kaggle_root)
    kaggle_root.mkdir()

    for name in drive_backed:
        kaggle_root.joinpath(name).symlink_to(drive_root / name)

    # "working" was not linked above when it is meant to be local, so create it here.
    if local_working:
        kaggle_root.joinpath("working").mkdir()

    kaggle_listing = [str(entry) for entry in kaggle_root.iterdir()]
    print(f"Content of Kaggle data dir ({kaggle_root}): {kaggle_listing}")

    # Either way all three subdirectories exist now; report them in a fixed order.
    for name in ("input", "output", "working"):
        subdir = kaggle_root / name
        subdir_listing = [str(entry) for entry in subdir.iterdir()]
        print(f"Content of Kaggle data subdir ({subdir}): {subdir_listing}")

    return True  # Is Colab


def setup_colab_for_kaggle(check_env=True, local_working=False):
    """Run the full Colab setup: Drive mount, ``/kaggle`` directories, Kaggle secrets.

    The environment is checked once here; the individual steps are then called
    with ``check_env=False`` so that they do not repeat the check.

    Returns True when the Colab check passed and the steps were run, else False.
    """
    if is_running_in_colab(check_env):
        setup_colab_drive_for_kaggle(check_env=False)
        setup_colab_directories_for_kaggle(check_env=False, local_working=local_working)
        setup_colab_secrets_for_kaggle(check_env=False)
        return True  # Is Colab

    return False


Overwriting setup_colab.py


In [None]:
# Import the module object itself first so that it can be passed to reload().
import setup_colab
from importlib import reload
# Reload so that a setup_colab.py rewritten while the kernel is alive takes effect.
reload(setup_colab)
# Bind the helpers only after the reload, so these names come from the reloaded module.
from setup_colab import setup_colab_for_kaggle, INPUT_FOLDER, WORK_FOLDER, dump_dataset_metadata
# local_working=True keeps /kaggle/working as a local directory instead of a link into Drive.
setup_colab_for_kaggle(local_working=True)

Running in Colab: True
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Content of Drive Kaggle data dir (/content/drive/MyDrive/kaggle): ['/content/drive/MyDrive/kaggle/input', '/content/drive/MyDrive/kaggle/working', '/content/drive/MyDrive/kaggle/.ipynb_checkpoints', '/content/drive/MyDrive/kaggle/output']
Content of Kaggle data dir (/kaggle): ['/kaggle/output', '/kaggle/input', '/kaggle/working']
Content of Kaggle data subdir (/kaggle/input): ['/kaggle/input/cassava-model', '/kaggle/input/cassava-leaf-disease-classification', '/kaggle/input/googlebitemperedloss', '/kaggle/input/vbdyolo', '/kaggle/input/.ipynb_checkpoints', '/kaggle/input/vinbigdata', '/kaggle/input/vinbigdata-chest-xray-abnormalities-detection', '/kaggle/input/vinbigdata-chest-xray-original-png']
Content of Kaggle data subdir (/kaggle/output): ['/kaggle/output/vbdyolo_out_1_300epochs', '/kaggle/output/.ipynb_checkpoints', '/kaggle/outpu

True