## Colab Setup

In [None]:
COLAB = True
KAGGLE = False
DOWNLOAD_DATA = True
SAVE_TO_GITHUB = False
GIT_REPOSITORY = "osic-pulmonary-fibrosis-progression"
FILE_NAME = "main.ipynb"

if COLAB:
    PARENT_DIRECTORY_PATH = "/content"
    # In case you want to clone in your drive:
    # PARENT_DIRECTORY_PATH = "/content/gdrive/My Drive"
    PROJECT_PATH = PARENT_DIRECTORY_PATH + "/" + GIT_REPOSITORY
    %cd "{PARENT_DIRECTORY_PATH}"

### Linking personal Google Drive storage with Google Colab

Mounting is the process by which the OS makes the files and directories of a
storage service (Google Drive) available to the user through the computer's
file system. You will be asked to log in.

In [None]:
if COLAB:
    %cd /content
    from google.colab import drive
    drive.mount('/content/gdrive')

### Clone GitHub repository to Colab Runtime system

In [None]:
if COLAB:
    import json

    with open("/content/gdrive/My Drive/Git/git.json", "r") as f:
        parsed_json = json.load(f)

    GIT_USER_NAME = parsed_json["GIT_USER_NAME"]
    GIT_TOKEN = parsed_json["GIT_TOKEN"]
    GIT_USER_EMAIL = parsed_json["GIT_USER_EMAIL"]

    GIT_PATH = (
        f"https://{GIT_TOKEN}@github.com/{GIT_USER_NAME}/{GIT_REPOSITORY}.git"
    )

    %cd "{PARENT_DIRECTORY_PATH}"

    !git clone "{GIT_PATH}"  # Clone the github repository

    %cd "{PROJECT_PATH}"

### Kaggle API Setup

Run the following cell to point the Kaggle API at the directory that contains
`kaggle.json` (your API credentials).

In [None]:
if COLAB:
    import os

    # Set the directory where the Kaggle client should look for kaggle.json.
    os.environ.update(KAGGLE_CONFIG_DIR="/content/gdrive/My Drive/Kaggle")

### Download the data using the API

Before you start downloading the data, make sure you are in a directory outside
your Google Drive; otherwise the data will be stored there and you will quickly
reach your storage limit.

In [None]:
if DOWNLOAD_DATA:
    !mkdir input
    %cd input

    !pip install --upgrade kaggle
    # Go to kaggle and copy the API Command to download the dataset
    # !kaggle competitions download -c {GIT_REPOSITORY}
    # Instead of downloading all data, we select specific files.
    !kaggle competitions download {GIT_REPOSITORY} -f train.csv
    !kaggle competitions download {GIT_REPOSITORY} -f test.csv
    !kaggle competitions download {GIT_REPOSITORY} -f sample_submission.csv

    # Unzipping the zip files and deleting the zip files
    !unzip \*.zip  && rm *.zip

    # After downloading all data, go back to PROJECT_PATH
    %cd ..

# Directory holding the competition CSV files, relative to the working
# directory: ../input/<name> when KAGGLE is set, otherwise the input/ folder
# inside the repository directory.
INPUT_ROOT = (
    f"../input/{GIT_REPOSITORY}" if KAGGLE else f"../{GIT_REPOSITORY}/input"
)

# Example of how INPUT_ROOT is meant to be used:
# train_df = pd.read_csv(f"{INPUT_ROOT}/train.csv")

### Save changes to GitHub

In [None]:
if SAVE_TO_GITHUB:
    !git add {FILE_NAME}
    !git config --global user.email {GIT_USER_EMAIL}
    !git config --global user.name {GIT_USER_NAME}
    !git commit -am "update {FILE_NAME}"
    !git push