# Cassava Leaf Disease Data Download

Interactive notebook that downloads the Cassava Leaf Disease Classification competition data from Kaggle, resizes the training images, and stores the resulting archive on Google Drive for use in the assignment.

In [1]:
!pip install -q kaggle

In [15]:
from google.colab import drive, files, userdata
from pathlib import Path
from PIL import Image

import json
import matplotlib.pyplot as plt
import zipfile

In [3]:
# Mount Google Drive at /content/gdrive; the finished archive is copied there at the end of the notebook.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [22]:
def mkdir(path_str: str) -> Path:
  """
  Ensures the directory `path_str` exists and returns it as a Path.

  Missing parent directories are created as well; an already existing
  directory is left untouched.
  """
  target_dir = Path(path_str)
  target_dir.mkdir(exist_ok=True, parents=True)
  return target_dir

In [4]:
def load_kaggle_creds() -> None:
  """
  Creates the kaggle.json credentials file, which the API expects.

  Reads the `kaggle_username` and `kaggle_key` Colab secrets via `userdata`
  and writes them as JSON to ~/.kaggle/kaggle.json. The file is written
  directly to its final location with owner-only (0600) permissions, so no
  plain-text copy of the API key is left behind in the working directory.
  """
  creds_dict = {
      "username": userdata.get('kaggle_username'),
      "key": userdata.get('kaggle_key'),
  }

  kaggle_dir = Path.home() / ".kaggle"
  kaggle_dir.mkdir(parents=True, exist_ok=True)
  creds_path = kaggle_dir / "kaggle.json"

  # Make sure the file exists with restrictive permissions *before* the key
  # is written; the chmod also tightens a file left over from an earlier run.
  creds_path.touch(mode=0o600, exist_ok=True)
  creds_path.chmod(0o600)

  with open(creds_path, 'w') as file:
    json.dump(creds_dict, file)

In [28]:
def download_competition_data() -> Path:
  """
  Downloads (when necessary) and extracts the competition data.

  The competition archive is looked up in the current working directory. If
  it is not there, the Kaggle credentials are set up and the archive is
  fetched with the kaggle CLI; an archive that is already present is reused,
  so re-running the cell does not download it again.

  Returns:
    Path of the directory the archive was extracted into.

  Raises:
    subprocess.CalledProcessError: if the kaggle CLI exits with an error.
  """
  import subprocess  # local import: only this function needs it

  competition = "cassava-leaf-disease-classification"
  archive = Path(f"{competition}.zip")

  if not archive.exists():
    load_kaggle_creds()
    # The CLI stores <competition>.zip in the current working directory.
    subprocess.run(
        ["kaggle", "competitions", "download", "-c", competition],
        check=True,
    )

  path = mkdir(f"/content/data/{competition}")
  with zipfile.ZipFile(archive, 'r') as z:
    z.extractall(path)
  return path

In [29]:
# Delete the directory the competition archive is extracted into, so that a
# subsequent extraction starts from an empty directory.
!rm -rf "/content/data/cassava-leaf-disease-classification"

In [33]:
def create_assignment_dataset(raw_path: Path, height: int = 255) -> Path:
  """
  Builds the down-scaled assignment dataset from the raw competition data.

  Every `*.jpg` found in `raw_path / "train_images"` is resized and saved
  under the same file name in
  /content/data/cldc_assignment_data/train_images.

  Args:
    raw_path: directory containing the extracted competition data.
    height: height of the resized images in pixels; the width is derived
      from it using the 8:6 aspect ratio of the source images.

  Returns:
    Path of the assignment dataset directory.
  """
  dataset_path = mkdir("/content/data/cldc_assignment_data")

  src_path = raw_path / "train_images"
  dest_path = dataset_path / "train_images"
  dest_path.mkdir(parents=True, exist_ok=True)

  # images are (800, 600), resize to (X, height) keeping that 8:6 ratio.
  # Image.resize expects integer pixel sizes, so the width is rounded.
  size = (round(height * 8 / 6), height)

  for img in src_path.glob("*.jpg"):
    # The context manager closes the source file after each image instead of
    # leaving one open handle per image behind.
    with Image.open(img) as im:
      # Image.NEAREST is the filter the literal 0 stood for.
      im.resize(size, Image.NEAREST).save(dest_path / img.name)

  return dataset_path


In [30]:
# Extract the raw competition data, then build the resized assignment dataset from it.
raw_data_path = download_competition_data()
ds = create_assignment_dataset(raw_data_path)

In [35]:
# Pack the assignment dataset into a gzip-compressed tarball. tar strips the
# leading '/', so members are stored under content/data/cldc_assignment_data/.
!tar czf /content/assignment_data.tar.gz /content/data/cldc_assignment_data

tar: Removing leading `/' from member names


In [37]:
# Copy the tarball into the assignment folder on the mounted Google Drive.
!cp /content/assignment_data.tar.gz /content/gdrive/MyDrive/Study/ds_and_ml/UCL_AMLSII/assignment/