Cell-1: Mount Google Drive

In [6]:
# Make Google Drive available inside the Colab VM; the other cells read the
# challenge data from, and write the .npy files to, paths under this mount.
from google.colab import drive

gdrive_mount_point = '/content/gdrive'
drive.mount(gdrive_mount_point)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


Cell-2: Import necessary libraries

In [2]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import skimage.io as io
from zipfile import ZipFile

Cell-3: Extract the images

In [7]:
# Unpack the image archive from Drive onto the VM's local disk.
# The context manager closes the archive handle once extraction finishes;
# previously the ZipFile object was left open for the rest of the session.
with ZipFile('/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/zindi_data/Images.zip') as zipf:
    zipf.extractall('/content/zindi_raw_images/')

Cell-4: Create the output directory for the npy data

In [8]:
# Folder on Drive that receives the .npy files; exist_ok=True makes this
# cell safe to re-run when the folder is already there.
npy_output_dir = '/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/zindi_npy_data/'
os.makedirs(npy_output_dir, exist_ok=True)

Cell-5: Prepare train npy data

In [9]:
# Build the training arrays from Train.csv and save them to Drive as .npy.
# For every row: column 0 is used as the image file stem, column 1 is stored
# as the label and column 2 as the label quality. Images that are not
# 3-dimensional, or whose height is below 43 pixels, are skipped together
# with their labels so the three saved arrays stay aligned.
train_df = pd.read_csv('/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/zindi_data/Train.csv')
all_train_imgs = []
all_train_labels = []
all_train_labels_quality = []
for i in tqdm(range(train_df.shape[0])):
    # .iat performs one positional scalar lookup. The earlier
    # train_df.iloc[i][0] form built a full row Series per access and then
    # indexed it with an integer key, which recent pandas deprecates for
    # label-indexed Series.
    img = io.imread('/content/zindi_raw_images/Images/' + train_df.iat[i, 0] + '.jpeg')

    if img.ndim != 3:
        print(' Number of img dim is less then 3...skipping')
        continue

    if img.shape[0] < 43:
        print(' img height is less than 43...skipping')
        continue

    # cv2.resize takes dsize as (width, height).
    img = cv2.resize(img, (512, 256))
    all_train_imgs.append(img)
    all_train_labels.append(train_df.iat[i, 1])
    all_train_labels_quality.append(train_df.iat[i, 2])

# np.stack adds the leading sample axis itself, giving the same array as
# concatenating per-image arrays that each carry a length-1 first axis.
all_train_imgs = np.stack(all_train_imgs, axis=0)
all_train_labels = np.array(all_train_labels)
all_train_labels_quality = np.array(all_train_labels_quality)

np.save('/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/zindi_npy_data/train_imgs.npy', all_train_imgs)
np.save('/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/zindi_npy_data/train_labels.npy', all_train_labels)
np.save('/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/zindi_npy_data/train_labels_quality.npy', all_train_labels_quality)

# Drop the in-memory copies now that they are persisted on Drive.
del all_train_imgs, all_train_labels, all_train_labels_quality

 10%|▉         | 1039/10695 [00:04<00:43, 219.61it/s]

 Number of img dim is less then 3...skipping


 32%|███▏      | 3426/10695 [00:16<00:29, 249.99it/s]

 img height is less than 43...skipping


 53%|█████▎    | 5687/10695 [00:26<00:20, 247.01it/s]

 img height is less than 43...skipping


 88%|████████▊ | 9408/10695 [00:41<00:05, 244.02it/s]

 img height is less than 43...skipping


 94%|█████████▍| 10088/10695 [00:44<00:02, 243.80it/s]

 Number of img dim is less then 3...skipping


100%|██████████| 10695/10695 [00:46<00:00, 227.75it/s]


Cell-6: Prepare test npy data

In [10]:
# Load every test image listed in SampleSubmission.csv, resize it, and save
# the joined image array plus the matching UIDs to Drive as .npy files.
test_df = pd.read_csv('/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/zindi_data/SampleSubmission.csv')

all_test_uids, all_test_imgs = [], []
for uid in tqdm(test_df.UID.values):
    all_test_uids.append(uid)
    img = cv2.resize(
        io.imread('/content/zindi_raw_images/Images/' + uid + '.jpeg'),
        (512, 256),  # cv2.resize takes dsize as (width, height)
    )
    # Give each image a length-1 leading axis so they can be joined on axis 0.
    all_test_imgs.append(np.expand_dims(img, axis=0))

all_test_uids = np.array(all_test_uids)
all_test_imgs = np.concatenate(all_test_imgs, axis=0)

test_npy_dir = '/content/gdrive/My Drive/zindi_cgiar_wheat_growth_stage_challenge/zindi_npy_data/'
np.save(test_npy_dir + 'test_imgs.npy', all_test_imgs)
np.save(test_npy_dir + 'test_uids.npy', all_test_uids)

# Drop the in-memory copies now that they are persisted on Drive.
del all_test_imgs, all_test_uids

100%|██████████| 3558/3558 [00:39<00:00, 89.96it/s]
