<a href="https://colab.research.google.com/github/hmlewis-astro/street_network_deep_learning/blob/main/pkl_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Change to GPU runtime

#### Navigate to "Runtime > Change runtime type > GPU > Save"

# Download Kaggle API credentials
#### **Note**: This is a one-time step and you don’t need to generate the credentials every time you download the dataset.
- Navigate to your Kaggle profile
- Click the "Account" tab
- Scroll down to the "API" section
- Click "Create New API Token"; a file named `kaggle.json` will be download which contains your username and API key

# Upload Kaggle API credentials to Google Colab
#### **Note**: Uploaded files will get deleted when this runtime is recycled.
- Upload the `kaggle.json` file that you just downloaded from Kaggle
- Run the following cell

In [2]:
!pip install -q kaggle
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download balraj98/deepglobe-road-extraction-dataset

Downloading deepglobe-road-extraction-dataset.zip to /content
100% 3.79G/3.79G [01:07<00:00, 49.1MB/s]
100% 3.79G/3.79G [01:07<00:00, 60.1MB/s]


In [3]:
!unzip -q /content/deepglobe-road-extraction-dataset.zip -d /content/deep-globe

In [4]:
!rm -rf /content/deepglobe-road-extraction-dataset.zip

# Import packages and libraries

In [5]:
import os
import glob
import random
from tqdm import tqdm
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import cv2


# Load data

In [6]:
def load_data(load_dict=None, input_img_paths=None, target_img_paths=None, image_size=(128, 128)):
    image_names = os.listdir(input_img_paths)
    target_names = []

    for name in image_names:
        name = name.split('_')[0]
        if name not in target_names:
            target_names.append(name)
    
    image_dir = input_img_paths + '/'
    target_dir = target_img_paths + '/'
    
    for i in tqdm(range(len(image_names))):
        try:
            img = plt.imread(image_dir + target_names[i] + '_sat.jpg') 
            target = plt.imread(target_dir + target_names[i] + '_mask.png')
            
        except:
            continue

        img = (cv2.resize(img, image_size) / 255.)
        target = cv2.resize(target, image_size).astype(int)

        load_dict['img'].append(img)
        load_dict['mask'].append(target[:,:,0])
        
    return load_dict


In [7]:
image_size = (128, 128)

train_dict = {'img' : [], 'mask' : []}

input_img_paths = 'deep-globe/train'
target_img_paths = 'deep-globe/train'
train_dict = load_data(train_dict, input_img_paths, target_img_paths, image_size)

100%|██████████| 12452/12452 [06:54<00:00, 30.01it/s]


In [8]:
X_train, y_train = train_dict['img'], train_dict['mask']

In [12]:
len(X_train), X_train[-1].shape, len(y_train), y_train[-1].shape

(6226, (128, 128, 3), 6226, (128, 128))

In [13]:
with open('deep-globe-sat-img-train.pkl', 'wb') as f:
    pickle.dump(X_train, f, pickle.HIGHEST_PROTOCOL)

with open('deep-globe-mask-train.pkl', 'wb') as f:
    pickle.dump(y_train, f, pickle.HIGHEST_PROTOCOL)

In [32]:
def load_test(load_dict=None, input_img_paths=None, image_size=(128, 128)):
    image_names = os.listdir(input_img_paths)

    for name in image_names:
        name = name.split('_')[0]
    
    image_dir = input_img_paths + '/'
    
    for i in tqdm(range(len(image_names))):
        try:
            img = plt.imread(image_dir + image_names[i]) 
            
        except:
            continue

        img = (cv2.resize(img, image_size) / 255.)

        load_dict['img'].append(img)
        
    return load_dict

In [33]:
test_dict = {'img' : []}

input_img_paths = 'deep-globe/test'
test_dict = load_test(test_dict, input_img_paths, image_size)

input_img_paths = 'deep-globe/valid'
test_dict = load_test(test_dict, input_img_paths, image_size)


100%|██████████| 1101/1101 [00:28<00:00, 38.56it/s]
100%|██████████| 1243/1243 [00:33<00:00, 37.33it/s]


In [35]:
len(test_dict['img'])

2344

In [36]:
X_test = test_dict['img']

In [37]:
with open('deep-globe-sat-img-test.pkl', 'wb') as f:
    pickle.dump(X_test, f, pickle.HIGHEST_PROTOCOL)