# Import Data

**Authors:** Christopher Sun, Jai Sharma, Milind Maiti

**Date:** 2022.06.16

**Description:** This module imports the data from the Kaggle equivalent of the IEEE DataPort data set. The file paths shown here are relative to the Kaggle Notebooks interface.

## Import Libraries

In [1]:
# Import necessary libraries
import cv2
import math
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks, Sequential, Model
from tensorflow.keras.layers import Dropout, BatchNormalization, Activation, Dense, Input
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten
import pickle
import pandas as pd
import matplotlib.pyplot as plt
from skimage import io
import skimage
import json
import os

# Print confirmation; this line is only reached if every import above succeeded
print("Setup Complete")

## Find File Paths

In [3]:
# Collect the file names in the training directory and split them by
# extension: j2k RGB images, tif pixelwise elevation masks, and the json files
# with the labels for each example
path = "../input/nga-overhead-geopose/geopose_train/geopose_train"

# Sort once up front; the per-type lists below are filtered from this sorted
# list, so each of them is already in sorted order
file_names = os.listdir(path)
file_names.sort()

# Match each extension explicitly so that any other file in the directory is
# skipped instead of being treated as a json label file
tif_files = [name for name in file_names if name.endswith(".tif")]
j2k_files = [name for name in file_names if name.endswith(".j2k")]
json_files = [name for name in file_names if name.endswith(".json")]

## Get Labels for Geopose Model

In [13]:
# Get the scale and angle labels from the json files
df = []  # one parsed json object per label file, in json_files order

for json_name in json_files:
    # Use a context manager so every file handle is closed right after parsing
    with open(path + "/" + json_name, encoding="utf-8") as json_file:
        df.append(json.load(json_file))

# Each parsed json object is indexed by its "scale" and "angle" keys
scale = [record["scale"] for record in df]
angle = [record["angle"] for record in df]
y = pd.DataFrame({"scale": scale, "angle": angle})
y_np = np.array(y)  # the same labels as a NumPy array, one row per json file

## Get RGB Images and Elevation Masks

In [None]:
# Load each j2k RGB satellite image together with its tif pixelwise elevation
# mask, and resize both to a fixed size to comply with computational
# constraints
dim = (256, 256)
num_images = len(tif_files)
X = []
tifs = []
for i in range(num_images):
    # Entry i of j2k_files is paired with entry i of tif_files
    rgb_path = f"{path}/{j2k_files[i]}"
    mask_path = f"{path}/{tif_files[i]}"

    img = np.array(skimage.io.imread(rgb_path))
    mask = np.array(skimage.io.imread(mask_path))

    resize_img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
    resize_mask = cv2.resize(mask, dim, interpolation=cv2.INTER_AREA)

    X.append(resize_img)
    tifs.append(resize_mask)

# Convert the per-example lists into NumPy arrays
X = np.array(X)
tifs = np.array(tifs)