# Workflow
- Get NAIP imagery from public Azure Blob (see notebook naip_download_pc.ipynb)
- Rasterize label data based on the NAIP image tile
- Create Image and Label fame name and Index match
- Store info in CSV

In [None]:
import os
import glob
from os import makedirs, path as op 
import geopandas as gpd
from subprocess import call
from rasterio.features import geometry_mask
import rasterio
import numpy as np


In [None]:
def fix_id(geojson, class_id, keyword, rankid):
    """adding class id to the label geojson and update output directory for new label"""
    gdf = gpd.read_file(geojson)
    gdf['class_id'] = int(class_id)
    outdir = f"../label_af_download/updated_labels_{keyword}"
    if not op.exists(outdir):
        makedirs(outdir)
    basename = op.basename(geojson)
    gdf.to_file(f"{outdir}/{rankid}_{basename}", driver="GeoJSON")
    return outdir

In [None]:
def get_key(val, main_dict):
    """to fetch key of label order based on the label class name"""
    for key, value in main_dict.items():
         if val == value:
             return key
 
    return "key doesn't exist"

In [None]:
def burn_base(raster, aoi, outfile):
    """rasterize aoi bounds"""
    gdf_aoi = gpd.read_file(aoi)
    with rasterio.open(raster, 'r') as src:
        profile = src.profile
        profile.update(
            dtype=rasterio.uint8, 
            count=1,
            compress='lzw'
        )
        fsrc = src.read()
        outshape = (fsrc.shape[1], fsrc.shape[2])
        transform_out = src.transform
        out_arr = np.zeros(outshape)
  
        out_label=geometry_mask(gdf_aoi.geometry,
                out_shape=outshape, 
                transform=transform_out, 
                all_touched=True,
                invert=True
            )
        with rasterio.open(outfile, "w", **profile) as dst:
            dst.write(out_label,1)
    return outfile

In [None]:
def burn_labels(base_mask, label_ls, outfile):
    """burn labels based on the priority order"""
    with rasterio.open(base_mask, 'r') as src:
        profile = src.profile
        profile.update(
            dtype=rasterio.uint8, 
            count=1,
            compress='lzw'
        )
        fsrc = src.read_masks(1)
        outshape = fsrc.shape
        print(outshape)
        transform_out = src.transform
        labels_arr=np.zeros(outshape)
        for geojson in label_ls:
            print(geojson)
            gdf= gpd.read_file(geojson)
            print(gdf.crs)
            print(len(gdf.geometry))
            print(gdf["class_id"].unique()[0])
            mask = geometry_mask(gdf.geometry, out_shape=outshape, transform=transform_out, all_touched=True, invert=True)
            print(np.unique(mask))
            update_mask = np.where(mask==True)
            labels_arr[update_mask] = gdf["class_id"].unique()[0]
            print(np.unique(labels_arr))
        with rasterio.open(outfile, "w", **profile) as dst:
            dst.write(labels_arr,1)

In [None]:
aoi0 = "../label_af_download/aoi_detroit_labeled/aoi0_bounds.geojson"
aoi1 = "../label_af_download/aoi_detroit_labeled/aoi1_bounds.geojson"
aoi2 = "../label_af_download/aoi_detroit_labeled/aoi2_bounds.geojson"
label_path = "../label_af_download/aoi_detroit_labeled"
aoi0_naip = "../label_af_download/downloaded_naip_aois/2018-07-06_naip_aoi0_bounds.tif"
aoi1_naip = "../label_af_download/downloaded_naip_aois/2012-06-29_naip_aoi1_bounds.tif"
aoi2_naip = "../label_af_download/downloaded_naip_aois/2016-08-03_naip_aoi2_bounds.tif"


### Raterize LULC Labels
The land classes should be burn in this order (1-7)
1 on the bottom and 7 burn the last, so it's on the top

7. tree_canopy
6. building
5. water
4. bare_soil
3. roads_railroads
2. grass_shrub
1. other_impervious

Though the real class IDs are:

- 0: Nodata (use the aoi)
- 1: Tree Canopy, 
- 2: Grass/Shrub, 
- 3: bare soil, 
- 4: water, 
- 5: buildings, 
- 6:roads/railroads, 
- 7:other impervious

In [None]:
labels_classes = {
    "impervious": 7,
    "building": 5, 
    "shrub":2, 
    "canopy":1,
    "railroads": 6, 
    "soil": 3, 
    "water": 4
}

In [None]:
burn_order ={
1: "impervious", 
2: "shrub",  
3: "railroads",
4:  "soil",
5:  "water",
6: "building",
7: "canopy",
}

In [None]:
# aoi0_labels = glob.glob(label_path+"/aoi_0/*.geojson")
aoi0_labels = glob.glob(label_path+"/aoi_0/*.geojson")
for label in aoi0_labels:
    basename=op.basename(label)
    filezero = op.splitext(basename)[0]
    keyword = filezero.split("_")[-1]
    class_id = labels_classes[keyword]
    rankid = get_key(keyword, burn_order)
    print(keyword, class_id)
    out_dir_or = fix_id(label, class_id, "aoi0", rankid)
    print(out_dir_or)
# out= '/'.join(subdir for subdir in out_dir_or.split("/")[:-1])
# print(out)
sorted_labels = sorted(glob.glob(out_dir_or + "/*.geojson"))
print(sorted_labels)
mask_path = burn_base(aoi0_naip, aoi0, "../label_af_download/aoi_detroit_labeled/mark0.tif")
burn_labels(mask_path, sorted_labels, "../label_af_download/aoi_detroit_labeled/aoi0_labels.tif")
# label_array(aoi0_labels, aoi0_naip, labels_classes, burn_order, "aoi0", aoi0, "../label_af_download/aoi_detroit_labeled/mark0.tif", "../label_af_download/aoi_detroit_labeled/aoi0_labels.tif")

In [None]:
aoi1_labels = glob.glob(label_path+"/aoi_1/*.geojson")
for label in aoi1_labels:
    basename=op.basename(label)
    filezero = op.splitext(basename)[0]
    keyword = filezero.split("_")[-1]
    class_id = labels_classes[keyword]
    rankid = get_key(keyword, burn_order)
    print(keyword, class_id)
    out_dir_or = fix_id(label, class_id, "aoi1", rankid)
    print(out_dir_or)
# out= '/'.join(subdir for subdir in out_dir_or.split("/")[:-1])
# print(out)
sorted_labels = sorted(glob.glob(out_dir_or + "/*.geojson"))
print(sorted_labels)
mask_path = burn_base(aoi1_naip, aoi1, "../label_af_download/aoi_detroit_labeled/mark1.tif")
burn_labels(mask_path, sorted_labels, "../label_af_download/aoi_detroit_labeled/aoi1_labels.tif")

In [None]:
aoi2_labels = glob.glob(label_path+"/aoi_2/*.geojson")
for label in aoi2_labels:
    basename=op.basename(label)
    filezero = op.splitext(basename)[0]
    keyword = filezero.split("_")[-1]
    class_id = labels_classes[keyword]
    rankid = get_key(keyword, burn_order)
    print(keyword, class_id)
    out_dir_or = fix_id(label, class_id, "aoi2", rankid)
    print(out_dir_or)
# out= '/'.join(subdir for subdir in out_dir_or.split("/")[:-1])
# print(out)
sorted_labels = sorted(glob.glob(out_dir_or + "/*.geojson"))
print(sorted_labels)
mask_path = burn_base(aoi2_naip, aoi2, "../label_af_download/aoi_detroit_labeled/mark2.tif")
burn_labels(mask_path, sorted_labels, "../label_af_download/aoi_detroit_labeled/aoi2_labels.tif")

In [42]:
import pandas as pd

In [53]:
df_train = pd.DataFrame()
df_val = pd.DataFrame()
df_test = pd.DataFrame()
label_path = "../label_af_download/trainingdataset-data-team_aois/labels"
image_path = "../label_af_download/trainingdataset-data-team_aois/naips"

In [45]:
images = sorted(glob.glob(image_path +"/*.tif"))
images

['../label_af_download/trainingdataset-data-team_aois/naips/2012-06-29_naip_aoi0_bounds.tif',
 '../label_af_download/trainingdataset-data-team_aois/naips/2012-06-29_naip_aoi1_bounds.tif',
 '../label_af_download/trainingdataset-data-team_aois/naips/2012-06-29_naip_aoi2_bounds.tif',
 '../label_af_download/trainingdataset-data-team_aois/naips/2014-06-28_naip_aoi0_bounds.tif',
 '../label_af_download/trainingdataset-data-team_aois/naips/2014-06-28_naip_aoi2_bounds.tif',
 '../label_af_download/trainingdataset-data-team_aois/naips/2016-08-03_naip_aoi0_bounds.tif',
 '../label_af_download/trainingdataset-data-team_aois/naips/2016-08-03_naip_aoi2_bounds.tif',
 '../label_af_download/trainingdataset-data-team_aois/naips/2018-07-06_naip_aoi0_bounds.tif',
 '../label_af_download/trainingdataset-data-team_aois/naips/2018-07-07_naip_aoi2_bounds.tif']

In [46]:
base_url = "https://uvmlabels.blob.core.windows.net/"
label_key = "labels4-data-team-aois"
image_key = "naip4-data-team-aois"

In [54]:
image_url = base_url + image_key
label_url = base_url + label_key
train_img = []
train_label = []
for img in images[:-2]:
    basename = op.basename(img)
    filezeor = op.splitext(basename)[0]
    img_url = image_url + "/" + basename
    lab_url = label_url + "/" + filezeor + "_labels.tif"
    train_img.append(img_url)
    train_label.append(lab_url)


In [55]:
train_img

['https://uvmlabels.blob.core.windows.net/naip4-data-team-aois/2012-06-29_naip_aoi0_bounds.tif',
 'https://uvmlabels.blob.core.windows.net/naip4-data-team-aois/2012-06-29_naip_aoi1_bounds.tif',
 'https://uvmlabels.blob.core.windows.net/naip4-data-team-aois/2012-06-29_naip_aoi2_bounds.tif',
 'https://uvmlabels.blob.core.windows.net/naip4-data-team-aois/2014-06-28_naip_aoi0_bounds.tif',
 'https://uvmlabels.blob.core.windows.net/naip4-data-team-aois/2014-06-28_naip_aoi2_bounds.tif',
 'https://uvmlabels.blob.core.windows.net/naip4-data-team-aois/2016-08-03_naip_aoi0_bounds.tif',
 'https://uvmlabels.blob.core.windows.net/naip4-data-team-aois/2016-08-03_naip_aoi2_bounds.tif']

In [56]:
train_label

['https://uvmlabels.blob.core.windows.net/labels4-data-team-aois/2012-06-29_naip_aoi0_bounds_labels.tif',
 'https://uvmlabels.blob.core.windows.net/labels4-data-team-aois/2012-06-29_naip_aoi1_bounds_labels.tif',
 'https://uvmlabels.blob.core.windows.net/labels4-data-team-aois/2012-06-29_naip_aoi2_bounds_labels.tif',
 'https://uvmlabels.blob.core.windows.net/labels4-data-team-aois/2014-06-28_naip_aoi0_bounds_labels.tif',
 'https://uvmlabels.blob.core.windows.net/labels4-data-team-aois/2014-06-28_naip_aoi2_bounds_labels.tif',
 'https://uvmlabels.blob.core.windows.net/labels4-data-team-aois/2016-08-03_naip_aoi0_bounds_labels.tif',
 'https://uvmlabels.blob.core.windows.net/labels4-data-team-aois/2016-08-03_naip_aoi2_bounds_labels.tif']

In [57]:
df_train['image_fn'] = train_img
df_train["label_fn"] = train_label
df_train["group"] = "uvm"
df_train

Unnamed: 0,image_fn,label_fn,group
0,https://uvmlabels.blob.core.windows.net/naip4-...,https://uvmlabels.blob.core.windows.net/labels...,uvm
1,https://uvmlabels.blob.core.windows.net/naip4-...,https://uvmlabels.blob.core.windows.net/labels...,uvm
2,https://uvmlabels.blob.core.windows.net/naip4-...,https://uvmlabels.blob.core.windows.net/labels...,uvm
3,https://uvmlabels.blob.core.windows.net/naip4-...,https://uvmlabels.blob.core.windows.net/labels...,uvm
4,https://uvmlabels.blob.core.windows.net/naip4-...,https://uvmlabels.blob.core.windows.net/labels...,uvm
5,https://uvmlabels.blob.core.windows.net/naip4-...,https://uvmlabels.blob.core.windows.net/labels...,uvm
6,https://uvmlabels.blob.core.windows.net/naip4-...,https://uvmlabels.blob.core.windows.net/labels...,uvm


In [58]:
df_val['image_fn'] = "https://uvmlabels.blob.core.windows.net/naip4-data-team-aois/2018-07-06_naip_aoi0_bounds.tif"
df_val["label_fn"] = "https://uvmlabels.blob.core.windows.net/labels4-data-team-aois/2018-07-06_naip_aoi0_bounds.tif"
df_val["group"] = "uvm"

In [59]:
df_test['image_fn'] = "https://uvmlabels.blob.core.windows.net/naip4-data-team-aois/2018-07-07_naip_aoi2_bounds.tif"
df_test["label_fn"] = "https://uvmlabels.blob.core.windows.net/labels4-data-team-aois/2018-07-07_naip_aoi2_bounds_labels.tif"
df_test["group"] = "uvm"

In [60]:
df_train.to_csv("DevSeed_Data_created_train.csv")
df_val.to_csv("DevSeed_Data_created_val.csv")
df_test.to_csv("DevSeed_Data_created_test.csv")

In [65]:
midwest_train = "../src/data/midwest_train_multi_year.csv"
midwest_val = "../src/data/midwest_val_multi_year.csv"
midwest_test = "../src/data/midwest_test_multi_year.csv"
midw_train = pd.read_csv(midwest_train)
midw_val = pd.read_csv(midwest_val)
midw_test = pd.read_csv(midwest_test)


In [62]:
midw_train.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,image_fn,label_fn,group
0,0,0,0,2,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,umv_label
1,1,1,1,26,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,umv_label
2,2,2,2,13,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,umv_label
3,3,3,3,16,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,umv_label
4,4,4,4,4,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,umv_label


In [66]:
midw_train = midw_train[["image_fn", "label_fn", "group"]]
midw_val = midw_val[["image_fn", "label_fn", "group"]]
midw_test = midw_test[["image_fn", "label_fn", "group"]]
midw_train.head()

Unnamed: 0,image_fn,label_fn,group
0,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,umv_label
1,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,umv_label
2,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,umv_label
3,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,umv_label
4,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,umv_label


In [68]:
midwest_data_train = pd.concat([df_train, midw_train])
midwest_data_train['group'] = "uvm"

midwest_data_val = pd.concat([df_val, midw_val])
midwest_data_val['group'] = "uvm" 

midwest_data_test = pd.concat([df_test, midw_test])
midwest_data_test['group'] = "uvm" 
midwest_data_train.head()


Unnamed: 0,image_fn,label_fn,group
0,https://uvmlabels.blob.core.windows.net/naip4-...,https://uvmlabels.blob.core.windows.net/labels...,uvm
1,https://uvmlabels.blob.core.windows.net/naip4-...,https://uvmlabels.blob.core.windows.net/labels...,uvm
2,https://uvmlabels.blob.core.windows.net/naip4-...,https://uvmlabels.blob.core.windows.net/labels...,uvm
3,https://uvmlabels.blob.core.windows.net/naip4-...,https://uvmlabels.blob.core.windows.net/labels...,uvm
4,https://uvmlabels.blob.core.windows.net/naip4-...,https://uvmlabels.blob.core.windows.net/labels...,uvm


In [69]:
midwest_data_val.head()

Unnamed: 0,image_fn,label_fn,group
0,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,uvm
1,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,uvm
2,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,uvm
3,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,uvm
4,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,uvm


In [70]:
midwest_data_test.head()

Unnamed: 0,image_fn,label_fn,group
0,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,uvm
1,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,uvm
2,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,uvm
3,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/detroi...,uvm
4,https://naipblobs.blob.core.windows.net/naip/v...,https://uvmlabels.blob.core.windows.net/cuyaho...,uvm


In [71]:
len(midwest_data_test), len(midwest_data_val), len(midwest_data_train)

(24, 46, 167)

In [72]:
midwest_data_train.to_csv("../src/data/midwest_n_devseed_train_multiple_years.csv")
midwest_data_val.to_csv("../src/data/midwest_n_devseed_val_multiple_years.csv")
midwest_data_test.to_csv("../src/data/midwest_n_devseed_test_multiple_years.csv")