In [2]:
%matplotlib inline
import sys, os, time
import pandas as pd
import numpy as np
import json

from collections import defaultdict

import matplotlib
import matplotlib.pyplot as plt

import fiona
import fiona.transform
import rasterio
import rasterio.mask
import shapely
import shapely.geometry

In [3]:
def bounds_intersection(bound0, bound1):
    """Return the overlap of two bounding boxes as a tuple.

    Both arguments must unpack into exactly four values ordered
    (left, bottom, right, top); the result uses the same ordering.

    No check is made that the boxes actually overlap: for disjoint inputs
    the returned box is inverted (left > right and/or bottom > top).
    """
    west_a, south_a, east_a, north_a = bound0
    west_b, south_b, east_b, north_b = bound1

    # The overlap starts at the larger of the two lower edges ...
    lower_left = (max(west_a, west_b), max(south_a, south_b))
    # ... and ends at the smaller of the two upper edges.
    upper_right = (min(east_a, east_b), min(north_a, north_b))

    return lower_left + upper_right

In [4]:
# Build a lookup from the first CSV column to the second column.
# make_dataset() indexes this dict with a NAIP ".mrf" path derived from a
# land-cover filename and uses the value as the path of the older NAIP tile.
new_to_old_map = {}
# `with` guarantees the file is closed even if a malformed row raises below.
with open("data/2013_2014-to-2011_2012.csv", "r") as f:
    f.readline()  # skip the header row
    lines = f.read().strip().split("\n")
for line in lines:
    if not line.strip():
        # A header-only file (or a stray blank line) would otherwise hit
        # IndexError on parts[1].
        continue
    parts = line.split(",")
    new_to_old_map[parts[0]] = parts[1]

In [5]:
# Dataset identifiers processed by this notebook, each written as
# "<state>_1m_<year>". The trailing comments record each dataset's tile count.
states = [
    # 2013 datasets
    "de_1m_2013",  # 107 tiles
    "ny_1m_2013",  # 407 tiles
    "md_1m_2013",  # 691 tiles
    "pa_1m_2013",  # 2239 tiles
    # 2014 datasets
    "wv_1m_2014",  # 292 tiles
    "va_1m_2014",  # 1238 tiles
]

In [44]:
states

['de_1m_2013',
 'ny_1m_2013',
 'md_1m_2013',
 'pa_1m_2013',
 'wv_1m_2014',
 'va_1m_2014']

## Sample patches from the train and val files

In [38]:
def write_new_tiff(fn, data, transform, crs):
    """Write a raster array to `fn` as an LZW-compressed GeoTIFF.

    Parameters
    ----------
    fn : str
        Path of the file to create; it is opened in write mode.
    data : array
        Array whose ``shape`` unpacks as (count, height, width), i.e.
        band-first. The profile always declares dtype "uint8", so `data`
        is presumably uint8 already -- TODO confirm against callers.
    transform
        Stored as the "transform" entry of the output profile.
    crs
        Stored as the "crs" entry of the output profile.
    """
    count, height, width = data.shape

    new_profile = {
        "driver": "GTiff",
        "height": height,
        "width": width,
        "crs": crs,
        "dtype": "uint8",
        "count": count,
        "transform": transform,
        "compress": "lzw"
    }

    # Context manager closes (and flushes) the dataset even when write()
    # raises, instead of leaking the handle as the manual close() did.
    with rasterio.open(fn, "w", **new_profile) as dst:
        dst.write(data)

In [41]:
def make_dataset(fns, state, output_dir):
    """Crop each land-cover tile and its NAIP / NLCD companions to their
    common extent and write the three crops into `output_dir`.

    For every entry of `fns` the matching NAIP path is derived from the
    land-cover path, the older NAIP path is looked up in the module-level
    `new_to_old_map`, and the NLCD path is derived from that older path.
    The three rasters are cropped to the intersection of their bounds and
    written with `write_new_tiff`.

    Parameters
    ----------
    fns : list of str
        Land-cover raster paths. Each is expected to contain "resampled-lc"
        and to end in a 7-character suffix that is sliced off (presumably
        "_lc.tif" -- TODO confirm).
    state : str
        Currently unused; kept so existing callers keep working.
    output_dir : str
        Directory the three output GeoTIFFs are written to. A trailing
        slash is no longer required.

    Raises
    ------
    KeyError
        If a derived NAIP path is missing from `new_to_old_map`.
    AssertionError
        If the land-cover or NLCD raster's CRS differs from the NAIP CRS.
    """
    for i, lc_fn in enumerate(fns):
        print(i, len(fns))

        # [:-7] drops the land-cover suffix, [:-4] drops the ".mrf" extension.
        new_naip_fn = lc_fn.replace("resampled-lc", "esri-naip")[:-7] + ".mrf"
        old_naip_fn = new_to_old_map[new_naip_fn]
        nlcd_fn = old_naip_fn.replace("esri-naip", "resampled-nlcd")[:-4] + "_nlcd.tif"

        # Open all three rasters in one `with` so every handle is closed even
        # when a CRS assertion or the masking step raises.
        with rasterio.open(new_naip_fn, "r") as naip_f, \
                rasterio.open(lc_fn, "r") as lc_f, \
                rasterio.open(nlcd_fn, "r") as nlcd_f:
            crs = naip_f.crs["init"]
            assert lc_f.crs["init"] == crs, \
                "CRS mismatch between %s and %s" % (lc_fn, new_naip_fn)
            assert nlcd_f.crs["init"] == crs, \
                "CRS mismatch between %s and %s" % (nlcd_fn, new_naip_fn)

            # Only the area covered by all three rasters is kept.
            bounds = bounds_intersection(
                bounds_intersection(naip_f.bounds, lc_f.bounds), nlcd_f.bounds
            )
            left, bottom, right, top = bounds
            geom = shapely.geometry.mapping(shapely.geometry.box(left, bottom, right, top, ccw=True))

            naip_data, naip_transform = rasterio.mask.mask(naip_f, [geom], crop=True)
            lc_data, lc_transform = rasterio.mask.mask(lc_f, [geom], crop=True)
            nlcd_data, nlcd_transform = rasterio.mask.mask(nlcd_f, [geom], crop=True)

        # os.path.join gives the same path as before when output_dir ends in
        # "/", and a correct one when it does not.
        naip_out_fn = os.path.join(output_dir, os.path.basename(new_naip_fn)[:-4] + "_naip.tif")
        nlcd_out_fn = os.path.join(output_dir, os.path.basename(nlcd_fn))
        lc_out_fn = os.path.join(output_dir, os.path.basename(lc_fn))

        write_new_tiff(naip_out_fn, naip_data, naip_transform, crs)
        write_new_tiff(nlcd_out_fn, nlcd_data, nlcd_transform, crs)
        write_new_tiff(lc_out_fn, lc_data, lc_transform, crs)
        

In [43]:
# For every state and split, read the list of land-cover tiles from
# splits/<state>_<split>.txt and write the cropped tiles for that split.
for state in states:
    for ds in ["train", "val"]:
        print(state, ds)
        output_dir = "/mnt/blobfuse/cnn-minibatches/cvpr_splits/%s_%s_tiles/" % (state, ds)
        os.makedirs(output_dir, exist_ok=True)

        # `with` closes the split file even if reading fails. Blank lines are
        # dropped so an empty split file yields no work instead of passing ""
        # to make_dataset.
        with open("splits/%s_%s.txt" % (state, ds), "r") as f:
            fns = [line for line in f.read().strip().split("\n") if line.strip()]

        make_dataset(fns, state, output_dir)

de_1m_2013 train
0 25
1 25
2 25
3 25
4 25
5 25
6 25
7 25
8 25
9 25
10 25
11 25
12 25
13 25
14 25
15 25
16 25
17 25
18 25
19 25
20 25
21 25
22 25
23 25
24 25
de_1m_2013 val
0 5
1 5
2 5
3 5
4 5
ny_1m_2013 train
0 25
1 25
2 25
3 25
4 25
5 25
6 25
7 25
8 25
9 25
10 25
11 25
12 25
13 25
14 25
15 25
16 25
17 25
18 25
19 25
20 25
21 25
22 25
23 25
24 25
ny_1m_2013 val
0 5
1 5
2 5
3 5
4 5
md_1m_2013 train
0 25
1 25
2 25
3 25
4 25
5 25
6 25
7 25
8 25
9 25
10 25
11 25
12 25
13 25
14 25
15 25
16 25
17 25
18 25
19 25
20 25
21 25
22 25
23 25
24 25
md_1m_2013 val
0 5
1 5
2 5
3 5
4 5
pa_1m_2013 train
0 25
1 25
2 25
3 25
4 25
5 25
6 25
7 25
8 25
9 25
10 25
11 25
12 25
13 25
14 25
15 25
16 25
17 25
18 25
19 25
20 25
21 25
22 25
23 25
24 25
pa_1m_2013 val
0 5
1 5
2 5
3 5
4 5
wv_1m_2014 train
0 25
1 25
2 25
3 25
4 25
5 25
6 25
7 25
8 25
9 25
10 25
11 25
12 25
13 25
14 25
15 25
16 25
17 25
18 25
19 25
20 25
21 25
22 25
23 25
24 25
wv_1m_2014 val
0 5
1 5
2 5
3 5
4 5
va_1m_2014 train
0 25
1 25
2 25
3 25
4 2