In [1]:
%matplotlib inline
import sys, os, time
import pandas as pd
import numpy as np
import json

from collections import defaultdict

import matplotlib
import matplotlib.pyplot as plt

import fiona
import fiona.transform
import rasterio
import rasterio.mask
import shapely
import shapely.geometry

In [2]:
def bounds_intersection(bound0, bound1):
    """Return the overlapping extent of two bounding boxes.

    Both arguments are 4-item sequences ordered ``(left, bottom, right, top)``.
    The result is a tuple in the same order, built from the larger of the two
    left/bottom values and the smaller of the two right/top values. No check
    is made that the two boxes actually overlap.
    """
    west_a, south_a, east_a, north_a = bound0
    west_b, south_b, east_b, north_b = bound1

    # Lower edges move inward by taking the maximum, upper edges by the minimum.
    west = max(west_a, west_b)
    south = max(south_a, south_b)
    east = min(east_a, east_b)
    north = min(north_a, north_b)

    return (west, south, east, north)

In [3]:
# Build a lookup from the first CSV column to the second. It is used further
# down to translate a 2013-2014 NAIP file name into its 2011-2012 counterpart.
new_to_old_map = {}
with open("../data/2013-2014_to_2011-2012.csv", "r") as f:
    f.readline()  # discard the first line (presumably a header row)
    lines = f.read().strip().split("\n")

for line in lines:
    if not line:
        # An empty body or stray blank line would otherwise fail on parts[1].
        continue
    parts = line.split(",")
    new_to_old_map[parts[0]] = parts[1]

In [4]:
# Dataset identifiers iterated over by the dataset-building loops in this
# notebook; each one is substituted into the split-file and output-directory
# path templates. The trailing comments are the author's per-entry tile counts.
states = [
    "de_1m_2013", # 107 tiles
    "ny_1m_2013", # 407 tiles
    "md_1m_2013", # 691 tiles
    "pa_1m_2013", # 2239 tiles
    "wv_1m_2014", # 292 tiles
    "va_1m_2014"  # 1238 tiles
]

In [5]:
# Echo the configured dataset identifiers as the cell output.
states

['de_1m_2013',
 'ny_1m_2013',
 'md_1m_2013',
 'pa_1m_2013',
 'wv_1m_2014',
 'va_1m_2014']

## Sample patches from the train and val files

In [6]:
def write_new_tiff(fn, data, transform, crs):
    """Write a 3-D array to ``fn`` as an LZW-compressed GeoTIFF declared as uint8.

    Args:
        fn: Output path handed to ``rasterio.open``.
        data: Array whose ``shape`` unpacks as ``(count, height, width)``.
        transform: Value stored as the dataset's ``transform``.
        crs: Value stored as the dataset's ``crs``.
    """
    count, height, width = data.shape

    new_profile = {
        "driver": "GTiff",
        "height": height,
        "width": width,
        "crs": crs,
        "dtype": "uint8",
        "count": count,
        "transform": transform,
        "compress": "lzw"
    }

    # The context manager closes the dataset even when write() raises, so a
    # failed write does not leave an open handle behind.
    with rasterio.open(fn, "w", **new_profile) as f:
        f.write(data)
    
def write_new_tiff_float(fn, data, transform, crs):
    """Write a 3-D array to ``fn`` as an LZW-compressed GeoTIFF declared as float32.

    Args:
        fn: Output path handed to ``rasterio.open``.
        data: Array whose ``shape`` unpacks as ``(count, height, width)``.
        transform: Value stored as the dataset's ``transform``.
        crs: Value stored as the dataset's ``crs``.
    """
    count, height, width = data.shape

    new_profile = {
        "driver": "GTiff",
        "height": height,
        "width": width,
        "crs": crs,
        "dtype": "float32",
        "count": count,
        "transform": transform,
        "compress": "lzw",
        # GTiff creation option; 3 selects the floating-point predictor
        # (see the GDAL GTiff driver documentation).
        "predictor": 3
    }

    # The context manager closes the dataset even when write() raises, so a
    # failed write does not leave an open handle behind.
    with rasterio.open(fn, "w", **new_profile) as f:
        f.write(data)

## Make small dataset

In [None]:
def make_dataset(fns, state, output_dir):
    """Crop NAIP, land-cover and NLCD rasters to their shared extent and save them.

    For every land-cover path in ``fns`` the matching new NAIP path and NLCD
    path are derived by string substitution (the NLCD path goes through
    ``new_to_old_map``). The three rasters are cropped to the intersection of
    their bounds and written into ``output_dir`` with ``write_new_tiff``.

    Args:
        fns: Sequence of land-cover raster paths. Each is expected to contain
            ``"resampled-lc"`` and to end in a 7-character suffix (presumably
            ``"_lc.tif"``) that is stripped to form the NAIP path.
        state: Unused; kept so existing callers keep working.
        output_dir: Directory that receives the cropped GeoTIFFs.

    Raises:
        KeyError: If a derived NAIP path is missing from ``new_to_old_map``.
        AssertionError: If the three rasters do not share the same CRS.
    """
    for i, lc_fn in enumerate(fns):
        print(i, len(fns))

        new_naip_fn = lc_fn.replace("resampled-lc", "esri-naip")[:-7] + ".mrf"
        old_naip_fn = new_to_old_map[new_naip_fn]
        nlcd_fn = old_naip_fn.replace("esri-naip", "resampled-nlcd")[:-4] + "_nlcd.tif"

        # Open all three inputs under one `with` so every handle is released
        # even if the CRS check or a mask call fails.
        with rasterio.open(new_naip_fn, "r") as naip_f, \
                rasterio.open(lc_fn, "r") as lc_f, \
                rasterio.open(nlcd_fn, "r") as nlcd_f:

            # to_string() matches make_dataset_big and, unlike crs["init"],
            # does not depend on the CRS exposing an "init" key.
            crs = naip_f.crs.to_string()
            assert lc_f.crs.to_string() == crs, "CRS mismatch: %s" % (lc_fn,)
            assert nlcd_f.crs.to_string() == crs, "CRS mismatch: %s" % (nlcd_fn,)

            bounds = bounds_intersection(
                bounds_intersection(naip_f.bounds, lc_f.bounds),
                nlcd_f.bounds,
            )
            left, bottom, right, top = bounds
            geom = shapely.geometry.mapping(shapely.geometry.box(left, bottom, right, top, ccw=True))

            naip_data, naip_transform = rasterio.mask.mask(naip_f, [geom], crop=True)
            lc_data, lc_transform = rasterio.mask.mask(lc_f, [geom], crop=True)
            nlcd_data, nlcd_transform = rasterio.mask.mask(nlcd_f, [geom], crop=True)

        # os.path.join yields the same path as plain concatenation when
        # output_dir ends with a separator, and a correct one when it does not.
        naip_out = os.path.join(output_dir, os.path.basename(new_naip_fn)[:-4] + "_naip.tif")
        nlcd_out = os.path.join(output_dir, os.path.basename(nlcd_fn))
        lc_out = os.path.join(output_dir, os.path.basename(lc_fn))

        write_new_tiff(naip_out, naip_data, naip_transform, crs)
        write_new_tiff(nlcd_out, nlcd_data, nlcd_transform, crs)
        write_new_tiff(lc_out, lc_data, lc_transform, crs)

In [None]:
# Build the small dataset: one output directory per (state, split) pair.
# NOTE(review): the output root is a hard-coded absolute path and the split
# files are read relative to the current working directory.
for state in states:
    for ds in ["train", "val"]:
        print(state, ds)
        output_dir = "/mnt/blobfuse/cnn-minibatches/cvpr_splits/%s_%s_tiles/" % (state, ds)
        os.makedirs(output_dir, exist_ok=True)

        # `with` closes the split file even if reading fails. Blank lines are
        # dropped so an empty split file results in no work instead of a
        # failed lookup inside make_dataset.
        with open("splits/%s_%s.txt" % (state, ds), "r") as f:
            fns = [fn for fn in f.read().strip().split("\n") if fn]

        make_dataset(fns, state, output_dir)

## Make extended dataset

In [None]:
def make_dataset_big(fns, state, output_dir):
    """Crop seven co-located rasters per tile to a shared extent and save them.

    For every land-cover path in ``fns``, paths for the new NAIP, old NAIP,
    NLCD, Landsat leaf-on, Landsat leaf-off and building rasters are derived
    by string substitution (the old NAIP path comes from ``new_to_old_map``).
    All seven rasters are cropped to the intersection of their bounds and
    written into ``output_dir``. Arrays of dtype uint8/int8 go through
    ``write_new_tiff``; everything else goes through ``write_new_tiff_float``.
    The buildings layer is written as ``1 - data``.

    Args:
        fns: Sequence of land-cover raster paths. Each is expected to contain
            ``"resampled-lc"`` and to end in a 7-character suffix (presumably
            ``"_lc.tif"``) that is stripped to form the NAIP path.
        state: Unused; kept so existing callers keep working.
        output_dir: Directory that receives the cropped GeoTIFFs.

    Raises:
        KeyError: If a derived NAIP path is missing from ``new_to_old_map``.
        AssertionError: If the rasters do not share one CRS, or a cropped
            array is not larger than 100 pixels in both height and width.
    """
    for i, lc_fn in enumerate(fns):
        print(i, len(fns))

        new_naip_fn = lc_fn.replace("resampled-lc", "esri-naip")[:-7] + ".mrf"
        old_naip_fn = new_to_old_map[new_naip_fn]
        nlcd_fn = old_naip_fn.replace("esri-naip", "resampled-nlcd")[:-4] + "_nlcd.tif"
        leafon_fn = old_naip_fn.replace("esri-naip/data/v1", "resampled-landsat8/data/leaf_on")[:-4] + "_landsat.tif"
        leafoff_fn = old_naip_fn.replace("esri-naip/data/v1", "resampled-landsat8/data/leaf_off")[:-4] + "_landsat.tif"
        building_fn = old_naip_fn.replace("esri-naip", "resampled-buildings")[:-4] + "_building.tif"

        # Shared output prefix: the new NAIP basename with its last
        # "_"-separated token removed.
        prefix = "_".join(os.path.basename(new_naip_fn).split("_")[:-1])

        # (input path, output file name, invert values before writing).
        # The invert flag replaces a positional `j == 6` check, so reordering
        # this list cannot silently invert the wrong layer.
        layers = [
            (new_naip_fn, prefix + "_naip-new.tif", False),
            (old_naip_fn, prefix + "_naip-old.tif", False),
            (lc_fn, prefix + "_lc.tif", False),
            (nlcd_fn, prefix + "_nlcd.tif", False),
            (leafon_fn, prefix + "_landsat-leaf-on.tif", False),
            (leafoff_fn, prefix + "_landsat-leaf-off.tif", False),
            (building_fn, prefix + "_buildings.tif", True),
        ]

        shared_bounds = None
        common_crs = None
        open_files = []

        try:
            for fn, _, _ in layers:
                f = rasterio.open(fn, "r")
                # Register the handle immediately so the finally block closes
                # it even if the CRS assertion below fails.
                open_files.append(f)

                crs = f.crs.to_string()
                if common_crs is None:
                    common_crs = crs
                else:
                    assert common_crs == crs, "CRS mismatch: %s" % (fn,)

                if shared_bounds is None:
                    shared_bounds = f.bounds
                else:
                    shared_bounds = bounds_intersection(f.bounds, shared_bounds)

            left, bottom, right, top = shared_bounds
            geom = shapely.geometry.mapping(shapely.geometry.box(left, bottom, right, top, ccw=True))

            for f, (_, out_name, invert) in zip(open_files, layers):
                data, transform = rasterio.mask.mask(f, [geom], crop=True)

                assert data.shape[1] > 100 and data.shape[2] > 100

                if invert:  # buildings
                    data = 1 - data

                out_fn = os.path.join(output_dir, out_name)
                if data.dtype == np.uint8 or data.dtype == np.int8:
                    data = data.astype(np.uint8)
                    write_new_tiff(out_fn, data, transform, common_crs)
                else:
                    write_new_tiff_float(out_fn, data, transform, common_crs)
        finally:
            # Release every input handle, including on a partial failure.
            for f in open_files:
                f.close()

In [None]:
# Build the extended dataset: one output directory per (state, split) pair.
# NOTE(review): the output root is a hard-coded absolute path under a user's
# home directory, and the split files are read relative to the parent of the
# current working directory.
for state in states:
    for ds in ["val","test","extended-train"]:
        print(state, ds)

        # Output directories always carry the "extended-" prefix, whether or
        # not the split name already has it.
        extended_ds = ds if ds.startswith("extended") else "extended-%s" % (ds)

        output_dir = "/home/caleb/data/%s_%s_tiles/" % (state, extended_ds)
        os.makedirs(output_dir, exist_ok=True)

        # `with` closes the split file even if reading fails. Blank lines are
        # dropped so an empty split file results in no work instead of a
        # failed lookup inside make_dataset_big.
        with open("../splits/%s_%s.txt" % (state, ds), "r") as f:
            fns = [fn for fn in f.read().strip().split("\n") if fn]

        make_dataset_big(fns, state, output_dir)

de_1m_2013 val
0 5
1 5
2 5
3 5
4 5
de_1m_2013 test
0 20
1 20
2 20
3 20
4 20
5 20
6 20
7 20
8 20
9 20
10 20
11 20
12 20
13 20
14 20
15 20
16 20
17 20
18 20
19 20
de_1m_2013 extended-train
0 82
1 82
2 82
3 82
4 82
5 82
6 82
7 82
8 82
9 82
10 82
11 82
12 82
13 82
14 82
15 82
16 82
17 82
18 82
19 82
20 82
21 82
22 82
23 82
24 82
25 82
26 82
27 82
28 82
29 82
30 82
31 82
32 82
33 82
34 82
35 82
36 82
37 82
38 82
39 82
40 82
41 82
42 82
43 82
44 82
45 82
46 82
47 82
48 82
49 82
50 82
51 82
52 82
53 82
54 82
55 82
56 82
57 82
58 82
59 82
60 82
61 82
62 82
63 82
64 82
65 82
66 82
67 82
68 82
69 82
70 82
71 82
72 82
73 82
74 82
75 82
76 82
77 82
78 82
79 82
80 82
81 82
ny_1m_2013 val
0 5
1 5
2 5
3 5
4 5
ny_1m_2013 test
0 20
1 20
2 20
3 20
4 20
5 20
6 20
7 20
8 20
9 20
10 20
11 20
12 20
13 20
14 20
15 20
16 20
17 20
18 20
19 20
ny_1m_2013 extended-train
0 100
1 100
2 100
3 100
4 100
5 100
6 100
7 100
8 100
9 100
10 100
11 100
12 100
13 100
14 100
15 100
16 100
17 100
18 100
19 100
20 100
21 100
