In [None]:
import os
import json
import pandas as pd

In [None]:
# Build a lookup from tile names in the first CSV column to tile names in the
# second column. For both columns the directory part is dropped and the last
# four characters of the basename (presumably a ".tif"/".mrf"-style extension
# -- TODO confirm) are replaced by "_lc.tif".
# NOTE(review): judging by the file name, column 0 holds 2013-2014 tiles and
# column 1 the matching 2011-2012 tiles -- confirm against the CSV header.
new_to_old_map = {}
with open("../data/2013-2014_to_2011-2012.csv", "r") as f:
    # The context manager guarantees the handle is closed even if a malformed
    # row raises below.
    f.readline()  # skip the header row
    content = f.read().strip()

# "".split("\n") would yield [""], which then fails on parts[1]; treat a file
# with no data rows as an empty mapping instead.
lines = content.split("\n") if content else []
for line in lines:
    parts = line.split(",")
    u = os.path.basename(parts[0])[:-4] + "_lc.tif"
    v = os.path.basename(parts[1])[:-4] + "_lc.tif"
    new_to_old_map[u] = v

In [None]:
# Dataset identifiers processed by this notebook. Each one is interpolated
# into the split-file and output-file names built in the cells below.
# The trailing comments record the number of tiles per dataset.
# NOTE(review): names look like <state>_<resolution>_<year> -- confirm.
states = [
    "de_1m_2013", # 107 tiles
    "ny_1m_2013", # 407 tiles
    "md_1m_2013", # 691 tiles
    "pa_1m_2013", # 2239 tiles
    "wv_1m_2014", # 292 tiles
    "va_1m_2014"  # 1238 tiles
]

In [None]:
# Write one tile index CSV per (state, split) and remember the integer id
# assigned to every tile so that later cells can refer to tiles by id.
#
# fn_mapping maps (state, split, land-cover file name) -> row id within that
# state/split tile CSV.
fn_mapping = {}
for state in states:
    for ds in ["train", "val", "test"]:
        # Read the list of land-cover tile paths for this split; only the
        # basename of each entry is kept.
        with open("../splits/%s_%s.txt" % (state, ds), "r") as f:
            content = f.read().strip()
        # An empty split file would otherwise produce [""] and a spurious
        # KeyError in the lookup below; emit a header-only CSV instead.
        fns = [
            os.path.basename(fn)
            for fn in content.split("\n")
        ] if content else []

        # `with` ensures the CSV is flushed and closed even if a tile is
        # missing from new_to_old_map (KeyError) part-way through the loop.
        with open("../../data/%s_%s_tiles.csv" % (state, ds), "w") as f:
            f.write("id,naip_fn,nlcd_fn,lc_fn\n")
            for i, lc_fn in enumerate(fns):
                # lc_fn[:-7] drops the 7-character "_lc.tif" suffix so the
                # sibling "_naip.tif" / "_nlcd.tif" names can be derived.
                # The NLCD name is derived from the tile that new_to_old_map
                # associates with this land-cover tile.
                f.write("%d,%s,%s,%s\n" % (
                    i,
                    "%s_%s_tiles/%s" % (state, ds, lc_fn[:-7] + "_naip.tif"),
                    "%s_%s_tiles/%s" % (state, ds, new_to_old_map[lc_fn][:-7] + "_nlcd.tif"),
                    "%s_%s_tiles/%s" % (state, ds, lc_fn),
                ))
                fn_mapping[(state, ds, lc_fn)] = i

In [None]:
# Sanity check: every file referenced by the tile index CSVs must exist
# relative to BASE_DIR. Fails with an AssertionError naming the first
# missing file and the column it came from.
BASE_DIR = "../../data/"
for state in states:
    for ds in ["train", "val", "test"]:

        # Build the CSV path from BASE_DIR so the index files and the files
        # they reference are resolved against the same directory.
        df = pd.read_csv(os.path.join(BASE_DIR, "%s_%s_tiles.csv" % (state, ds)))

        # Same column order as the checks were originally written in.
        for column in ["naip_fn", "lc_fn", "nlcd_fn"]:
            for fn in df[column].values:
                assert os.path.exists(os.path.join(BASE_DIR, fn)), (
                    "%s listed in %s_%s_tiles.csv does not exist: %s" % (column, state, ds, fn)
                )

In [None]:
# Class values used to name the per-class count columns of the patch CSVs.
# NOTE(review): presumably the land-cover and NLCD label values that can occur
# in the rasters -- confirm against the label definitions.
LC_VALS = [1, 2, 3, 4, 5, 6, 15]
NLCD_VALS = [0, 11, 12, 21, 22, 23, 24, 31, 41, 42, 43, 51, 52, 71, 72, 73, 74, 81, 82, 90, 95, 255]

# Comma-separated header fragments, one "<prefix>_class_<value>_count" column
# per class value.
LC_HEADER_STRING = ','.join(
    "lc_class_%d_count" % (val)
    for val in LC_VALS
)
NLCD_HEADER_STRING = ','.join(
    "nlcd_class_%d_count" % (val)
    for val in NLCD_VALS
)

# Write one patch index CSV per (state, split). Each metadata row is expected
# to hold: patch file, source tile file, x, y, followed by further columns
# that are copied through verbatim under the class-count headers.
for state in states:
    for ds in ["train", "val"]:

        with open("../splits/%s_%s_metadata.csv" % (state, ds), "r") as f:
            content = f.read().strip()
        # An empty metadata file would otherwise yield a single [""] row and
        # crash on row[1]; write a header-only CSV instead.
        lines = [
            line.strip().split(",")
            for line in content.split("\n")
        ] if content else []

        # `with` ensures the output is flushed and closed even if a row fails
        # to parse or refers to a tile that is not in fn_mapping.
        with open("../../data/%s_%s_patches.csv" % (state, ds), "w") as f:
            f.write("patch_id,patch_fn,tile_id,x_coord,y_coord,size,%s,%s\n" % (LC_HEADER_STRING, NLCD_HEADER_STRING))
            for i, row in enumerate(lines):
                patch_fn = "%s_%s_patches/%s" % (state, ds, os.path.basename(row[0]))
                # Column 1 names the source tile; swapping its last four
                # characters for "_lc.tif" gives the key used in fn_mapping.
                lc_fn = os.path.basename(row[1])[:-4] + "_lc.tif"
                tile_id = fn_mapping[(state, ds, lc_fn)]
                x_coord = int(row[2])
                y_coord = int(row[3])
                # Every patch is recorded with the same fixed size.
                # NOTE(review): presumably the patch edge length in pixels -- confirm.
                size = 240
                # Remaining columns are passed through unchanged; they are
                # assumed to line up with the LC/NLCD count headers above.
                remainder = row[4:]

                f.write("%d,%s,%d,%d,%d,%d,%s\n" % (
                    i,
                    patch_fn,
                    tile_id,
                    x_coord,
                    y_coord,
                    size,
                    ','.join(remainder)
                ))

In [None]:
# Convert each (state, split) shapes file -- one JSON geometry per line --
# into a GeoJSON FeatureCollection whose features carry the zero-based line
# index as "patch_id".
# NOTE(review): patch_id is presumably meant to line up with the patch_id
# column of the corresponding *_patches.csv -- confirm the files share row
# order.
for state in states:
    for ds in ["train", "val"]:

        with open("../splits/%s_%s_shapes.txt" % (state, ds), "r") as f:
            content = f.read().strip()
        # An empty shapes file would otherwise yield [""] and make
        # json.loads("") raise; produce an empty FeatureCollection instead.
        lines = content.split("\n") if content else []
        print(len(lines))

        geojson = {
            "type": "FeatureCollection",
            "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
            "features": [
                { "type": "Feature", "properties": { "patch_id": i}, "geometry": json.loads(line)}
                for i, line in enumerate(lines)
            ]
        }

        # `with` guarantees the output is flushed and closed; json.dump
        # serialises straight to the file with the same text json.dumps gives.
        with open("../../data/%s_%s_patches.geojson" % (state, ds), "w") as f:
            json.dump(geojson, f)