## Copy triplets

In [11]:
import os
import shutil
from time import time

In [15]:
# Source directory holding the triplet .npy files and destination for the copies
in_dir = '../../pixel2vec/data/interim/naip_fresno/triplets50_100/'
out_dir = '../data/triplets/'
# Number of triplets to copy (indices 0 .. N-1)
N = 100000
# Report progress roughly every 1% of the run; clamp to 1 so that a small N
# (< 100) does not produce a zero modulus in the copy loop.
print_every = max(1, N // 100)
# Suffixes of the three files that make up one triplet
in_names = ['patch', 'neighbor', 'distant']

In [16]:
# Copy the first N triplets from in_dir to out_dir. Files are named
# '<index><suffix>.npy'; the 'patch' suffix is renamed to 'anchor' on output.
os.makedirs(out_dir, exist_ok=True)  # copyfile does not create missing directories
t0 = time()
for i in range(N):
    for in_name in in_names:
        in_fn = os.path.join(in_dir, '{}{}.npy'.format(i, in_name))
        # Use a separate name for the output suffix instead of rebinding the loop variable
        out_name = 'anchor' if in_name == 'patch' else in_name
        out_fn = os.path.join(out_dir, '{}{}.npy'.format(i, out_name))
        shutil.copyfile(in_fn, out_fn)
    if (i+1) % print_every == 0:
        # Elapsed wall-clock time since the start of the copy
        t1 = time()
        print('Copied {} triplets: {:0.3f}s'.format(i+1, t1-t0))

Copied 1000 triplets: 42.600s
Copied 2000 triplets: 88.328s
Copied 3000 triplets: 132.211s
Copied 4000 triplets: 173.892s
Copied 5000 triplets: 216.376s
Copied 6000 triplets: 259.270s
Copied 7000 triplets: 298.617s
Copied 8000 triplets: 334.446s
Copied 9000 triplets: 372.016s
Copied 10000 triplets: 409.544s
Copied 11000 triplets: 443.262s
Copied 12000 triplets: 481.720s
Copied 13000 triplets: 535.706s
Copied 14000 triplets: 595.052s
Copied 15000 triplets: 649.317s
Copied 16000 triplets: 716.446s
Copied 17000 triplets: 785.892s
Copied 18000 triplets: 860.395s
Copied 19000 triplets: 933.548s
Copied 20000 triplets: 981.090s
Copied 21000 triplets: 1026.808s
Copied 22000 triplets: 1072.534s
Copied 23000 triplets: 1118.380s
Copied 24000 triplets: 1168.539s
Copied 25000 triplets: 1214.856s
Copied 26000 triplets: 1266.167s
Copied 27000 triplets: 1318.196s
Copied 28000 triplets: 1369.374s
Copied 29000 triplets: 1438.996s
Copied 30000 triplets: 1530.829s
Copied 31000 triplets: 1622.415s
Copied 3

## Copy tiles and labels
We also want to provide a small set of NAIP tiles with corresponding CDL labels so that people can apply the trained model in the classification example.

In [19]:
import numpy as np

In [39]:
# Inputs: triplet tiles and the label array that is indexed alongside them
in_dir = '../../pixel2vec/data/interim/naip_fresno/triplets50_100/'
label_fn = '../../pixel2vec/data/interim/naip_fresno/y80_50_100.npy'
in_names = ['patch', 'neighbor', 'distant']

# Output location and progress-report interval (in triplets)
out_dir = '../data/tiles/'
print_every = 100

In [65]:
# Collect the paths and labels of every tile whose label is not NaN
y = np.load(label_fn)
t0 = time()
copy_fns, ys = [], []
count = 0  # running index into y; advances once per (i, in_name) pair
for i in range(1000):
    for in_name in in_names:
        label = y[count]
        count += 1
        if np.isnan(label):
            continue  # skip tiles without a usable label
        copy_fns.append(os.path.join(in_dir, '{}{}.npy'.format(i, in_name)))
        ys.append(label)
    checked = i + 1
    if checked % print_every == 0:
        print('Checked {} tile triplets: {:0.3f}s'.format(checked, time() - t0))

Checked 100 tile triplets: 0.003s
Checked 200 tile triplets: 0.006s
Checked 300 tile triplets: 0.009s
Checked 400 tile triplets: 0.012s
Checked 500 tile triplets: 0.015s
Checked 600 tile triplets: 0.017s
Checked 700 tile triplets: 0.020s
Checked 800 tile triplets: 0.022s
Checked 900 tile triplets: 0.025s
Checked 1000 tile triplets: 0.027s


In [66]:
# Sanity check: the path list and the label list should have the same length
print(*map(len, (copy_fns, ys)))

2323 2323


In [67]:
# Keep only the first 1000 labelled tiles; labels become a NumPy array
n_keep = 1000
copy_fns, ys = copy_fns[:n_keep], np.array(ys[:n_keep])

In [68]:
# Copy tiles into out_dir as '1tile.npy', '2tile.npy', ... (1-based numbering)
os.makedirs(out_dir, exist_ok=True)  # neither copyfile nor np.save creates missing directories
for idx, in_fn in enumerate(copy_fns):
    out_fn = os.path.join(out_dir, '{}tile.npy'.format(idx+1))
    shutil.copyfile(in_fn, out_fn)
# Save labels next to the tiles
np.save(os.path.join(out_dir, 'y.npy'), ys)

## Prepare NAIP tiles for example 1
We need to split the large TIF into smaller TIFs for use in example 1.

In [1]:
import gdal
import numpy as np

In [2]:
# Read the large NAIP TIF fully into memory as a uint8 array
in_fn = '../data/large_naip/naip_fresno_2016_row_0_column_0.tif'
obj = gdal.Open(in_fn)
if obj is None:
    # gdal.Open returns None rather than raising when GDAL exceptions are not
    # enabled; fail here with a clear message instead of an AttributeError below.
    raise IOError('Could not open {}'.format(in_fn))
img = obj.ReadAsArray().astype(np.uint8)
print(img.shape)

(5, 9278, 9278)
