# Example 3: Tile2Vec features for CDL classification
In this notebook, we'll use a Tile2Vec model that has been pre-trained on the NAIP dataset to embed a small NAIP dataset and then train a classifier on the corresponding Cropland Data Layer (CDL) labels.

In [42]:
import numpy as np
import os
import torch
#from time import time
from torch.autograd import Variable
from skimage import io


import sys
sys.path.append('../')
from src.tilenet import make_tilenet
from src.resnet import ResNet18

## Step 1. Loading pre-trained model
In this step, we will initialize a new TileNet model and then load the pre-trained weights.

In [39]:
# Setting up model
in_channels = 4
z_dim = 512
cuda = torch.cuda.is_available()
#tilenet = make_tilenet(in_channels=in_channels, z_dim=z_dim)
# Use old model for now
tilenet = ResNet18()
if cuda: tilenet.cuda()

In [40]:
# Load parameters
model_fn = '../models/naip_trained.ckpt'
#checkpoint = torch.load(model_fn)
checkpoint = torch.load(model_fn, map_location='cpu')
tilenet.load_state_dict(checkpoint)
tilenet.eval()

ResNet(
  (conv1): Conv2d(4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_

## Step 2. Embed NAIP tiles
In this step, we'll use TileNet to embed the NAIP tiles provided in `tile2vec/data/tiles`. There are 1000 tiles in total, named `1tile.npy` through `1000tile.npy`.

In [48]:
# Get data
tile_dir = '../data/tiles'
n_tiles = 1000
y = np.load(os.path.join(tile_dir, 'y.npy'))
print(y.shape)

(1000,)


In [51]:
# Embed tiles
#t0 = time()
X = np.zeros((n_tiles, z_dim))
for idx in range(n_tiles):
    if idx == 0:
        print(tile.shape) 
    tile = np.load(os.path.join(tile_dir, '{}tile.npy'.format(idx+1)))
    # Get first 4 NAIP channels (5th is CDL mask)
    tile = tile[:,:,:4]
    if idx == 0:
        print(tile.shape)
    # Rearrange to PyTorch order
    tile = np.moveaxis(tile, -1, 0)
    if idx == 0:
        print(tile.shape)
    tile = np.expand_dims(tile, axis=0)
    if idx == 0:
        print(tile.shape)
        #print(tile)
    # Scale to [0, 1]
    tile = tile / 255
    # Embed tile
    tile = torch.from_numpy(tile).float()
    if idx == 0:
        print(tile.shape)
        #print(tile)
    tile = Variable(tile)
    if idx == 0:
        print(tile.shape)
        print(tile)
    if cuda: tile = tile.cuda()
    if idx == 0:
        print(tile.shape)
        print(tile)
    z = tilenet.encode(tile)
    if idx == 0:
        print(z.shape)
        print(z)
    if cuda: z = z.cpu()
    z = z.data.numpy()
    if idx == 0:
        print(z.shape)
        print(z)
    X[idx,:] = z
#t1 = time()
print('Embedded {} tiles: {:0.3f}s'.format(n_tiles, 0.0))

torch.Size([1, 4, 50, 50])
(50, 50, 4)
(4, 50, 50)
(1, 4, 50, 50)
torch.Size([1, 4, 50, 50])
torch.Size([1, 4, 50, 50])
tensor([[[[0.7137, 0.7176, 0.7216,  ..., 0.6902, 0.6863, 0.6706],
          [0.7137, 0.7098, 0.7137,  ..., 0.6980, 0.6902, 0.6863],
          [0.7137, 0.7137, 0.7098,  ..., 0.6980, 0.6902, 0.6902],
          ...,
          [0.6980, 0.7020, 0.7020,  ..., 0.6902, 0.6902, 0.6902],
          [0.6549, 0.6824, 0.6824,  ..., 0.6588, 0.6784, 0.6824],
          [0.4471, 0.5176, 0.5176,  ..., 0.3608, 0.4784, 0.5294]],

         [[0.6627, 0.6667, 0.6667,  ..., 0.6431, 0.6196, 0.6157],
          [0.6627, 0.6588, 0.6588,  ..., 0.6353, 0.6314, 0.6275],
          [0.6588, 0.6588, 0.6588,  ..., 0.6392, 0.6353, 0.6353],
          ...,
          [0.6431, 0.6471, 0.6471,  ..., 0.6353, 0.6431, 0.6471],
          [0.5843, 0.6314, 0.6314,  ..., 0.5922, 0.6118, 0.6353],
          [0.3843, 0.4784, 0.4784,  ..., 0.3294, 0.4275, 0.4902]],

         [[0.5647, 0.5647, 0.5725,  ..., 0.5412, 0.525

Embedded 1000 tiles: 0.000s


## Step 3. Train random forest classifier
In this step, we'll split the dataset into train and test sets and train a random forest classifier to predict CDL classes.

In [51]:
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [52]:
# Check CDL classes
print(set(y))

{1.0, 2.0, 21.0, 152.0, 24.0, 28.0, 36.0, 176.0, 49.0, 54.0, 61.0, 69.0, 71.0, 72.0, 75.0, 76.0, 205.0, 204.0, 208.0, 212.0, 217.0, 225.0, 236.0, 111.0, 121.0, 122.0, 123.0, 124.0}


Since the CDL classes are not numbered in consecutive order, we'll start by reindexing the classes from 0.

In [53]:
# Reindex CDL classes
y = LabelEncoder().fit_transform(y)
print(set(y))

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}


We can randomly split the data and train a random forest classifier many times to get an estimate of the average accuracy.

In [72]:
n_trials = 100
accs = np.zeros((n_trials,))
for i in range(n_trials):
    # Splitting data and training RF classifer
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2)
    rf = RandomForestClassifier()
    rf.fit(X_tr, y_tr)
    accs[i] = rf.score(X_te, y_te)
print('Mean accuracy: {:0.4f}'.format(accs.mean()))
print('Standard deviation: {:0.4f}'.format(accs.std()))

Mean accuracy: 0.6876
Standard deviation: 0.0299
