All code used to run the analysis is located in the online repository here:
https://github.com/jonathanventura/canopy
Files needed: Hyperspectral imagery (data/NEON_D17_TEAK_DP1_20170627_181333_reflectance.tif)


In [2]:
import numpy as np
import sklearn
import torch



In [3]:
# Download step disabled: the data file is already available locally, and the wget command below was not working for me.
# if not os.path.exists('tree_species_classifier_data.npz'):
#   !wget -O tree_species_classifier_data.npz "https://www.dropbox.com/scl/fi/b7mw23k3ifaeui9m8nnn3/tree_species_classifier_data.npz?rlkey=bgxp37c1t04i7q35waf3slc26&dl=1"

In [4]:
# Load the pre-split arrays from the local .npz archive in one unpacking step.
data = np.load('tree_species_classifier_data.npz')
train_features, train_labels, test_features, test_labels = (
    data[key]
    for key in ('train_features', 'train_labels', 'test_features', 'test_labels')
)

Step 1: Inspect the Data

In [5]:
# Step 2: Preprocess the data with PCA
# Import the submodule explicitly: a bare `import sklearn` is not guaranteed to
# expose `sklearn.decomposition` as an attribute.
import sklearn.decomposition

# Convert the labels to int64 tensors (the dtype CrossEntropyLoss expects for class indices).
train_labels = torch.tensor(train_labels).long()
test_labels = torch.tensor(test_labels).long()

# Fit the PCA model using 32 components and whiten = True from the instructions.
# The projection is fitted on the training split only and then applied unchanged
# to the test split.
pca = sklearn.decomposition.PCA(n_components=32, whiten=True)
pca_train_features = torch.tensor(pca.fit_transform(train_features)).float()
pca_test_features = torch.tensor(pca.transform(test_features)).float()



In [6]:
# Sanity check: the feature matrix and the label vector must agree on the sample count.
pca_train_shape = pca_train_features.shape
train_labels_shape = train_labels.shape
print(f"pca_train_features shape: {pca_train_shape}")
print(f"train_labels shape: {train_labels_shape}")


pca_train_features shape: torch.Size([15707, 32])
train_labels shape: torch.Size([15707])


In [7]:
# Step 3: Classifiers Using scikit-learn
import sklearn.linear_model
import sklearn.neural_network

# --- Linear baseline: logistic regression on the PCA features ---
# `fit` returns the estimator itself, so construction and fitting can be chained.
linear_model_sklearn = sklearn.linear_model.LogisticRegression().fit(
    pca_train_features, train_labels
)

linear_sklearn_accuracy_train, linear_sklearn_accuracy_test = (
    linear_model_sklearn.score(features, labels)
    for features, labels in (
        (pca_train_features, train_labels),
        (pca_test_features, test_labels),
    )
)
print(f"Linear Train Accuracy with scikit-learn : {linear_sklearn_accuracy_train}")
print(f"Linear Test Accuracy with scikit-learn : {linear_sklearn_accuracy_test}")

# --- Multi-layer perceptron: three hidden layers of 100 units each ---
mlp_model_sklearn = sklearn.neural_network.MLPClassifier(
    hidden_layer_sizes=(100, 100, 100)
).fit(pca_train_features, train_labels)

mlp_sklearn_accuracy_train, mlp_sklearn_accuracy_test = (
    mlp_model_sklearn.score(features, labels)
    for features, labels in (
        (pca_train_features, train_labels),
        (pca_test_features, test_labels),
    )
)
print(f"MLP Train Accuracy with scikit-learn : {mlp_sklearn_accuracy_train}")
print(f"MLP Test Accuracy with scikit-learn : {mlp_sklearn_accuracy_test}")


Linear Train Accuracy with scikit-learn : 0.8553511173362195
Linear Test Accuracy with scikit-learn : 0.833976833976834
MLP Train Accuracy with scikit-learn : 0.9999363341185459
MLP Test Accuracy with scikit-learn : 0.824967824967825


In [8]:
# Step 4: Classifiers using PyTorch

# Derive the layer sizes from the data once instead of repeating literals:
# the input width is the number of PCA components, the output width is the
# number of distinct classes in the training labels.
num_features = pca_train_features.shape[1]
num_classes = len(np.unique(train_labels))

# Linear classifier: a single affine layer mapping features straight to class logits.
linear_model_torch = torch.nn.Sequential(
    torch.nn.Linear(num_features, num_classes),
)

# Multi-Layer Perceptron (MLP) with ONE hidden layer of 100 units (not 100 layers).
# NOTE(review): the scikit-learn MLP in this notebook uses hidden_layer_sizes=(100, 100, 100),
# i.e. three hidden layers, so the two MLPs are not the same architecture — confirm this is intended.
mlp_model_torch = torch.nn.Sequential(
    # input features -> hidden layer of size 100
    torch.nn.Linear(num_features, 100),

    # hidden activation function; supplies the non-linearity
    torch.nn.ReLU(),

    # hidden layer -> one logit per class
    torch.nn.Linear(100, num_classes),
)


In [9]:
# Create a cross-entropy loss function and a stochastic gradient descent (SGD) optimizer
# Both models share the same SGD hyper-parameters, so define them once.
lr = 1e-2
sgd_settings = {"lr": lr, "weight_decay": 0.001}

# Cross-entropy loss on the raw logits, plus one SGD optimizer per model.
loss_function = torch.nn.CrossEntropyLoss()
linear_opt = torch.optim.SGD(linear_model_torch.parameters(), **sgd_settings)
mlp_opt = torch.optim.SGD(mlp_model_torch.parameters(), **sgd_settings)


In [10]:
# a. Create TensorDataset and DataLoader to train and test splits
batch_size = 32

# Pair each PCA feature matrix with its label vector.
train_dataset, test_dataset = (
    torch.utils.data.TensorDataset(features, labels)
    for features, labels in (
        (pca_train_features, train_labels),
        (pca_test_features, test_labels),
    )
)

# Only the training loader is shuffled; the test loader keeps the original order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [11]:
# b. calculate model accuracy with data loader
def accuracy(model, dataloader):
    """Return the fraction of samples in ``dataloader`` that ``model`` classifies correctly.

    The model is switched to evaluation mode for the duration of the call and its
    previous train/eval mode is restored afterwards, so calling this in the middle
    of a training loop does not silently leave the model in eval mode.

    Args:
        model: A ``torch.nn.Module`` producing one row of class scores per sample.
        dataloader: An iterable yielding ``(batch_X, batch_y)`` pairs, where
            ``batch_y`` holds the integer class index of each sample.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when the dataloader yields no samples.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0

    try:
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for batch_X, batch_y in dataloader:
                logits = model(batch_X)
                # The predicted class is the index of the largest score in each row.
                _, predicted_labels = torch.max(logits, dim=1)
                correct += (predicted_labels == batch_y).sum().item()
                total += batch_y.size(0)
    finally:
        # Put the model back in whatever mode the caller had it in.
        model.train(was_training)

    # Guard against an empty dataloader instead of raising ZeroDivisionError.
    return correct / total if total else 0.0


In [12]:
# c. Function to train model

def trainModel(model, train_loader, opt, loss_fn, epochs=100, eval_loader=None):
    """Train ``model`` with mini-batch gradient steps and print per-epoch metrics.

    Args:
        model: The ``torch.nn.Module`` to optimize (updated in place).
        train_loader: Iterable of ``(batch_X, batch_y)`` training batches; must support ``len()``.
        opt: Optimizer constructed over ``model``'s parameters.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar loss tensor.
        epochs: Number of full passes over ``train_loader`` (default 100).
        eval_loader: Loader used for the reported "Test Accuracy". When ``None``,
            the notebook-level ``test_loader`` is used.

    Returns:
        None. Progress is reported via ``print`` once per epoch.
    """
    if eval_loader is None:
        # Fall back to the notebook-level loader.
        eval_loader = test_loader

    num_batches = len(train_loader)

    for epoch in range(epochs):
        # accuracy() puts the model in eval mode, so re-enter training mode at the
        # start of every epoch rather than once before the loop.
        model.train()
        total_loss = 0.0

        for batch_X, batch_y in train_loader:
            opt.zero_grad()  # 1. zero out gradients

            z = model(batch_X)  # 2. Forward pass
            loss = loss_fn(z, batch_y)  # 3. Compute loss

            loss.backward()  # 4. Backpropagation
            opt.step()  # 5. Apply gradients

            total_loss += loss.item()

        train_accuracy = accuracy(model, train_loader)
        test_accuracy = accuracy(model, eval_loader)

        # Mean loss per batch; max(..., 1) avoids dividing by zero on an empty loader.
        mean_loss = total_loss / max(num_batches, 1)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}, "
              f"Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}")


In [13]:
# Train the linear PyTorch classifier with its own SGD optimizer.
trainModel(
    model=linear_model_torch,
    train_loader=train_loader,
    opt=linear_opt,
    loss_fn=loss_function,
)


Epoch 1/100, Loss: 1.5420, Train Accuracy: 0.7473, Test Accuracy: 0.7497
Epoch 2/100, Loss: 0.9776, Train Accuracy: 0.7898, Test Accuracy: 0.7934
Epoch 3/100, Loss: 0.8207, Train Accuracy: 0.8037, Test Accuracy: 0.7992
Epoch 4/100, Loss: 0.7498, Train Accuracy: 0.8137, Test Accuracy: 0.8057
Epoch 5/100, Loss: 0.7088, Train Accuracy: 0.8201, Test Accuracy: 0.8037
Epoch 6/100, Loss: 0.6817, Train Accuracy: 0.8249, Test Accuracy: 0.8069
Epoch 7/100, Loss: 0.6625, Train Accuracy: 0.8282, Test Accuracy: 0.8063
Epoch 8/100, Loss: 0.6480, Train Accuracy: 0.8299, Test Accuracy: 0.8095
Epoch 9/100, Loss: 0.6368, Train Accuracy: 0.8320, Test Accuracy: 0.8134
Epoch 10/100, Loss: 0.6275, Train Accuracy: 0.8332, Test Accuracy: 0.8160
Epoch 11/100, Loss: 0.6199, Train Accuracy: 0.8342, Test Accuracy: 0.8166
Epoch 12/100, Loss: 0.6135, Train Accuracy: 0.8349, Test Accuracy: 0.8179
Epoch 13/100, Loss: 0.6080, Train Accuracy: 0.8361, Test Accuracy: 0.8192
Epoch 14/100, Loss: 0.6035, Train Accuracy: 0.8

In [14]:
# Train the PyTorch MLP with its own SGD optimizer.
trainModel(
    model=mlp_model_torch,
    train_loader=train_loader,
    opt=mlp_opt,
    loss_fn=loss_function,
)



Epoch 1/100, Loss: 1.6230, Train Accuracy: 0.6598, Test Accuracy: 0.6622
Epoch 2/100, Loss: 1.0739, Train Accuracy: 0.7685, Test Accuracy: 0.7735
Epoch 3/100, Loss: 0.7996, Train Accuracy: 0.7987, Test Accuracy: 0.7967
Epoch 4/100, Loss: 0.6768, Train Accuracy: 0.8196, Test Accuracy: 0.8121
Epoch 5/100, Loss: 0.6106, Train Accuracy: 0.8293, Test Accuracy: 0.8108
Epoch 6/100, Loss: 0.5696, Train Accuracy: 0.8396, Test Accuracy: 0.8230
Epoch 7/100, Loss: 0.5418, Train Accuracy: 0.8470, Test Accuracy: 0.8320
Epoch 8/100, Loss: 0.5211, Train Accuracy: 0.8519, Test Accuracy: 0.8333
Epoch 9/100, Loss: 0.5051, Train Accuracy: 0.8561, Test Accuracy: 0.8372
Epoch 10/100, Loss: 0.4916, Train Accuracy: 0.8585, Test Accuracy: 0.8391
Epoch 11/100, Loss: 0.4802, Train Accuracy: 0.8606, Test Accuracy: 0.8411
Epoch 12/100, Loss: 0.4700, Train Accuracy: 0.8626, Test Accuracy: 0.8468
Epoch 13/100, Loss: 0.4613, Train Accuracy: 0.8650, Test Accuracy: 0.8507
Epoch 14/100, Loss: 0.4533, Train Accuracy: 0.8

In [15]:
# Data inspection: shapes, dtypes, value ranges and class balance.
# np.asarray accepts both NumPy arrays and CPU tensors, so the label checks below
# work whether or not the labels have already been converted to torch tensors.
train_labels_np = np.asarray(train_labels)
test_labels_np = np.asarray(test_labels)

print(f"train_features shape: {train_features.shape}, data type: {train_features.dtype}")
print(f"train_labels shape: {train_labels.shape}, data type: {train_labels.dtype}")
print(f"test_features shape: {test_features.shape}, data type: {test_features.dtype}")
print(f"test_labels shape: {test_labels.shape}, data type: {test_labels.dtype}")

print(f"train_features range: {train_features.min()} to {train_features.max()}")
print(f"test_features range: {test_features.min()} to {test_features.max()}")

print(f"Unique classes in train_labels: {np.unique(train_labels_np)}")
print(f"Unique classes in test_labels: {np.unique(test_labels_np)}")

# Per-class sample counts; index i holds the number of samples labelled i.
train_class_counts = np.bincount(train_labels_np)
test_class_counts = np.bincount(test_labels_np)
print(f"Train class counts: {train_class_counts}")
print(f"Test class counts: {test_class_counts}")


train_features shape: (15707, 426), data type: int16
train_labels shape: torch.Size([15707]), data type: torch.int64
test_features shape: (1554, 426), data type: int16
test_labels shape: torch.Size([1554]), data type: torch.int64
train_features range: 0 to 14998
test_features range: 0 to 6908
Unique classes in train_labels: [0 1 2 3 4 5 6 7]
Unique classes in test_labels: [0 1 2 3 4 5 6 7]
Train class counts: [2519  821 1575 3980 2640   88  852 3232]
Test class counts: [389  30 278 404 100  22  43 288]


In [16]:
# Final comparison of all four classifiers.
# The scikit-learn scores were stored when those models were fitted.
sklearn_results = (
    ("Linear Train Accuracy with scikit-learn: ", linear_sklearn_accuracy_train),
    ("Linear Test Accuracy with scikit-learn: ", linear_sklearn_accuracy_test),
    ("MLP Train Accuracy with scikit-learn : ", mlp_sklearn_accuracy_train),
    ("MLP Test Accuracy with scikit-learn : ", mlp_sklearn_accuracy_test),
)
for result_label, result_score in sklearn_results:
    print(f"{result_label}{result_score}")

# The PyTorch scores are computed here from the trained models.
torch_evaluations = (
    ("Linear Train Accuracy with PyTorch: ", linear_model_torch, train_loader),
    ("Linear Test Accuracy with Pytorch: ", linear_model_torch, test_loader),
    ("MLP Train Accuracy with Pytorch: ", mlp_model_torch, train_loader),
    ("MLP Test Accuracy with Pytorch: ", mlp_model_torch, test_loader),
)
for result_label, torch_model, evaluation_loader in torch_evaluations:
    print(f"{result_label}{accuracy(torch_model, evaluation_loader)}")

Linear Train Accuracy with scikit-learn: 0.8553511173362195
Linear Test Accuracy with scikit-learn: 0.833976833976834
MLP Train Accuracy with scikit-learn : 0.9999363341185459
MLP Test Accuracy with scikit-learn : 0.824967824967825
Linear Train Accuracy with PyTorch: 0.8480295409689947
Linear Test Accuracy with Pytorch: 0.8268983268983269
MLP Train Accuracy with Pytorch: 0.9181256764499904
MLP Test Accuracy with Pytorch: 0.8545688545688546
