In [1]:
import numpy as np
import sklearn
import torch
import os

In [2]:
if not os.path.exists('tree_species_classifier_data.npz'):
  !wget -O tree_species_classifier_data.npz "https://www.dropbox.com/scl/fi/b7mw23k3ifaeui9m8nnn3/tree_species_classifier_data.npz?rlkey=bgxp37c1t04i7q35waf3slc26&dl=1"

--2025-01-27 07:12:28--  https://www.dropbox.com/scl/fi/b7mw23k3ifaeui9m8nnn3/tree_species_classifier_data.npz?rlkey=bgxp37c1t04i7q35waf3slc26&dl=1
Resolving www.dropbox.com (www.dropbox.com)... 162.125.65.18, 2620:100:6021:18::a27d:4112
Connecting to www.dropbox.com (www.dropbox.com)|162.125.65.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://uc287e6022c6b04207a41d9e1729.dl.dropboxusercontent.com/cd/0/inline/Ci_g78WitUZw7hr5jc9NiJtjOu4dqtYENlg1jS-4h-ctDjD1sdukMNMHACVUcYVGd_NoAhq74kV9Qu-IJnXtJAmNC27yje7IaVBPQR7UwbxugVFq6e2hR115ow_n779MeGf2XW9yjW8pXhE57KkASL-u/file?dl=1# [following]
--2025-01-27 07:12:29--  https://uc287e6022c6b04207a41d9e1729.dl.dropboxusercontent.com/cd/0/inline/Ci_g78WitUZw7hr5jc9NiJtjOu4dqtYENlg1jS-4h-ctDjD1sdukMNMHACVUcYVGd_NoAhq74kV9Qu-IJnXtJAmNC27yje7IaVBPQR7UwbxugVFq6e2hR115ow_n779MeGf2XW9yjW8pXhE57KkASL-u/file?dl=1
Resolving uc287e6022c6b04207a41d9e1729.dl.dropboxusercontent.com (uc287e6022c6b04207a41d9e1729.dl.dropboxus

In [3]:
data = np.load('tree_species_classifier_data.npz')
train_features = data['train_features']
train_labels = data['train_labels']
test_features = data['test_features']
test_labels = data['test_labels']

In [4]:
#2. Pre Process Data
pca = sklearn.decomposition.PCA(n_components=32, whiten=True)
pca.fit(train_features)
pca_train_features = pca.transform(train_features)
pca_test_features = pca.transform(test_features)
print("Train features shape:", pca_train_features.shape)
print("Test features shape:", pca_test_features.shape)

Train features shape: (15707, 32)
Test features shape: (1554, 32)


In [9]:
#3. Classifiers using Scikit-learn
linear_classifier = sklearn.linear_model.LogisticRegression().fit(pca_train_features, train_labels)
neural_network = sklearn.neural_network.MLPClassifier(hidden_layer_sizes=(100), activation="relu", max_iter=1000).fit(pca_train_features, train_labels)

print("Linear classifier accuracy:", linear_classifier.score(pca_test_features, test_labels))
print("Neural network accuracy:", neural_network.score(pca_test_features, test_labels))

Linear classifier accuracy: 0.833976833976834
Neural network accuracy: 0.8043758043758044


In [10]:
#4. Classifiers using PyTorch
X_train = torch.tensor(pca_train_features, dtype=torch.float32)
y_train = torch.tensor(train_labels, dtype=torch.long)
X_test = torch.tensor(pca_test_features, dtype=torch.float32)
y_test = torch.tensor(test_labels, dtype=torch.long)

In [11]:
from torch.utils.data import TensorDataset, DataLoader

train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [12]:
def get_accuracy(model, data_loader):
  correct = 0
  total = 0
  model.eval()
  with torch.no_grad():
    for inputs, labels in data_loader:
      outputs = model(inputs)
      predicted = torch.argmax(outputs.data, dim=1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()
  return correct / total

In [13]:
def train_model(model, train_loader, test_loader, num_epochs=100):
  loss_fn = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(model.parameters(), lr=3e-2, weight_decay=0.001)
  for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
      optimizer.zero_grad()
      outputs = model(inputs)
      loss = loss_fn(outputs, labels)
      loss.backward()
      optimizer.step()

    train_acc = get_accuracy(model, train_loader)
    test_acc = get_accuracy(model, test_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}")

In [14]:
linear_model = torch.nn.Linear(32, 8)
train_model(linear_model, train_loader, test_loader)

nn_model = torch.nn.Sequential(
    torch.nn.Linear(32, 100),
    torch.nn.ReLU(),
    torch.nn.Linear(100, 8)
)
train_model(nn_model, train_loader, test_loader)

Epoch 1/100, Train Acc: 0.8101, Test Acc: 0.7999
Epoch 2/100, Train Acc: 0.8269, Test Acc: 0.8050
Epoch 3/100, Train Acc: 0.8327, Test Acc: 0.8115
Epoch 4/100, Train Acc: 0.8356, Test Acc: 0.8153
Epoch 5/100, Train Acc: 0.8378, Test Acc: 0.8160
Epoch 6/100, Train Acc: 0.8401, Test Acc: 0.8205
Epoch 7/100, Train Acc: 0.8415, Test Acc: 0.8224
Epoch 8/100, Train Acc: 0.8431, Test Acc: 0.8224
Epoch 9/100, Train Acc: 0.8443, Test Acc: 0.8250
Epoch 10/100, Train Acc: 0.8447, Test Acc: 0.8269
Epoch 11/100, Train Acc: 0.8449, Test Acc: 0.8263
Epoch 12/100, Train Acc: 0.8448, Test Acc: 0.8269
Epoch 13/100, Train Acc: 0.8454, Test Acc: 0.8263
Epoch 14/100, Train Acc: 0.8456, Test Acc: 0.8250
Epoch 15/100, Train Acc: 0.8458, Test Acc: 0.8269
Epoch 16/100, Train Acc: 0.8469, Test Acc: 0.8243
Epoch 17/100, Train Acc: 0.8474, Test Acc: 0.8288
Epoch 18/100, Train Acc: 0.8476, Test Acc: 0.8243
Epoch 19/100, Train Acc: 0.8473, Test Acc: 0.8263
Epoch 20/100, Train Acc: 0.8466, Test Acc: 0.8288
Epoch 21/