In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

Data extraction and preparation

In [2]:
data = pd.read_csv('working_dataset.csv')

X = data.drop(columns=['Crop_ID', 'Polygon_ID'])
y = data['Crop_ID']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12229974)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Random forest classification

In [3]:
rf = RandomForestClassifier(random_state=12229974)

param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
}

# Grid search
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2, scoring='accuracy')
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


In [4]:
best_rf = grid_search.best_estimator_
y_pred_rf = best_rf.predict(X_test)
print(f"Random Forest Best Parameters: {grid_search.best_params_}")
print(f"Random Forest Test Accuracy: {accuracy_score(y_test, y_pred_rf) * 100:.2f}%")
print(classification_report(y_test, y_pred_rf))

Random Forest Best Parameters: {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 200}
Random Forest Test Accuracy: 74.75%
              precision    recall  f1-score   support

         1.0       0.75      0.74      0.74      1011
         2.0       0.78      0.75      0.76      1020
         4.0       0.75      0.69      0.72      1035
         5.0       0.67      0.71      0.69       998
         8.0       0.80      0.80      0.80       990
        11.0       0.89      0.85      0.87       626
        12.0       0.67      0.74      0.70      1008

    accuracy                           0.75      6688
   macro avg       0.76      0.75      0.75      6688
weighted avg       0.75      0.75      0.75      6688



Time Series Transformer

In [5]:
# Encode Crop_ID labels
# Example: 1 --> 0, 2 --> 1, 3 --> 2, 4 --> 3, 8 --> 4, 11 --> 5, 12 --> 6
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

In [6]:
# Dataset class for radar signals
class RadarDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [7]:
train_dataset = RadarDataset(X_train, y_train)
test_dataset = RadarDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

In [8]:
# TST model
class TSTModel(nn.Module):
    def __init__(self, input_dim, output_dim, d_model=128, n_heads=4, num_layers=3, dropout=0.1):
        super(TSTModel, self).__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_heads, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, output_dim)

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer_encoder(x.unsqueeze(1))
        x = x.mean(dim=1)
        return self.fc(x)

In [9]:
def train_tst_model(d_model, num_layers, learning_rate):
    model = TSTModel(input_dim=X_train.shape[1], output_dim=len(set(y_train)), d_model=d_model, n_heads=4, num_layers=num_layers, dropout=0.1)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    model.train()
    for epoch in range(10):
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch [{epoch+1}/10], Loss: {running_loss/len(train_loader):.4f}')

    return model

In [10]:
def evaluate_tst_model(model):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            _, predicted = torch.max(outputs.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

    accuracy = 100 * correct / total
    print(f"TST Model Accuracy: {accuracy:.2f}%")
    return accuracy

In [11]:
hyperparameter_grid = {
    'd_model': [64, 128],
    'num_layers': [2, 3],
    'learning_rate': [0.0001, 0.001]
}

best_tst_accuracy = 0
best_tst_params = {}

# Grid search
for d_model in hyperparameter_grid['d_model']:
    for num_layers in hyperparameter_grid['num_layers']:
        for learning_rate in hyperparameter_grid['learning_rate']:
            print(f"Testing TST hyperparameters: d_model={d_model}, num_layers={num_layers}, learning_rate={learning_rate}")
            tst_model = train_tst_model(d_model, num_layers, learning_rate)
            accuracy = evaluate_tst_model(tst_model)

            if accuracy > best_tst_accuracy:
                best_tst_accuracy = accuracy
                best_tst_params = {
                    'd_model': d_model,
                    'num_layers': num_layers,
                    'learning_rate': learning_rate
                }

Testing TST hyperparameters: d_model=64, num_layers=2, learning_rate=0.0001




Epoch [1/10], Loss: 0.9562
Epoch [2/10], Loss: 0.7424
Epoch [3/10], Loss: 0.6914
Epoch [4/10], Loss: 0.6593
Epoch [5/10], Loss: 0.6374
Epoch [6/10], Loss: 0.6168
Epoch [7/10], Loss: 0.6071
Epoch [8/10], Loss: 0.5883
Epoch [9/10], Loss: 0.5774
Epoch [10/10], Loss: 0.5633
TST Model Accuracy: 78.75%
Testing TST hyperparameters: d_model=64, num_layers=2, learning_rate=0.001




Epoch [1/10], Loss: 0.8248
Epoch [2/10], Loss: 0.6908
Epoch [3/10], Loss: 0.6430
Epoch [4/10], Loss: 0.6169
Epoch [5/10], Loss: 0.5813
Epoch [6/10], Loss: 0.5543
Epoch [7/10], Loss: 0.5416
Epoch [8/10], Loss: 0.5074
Epoch [9/10], Loss: 0.4841
Epoch [10/10], Loss: 0.4640
TST Model Accuracy: 78.02%
Testing TST hyperparameters: d_model=64, num_layers=3, learning_rate=0.0001
Epoch [1/10], Loss: 0.9236
Epoch [2/10], Loss: 0.7312
Epoch [3/10], Loss: 0.6801
Epoch [4/10], Loss: 0.6499
Epoch [5/10], Loss: 0.6263
Epoch [6/10], Loss: 0.6065
Epoch [7/10], Loss: 0.5867
Epoch [8/10], Loss: 0.5743
Epoch [9/10], Loss: 0.5605
Epoch [10/10], Loss: 0.5474
TST Model Accuracy: 78.44%
Testing TST hyperparameters: d_model=64, num_layers=3, learning_rate=0.001
Epoch [1/10], Loss: 0.8570
Epoch [2/10], Loss: 0.7196
Epoch [3/10], Loss: 0.6660
Epoch [4/10], Loss: 0.6351
Epoch [5/10], Loss: 0.6132
Epoch [6/10], Loss: 0.5840
Epoch [7/10], Loss: 0.5576
Epoch [8/10], Loss: 0.5323
Epoch [9/10], Loss: 0.5075
Epoch [10/

In [12]:
print(f"Best TST Model Accuracy: {best_tst_accuracy:.2f}% with parameters: {best_tst_params}")

Best TST Model Accuracy: 79.25% with parameters: {'d_model': 128, 'num_layers': 3, 'learning_rate': 0.0001}
