In [1]:
import pandas as pd

data = pd.read_csv('Crop_Dataset.csv')

from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(data, train_size=0.8, random_state=10)
validation_data, test_data = train_test_split(test_data, train_size=0.5, random_state=10)

train_data.shape, test_data.shape, validation_data.shape

((1760, 12), (220, 12), (220, 12))

In [2]:
n_classes = len(data['Label_Encoded'].unique())
n_classes

22

In [4]:
import torch

device = "mps"
print(f"Using {device} device")

Using mps device


In [4]:
# manual normalization

min_values = []
max_values = []

for i in range(10):
    min_values.append(data.iloc[:, i].min())
    max_values.append(data.iloc[:, i].max())
    
print(min_values, max_values)

[0, 5, 5, 8.825674745, 14.25803981, 3.504752314, 20.21126747, 17, 247.6131816, 3.054532525] [140, 145, 205, 43.67549305, 99.98187601, 9.93509073, 298.5601175, 385, 4073.159566, 5.702315124]


In [5]:
from torch.utils.data import Dataset
from torch.nn.functional import normalize

class CustomDataset(Dataset):
    def __init__(self, data):
        self.inputs = data.iloc[:, 0:-2].values.astype('float32')
        
        # for i in range(self.inputs.shape[1]):
        #     self.inputs[:, i] = normalize(torch.tensor(self.inputs[:, i].reshape(-1, 1)), dim=0).reshape(-1)
        
        for i in range(self.inputs.shape[1]):
            max_val = max_values[i]
            min_val = min_values[i]
            self.inputs[:, i] = (self.inputs[:, i] - min_val) / (max_val - min_val)
        
        self.outputs = data.iloc[:, -1].values.astype('int')
        
        self.inputs = torch.tensor(self.inputs, device=device)
        self.outputs = torch.tensor(self.outputs, device=device)

    def __getitem__(self, index):
        return self.inputs[index], self.outputs[index]

    def __len__(self):
        return len(self.inputs)

In [6]:
batch_size = 128

train_data_loader = torch.utils.data.DataLoader(CustomDataset(train_data), batch_size=batch_size, shuffle=True)
test_data_loader = torch.utils.data.DataLoader(CustomDataset(test_data), batch_size=batch_size, shuffle=True)
validation_data_loader = torch.utils.data.DataLoader(CustomDataset(validation_data), batch_size=batch_size, shuffle=True)

In [7]:
# print shapes and samples from the dataset
for X, y in train_data_loader:
    print("Shape of X:", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    print("Sample X: ", X[0])
    print("Sample y: ", y[0])
    break

Shape of X: torch.Size([128, 10])
Shape of y:  torch.Size([128]) torch.int64
Sample X:  tensor([0.2786, 0.2286, 0.0500, 0.5788, 0.8111, 0.5158, 0.1424, 0.2011, 0.5704,
        0.3980], device='mps:0')
Sample y:  tensor(6, device='mps:0')


In [8]:
import torch.nn as nn

class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(10, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 22),
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits

In [9]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=10, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=22, bias=True)
  )
)


In [10]:
# testing predictions

_X = torch.rand(1, 10, device=device)
logits = model(_X)
predicted_prob = nn.Softmax(dim=1)(logits)
y_pred = predicted_prob.argmax(1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([5], device='mps:0')


In [11]:
learning_rate = 1e-2
batch_size = 64
epochs = 500

loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [12]:
import wandb

configs = {
    "learning_rate": learning_rate,
    "architecture": "ANN",
    "dataset": "Crop",
    "epochs": epochs,
    "batch_size": batch_size
}

wandb.init(
    project="Intellihack_1",
    config=configs
)

[34m[1mwandb[0m: Currently logged in as: [33mlakshith[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [13]:
def train_loop(dataloader):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        loss, current = loss.item(), batch * batch_size + len(X)
        
        wandb.log({
            "Loss": loss
        })
        print(f"\rloss: {loss:>7f}  [{current:>5d}/{size:>5d}]", end="")

In [14]:
def test_loop(dataloader):
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    
    wandb.log({
        "Test Loss": test_loss,
        "Test Accuracy": correct
    })
    
    print("Test: \nAccuracy: ", correct, "\nAvg loss: ", test_loss)

In [15]:
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    print("")
    train_loop(train_data_loader)
    test_loop(test_data_loader)

print("\n\n\nValidation Data: ")
test_loop(validation_data_loader)

Epoch 1
-------------------------------
loss: 3.089881  [  928/ 1760]Test: 
Accuracy:  0.03636363636363636 
Avg loss:  3.092055082321167
Epoch 2
-------------------------------

loss: 3.088497  [  928/ 1760]Test: 
Accuracy:  0.06363636363636363 
Avg loss:  3.0833152532577515
Epoch 3
-------------------------------

loss: 3.063095  [  928/ 1760]Test: 
Accuracy:  0.07727272727272727 
Avg loss:  3.076580047607422
Epoch 4
-------------------------------

loss: 3.063967  [  928/ 1760]Test: 
Accuracy:  0.10909090909090909 
Avg loss:  3.0689709186553955
Epoch 5
-------------------------------

loss: 3.060498  [  928/ 1760]Test: 
Accuracy:  0.12727272727272726 
Avg loss:  3.0622379779815674
Epoch 6
-------------------------------

loss: 3.059889  [  928/ 1760]Test: 
Accuracy:  0.17727272727272728 
Avg loss:  3.054377555847168
Epoch 7
-------------------------------

loss: 3.042124  [  928/ 1760]Test: 
Accuracy:  0.20909090909090908 
Avg loss:  3.047690987586975
Epoch 8
------------------------

In [7]:
import joblib

joblib.dump(model, 'model.joblib')