In [1]:
import pandas as pd

# Load the full crop dataset from the CSV that sits next to the notebook.
data = pd.read_csv('Crop_Dataset.csv')

from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for evaluation. The fixed random_state makes the
# split identical on every "Restart & Run All", so metrics are comparable
# between runs.
train_data, test_data = train_test_split(data, train_size=0.9, random_state=42)

train_data.shape, test_data.shape

((1980, 12), (220, 12))

In [2]:
# Count the distinct encoded labels (a missing value, if any, counts as one
# label, exactly as it would with len(unique())).
n_classes = data['Label_Encoded'].nunique(dropna=False)
n_classes

22

In [3]:
import torch

# Use a CUDA GPU when PyTorch can see one; otherwise run on the CPU.
# The backend name must match the availability check that guards it.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [4]:
from torch.utils.data import Dataset
from torch.nn.functional import normalize

class CustomDataset(Dataset):
    """Tabular dataset yielding ``(features, label)`` pairs.

    Features are every column except the last two, cast to float32 and
    min-max scaled column-wise. Labels are the last column, cast to int64.

    Args:
        data: DataFrame whose last column holds the integer class label and
            whose last two columns are excluded from the features.
        feature_min: Optional per-column minima to scale with. When omitted,
            the minima of ``data`` itself are used.
        feature_max: Optional per-column maxima to scale with. When omitted,
            the maxima of ``data`` itself are used.

    Attributes:
        inputs: float32 array of scaled features, one row per sample.
        outputs: int64 array of class labels.
        feature_min, feature_max: the statistics actually used for scaling,
            so they can be passed to another ``CustomDataset``.
    """

    def __init__(self, data, feature_min=None, feature_max=None):
        # Local import: the notebook has no top-level numpy import.
        import numpy as np

        features = data.iloc[:, 0:-2].values.astype('float32')

        if feature_min is None:
            lo = features.min(axis=0)
        else:
            lo = np.asarray(feature_min, dtype='float32')
        if feature_max is None:
            hi = features.max(axis=0)
        else:
            hi = np.asarray(feature_max, dtype='float32')

        self.feature_min = lo
        self.feature_max = hi

        # A constant column has zero range; dividing by it would produce
        # NaN/inf, so use a divisor of 1 and let the column scale to 0.
        span = hi - lo
        span = np.where(span == 0, np.float32(1.0), span)

        # Min-max normalisation of all columns at once.
        self.inputs = (features - lo) / span

        # Explicit int64: plain 'int' is platform-dependent, and the loss
        # function used for training needs 64-bit integer targets.
        self.outputs = data.iloc[:, -1].values.astype('int64')

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.inputs[index], self.outputs[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.inputs)

In [5]:
batch_size = 64

# Training batches are reshuffled every epoch. The evaluation loader keeps a
# fixed order: shuffling adds nothing there and would change which samples
# fall into the final, smaller batch between runs.
# NOTE(review): each CustomDataset is scaled with statistics computed from the
# frame it receives, so train and test are normalised independently.
train_data_loader = torch.utils.data.DataLoader(
    CustomDataset(train_data), batch_size=batch_size, shuffle=True
)
test_data_loader = torch.utils.data.DataLoader(
    CustomDataset(test_data), batch_size=batch_size, shuffle=False
)

In [6]:
# Peek at one training batch: report tensor shapes and show the first sample.
first_batch = next(iter(train_data_loader), None)
if first_batch is not None:
    X, y = first_batch
    print("Shape of X:", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    print("Sample X: ", X[0])
    print("Sample y: ", y[0])

Shape of X: torch.Size([64, 10])
Shape of y:  torch.Size([64]) torch.int64
Sample X:  tensor([0.0000, 0.4286, 0.0950, 0.5644, 0.5622, 0.6744, 0.1183, 0.1957, 0.4004,
        0.3539])
Sample y:  tensor(8)


In [7]:
import torch.nn as nn

class NeuralNetwork(nn.Module):
    """Fully connected classifier with two sigmoid-activated hidden layers.

    Args:
        in_features: Number of input features per sample (default 10).
        hidden_features: Width of both hidden layers (default 512).
        n_classes: Number of output logits (default 22).

    The defaults reproduce the original fixed 10 -> 512 -> 512 -> 22 layout.
    """

    def __init__(self, in_features=10, hidden_features=512, n_classes=22):
        super().__init__()
        # The attribute name is kept so the printed module and state_dict
        # keys stay unchanged.
        # NOTE(review): the name says "relu" but the activations are Sigmoid;
        # confirm which one is intended.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.Sigmoid(),
            nn.Linear(hidden_features, hidden_features),
            nn.Sigmoid(),
            nn.Linear(hidden_features, n_classes)
        )

    def forward(self, x):
        """Return the unnormalised class scores (logits) for batch ``x``."""
        logits = self.linear_relu_stack(x)
        return logits

In [8]:
# Build the network, place it on the selected device, and show its layers.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=10, out_features=512, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=512, out_features=22, bias=True)
  )
)


In [9]:
# Smoke test: run one random 10-feature sample through the untrained model.

_X = torch.rand(1, 10, device=device)
logits = model(_X)
# Convert the logits to class probabilities, then pick the most likely class.
predicted_prob = logits.softmax(dim=1)
y_pred = predicted_prob.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([18])


In [10]:
# Optimisation hyperparameters.
learning_rate = 0.01
batch_size = 64
epochs = 500

# Multi-class objective applied to the model's raw logits.
loss_fn = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over every model parameter.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [11]:
import wandb

# Run metadata recorded alongside the logged metrics.
configs = dict(
    learning_rate=learning_rate,
    architecture="ANN",
    dataset="Crop",
    epochs=epochs,
    batch_size=batch_size,
)

# Start a tracking run in the "Intellihack_1" project.
wandb.init(project="Intellihack_1", config=configs)

[34m[1mwandb[0m: Currently logged in as: [33mlakshith[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [12]:
def train_loop(dataloader):
    """Train the global ``model`` for one pass over ``dataloader``.

    Relies on the notebook-level ``model``, ``loss_fn``, ``optimizer`` and
    ``wandb`` objects. For every batch it logs the loss to wandb and rewrites
    a single progress line on stdout.

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing a ``dataset``
            attribute with a length.
    """
    size = len(dataloader.dataset)
    # Batches come from the loader on the CPU; they must live on the same
    # device as the model parameters or the forward pass fails.
    target_device = next(model.parameters()).device
    seen = 0  # samples processed so far, independent of any global batch size

    model.train()
    for X, y in dataloader:
        X, y = X.to(target_device), y.to(target_device)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        batch_loss = loss.item()
        seen += len(X)

        wandb.log({
            "Loss": batch_loss
        })
        print(f"\rloss: {batch_loss:>7f}  [{seen:>5d}/{size:>5d}]", end="")

In [13]:
def test_loop(dataloader):
    """Evaluate the global ``model`` on ``dataloader`` and print the metrics.

    Relies on the notebook-level ``model`` and ``loss_fn``. Prints a dict
    holding the mean of the per-batch losses (``test_loss``) and the fraction
    of correctly classified samples (``test_acc``).

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing a ``dataset``
            attribute with a length.
    """
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    # Batches come from the loader on the CPU; they must live on the same
    # device as the model parameters or the forward pass fails.
    target_device = next(model.parameters()).device

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(target_device), y.to(target_device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size

    print("\n",{"test_loss": test_loss, "test_acc": correct})

In [14]:
# One training pass followed by one evaluation pass per epoch.
for t in range(epochs):
    # Header line, then an empty line before the progress output.
    print(f"Epoch {t+1}\n-------------------------------", "", sep="\n")
    train_loop(train_data_loader)
    test_loop(test_data_loader)

Epoch 1
-------------------------------
loss: 3.097483  [ 1980/ 1980]
 {'test_loss': 3.1118602752685547, 'test_acc': 0.03636363636363636}
Epoch 2
-------------------------------

loss: 3.089077  [ 1980/ 1980]
 {'test_loss': 3.0994741916656494, 'test_acc': 0.02727272727272727}
Epoch 3
-------------------------------

loss: 3.084726  [ 1980/ 1980]
 {'test_loss': 3.1021485328674316, 'test_acc': 0.031818181818181815}
Epoch 4
-------------------------------

loss: 3.097326  [ 1980/ 1980]
 {'test_loss': 3.1006672978401184, 'test_acc': 0.02727272727272727}
Epoch 5
-------------------------------

loss: 3.110191  [ 1980/ 1980]
 {'test_loss': 3.0965540409088135, 'test_acc': 0.045454545454545456}
Epoch 6
-------------------------------

loss: 3.093148  [ 1980/ 1980]
 {'test_loss': 3.1012951731681824, 'test_acc': 0.04090909090909091}
Epoch 7
-------------------------------

loss: 3.094119  [ 1980/ 1980]
 {'test_loss': 3.099958121776581, 'test_acc': 0.02727272727272727}
Epoch 8
-------------------