In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from tqdm.notebook import tqdm


In [2]:
class WaterDataset(torch.utils.data.Dataset):
    def __init__(self, csv_path):
        super().__init__()
        df = pd.read_csv(csv_path)
        self.data = df.to_numpy()
    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self,idx):
        feature = torch.tensor(self.data[idx,:-1],dtype=torch.float32) #all column except last one
        label = torch.tensor(self.data[idx,-1], dtype=torch.float32)
        return feature, label

In [3]:
dataset_train = WaterDataset("water_potability/water_train.csv")

dataloader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=2,
        shuffle=True
)





In [4]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.fc1 = nn.Linear(9, 16)
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8,1)

        init.kaiming_uniform_(self.fc1.weight)
        init.kaiming_uniform_(self.fc2.weight)
        init.kaiming_uniform_(self.fc3.weight, nonlinearity="sigmoid")
    def forward(self, x):
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = nn.functional.sigmoid(self.fc3(x))
        return x


In [5]:
epochs = 100


epoch_bar = tqdm(desc="epoch_bar", total = epochs, position=0)
train_bar = tqdm(desc="train_bar", total = len(dataloader_train), position=1)


device = torch.device("cuda")
net = Net().to(device)
criterion = nn.BCELoss()
optimizer = optim.SGD(net.parameters(),lr=0.01)


for epoch in range(1000):
    for feature, label in dataloader_train:
        #forward pass
        feature = feature.to(device)
        label = label.to(device)
        output = net(feature) #forward
        loss = criterion(output, label.view(-1, 1))

        #backward pass
        loss.backward()
        optimizer.step() #optimize
        optimizer.zero_grad() #clear gradient

        train_bar.update()
        train_bar.set_postfix(loss=loss.item())
    train_bar.n = 0
    epoch_bar.update()


epoch_bar:   0%|          | 0/100 [00:00<?, ?it/s]

train_bar:   0%|          | 0/754 [00:00<?, ?it/s]

In [6]:
from torchmetrics import Accuracy

acc = Accuracy(task='binary').to(device)

net.eval()

dataset_test = WaterDataset("water_potability/water_test.csv")

dataloader_test = torch.utils.data.DataLoader(
        dataset_test,
        batch_size=2,
        shuffle=True
)



with torch.no_grad():
    for feature, label in dataloader_test:

        feature = feature.to(device)
        label = label.to(device)

        output = net(feature)

        pred = (output>=0.5).float() #true or false, float() to convert 1 and 0

        acc(pred, label.view(-1, 1))
accuracy = acc.compute()
print(f"Accuracy: {accuracy}")

Accuracy: 0.6640158891677856
