In [None]:
# Learning how Dataset and DataLoader work

In [35]:
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np
import torch
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

In [2]:
#custom dataset class
class CustomDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        features: Indexable collection of samples. If it exposes ``.shape``
            the first dimension is taken as the sample axis; otherwise
            ``len()`` is used (so plain lists work too).
        labels: Indexable collection of targets, one per sample.

    Raises:
        ValueError: If ``features`` and ``labels`` hold a different number
            of samples.
    """

    def __init__(self,features,labels):
        # Fail fast: a length mismatch would otherwise only show up as an
        # IndexError part-way through an epoch, or go unnoticed entirely
        # when there are more labels than features.
        n_features = self._num_samples(features)
        n_labels = self._num_samples(labels)
        if n_features != n_labels:
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {n_features} and {n_labels}"
            )
        self.features=features
        self.labels=labels

    @staticmethod
    def _num_samples(collection):
        """Return the size of the first axis, falling back to ``len()``."""
        shape = getattr(collection, "shape", None)
        return shape[0] if shape is not None else len(collection)

    def __len__(self):
        """Number of samples in the dataset."""
        return self._num_samples(self.features)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.features[index],self.labels[index]


In [49]:
# --- load the raw table ---
df = pd.read_csv('breast-cancer.csv')

# targets: the 'diagnosis' column run through a LabelEncoder
le = LabelEncoder()
y = np.array(le.fit_transform(df['diagnosis']))

# features: every column except the identifier and the target, as float32
x = np.array(df.drop(columns=['id','diagnosis']), dtype=np.float32)

# --- hold out 20% of the rows for testing (fixed seed) ---
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# --- standardise: statistics are fitted on the training split only ---
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# --- numpy -> torch ---
X_train, X_test = torch.tensor(X_train), torch.tensor(X_test)
Y_train, Y_test = torch.tensor(y_train), torch.tensor(y_test)

In [50]:
# Wire the training tensors into the Dataset / DataLoader classes.

# dataset: pairs each training row with its label
dataset = CustomDataset(features=X_train, labels=Y_train)

# dataloader: shuffled mini-batches of 32 samples
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=32)

In [62]:
class model(nn.Module):
    """Small fully connected network: input -> 16 -> 8 -> 4 -> output.

    ReLU follows each of the first three linear layers; the last layer's
    output is returned as-is (no activation applied here).
    """

    def __init__(self,input,output):
        super().__init__()
        # Layers are created in order 1..4 and keep their original
        # attribute names, so state_dict keys stay the same.
        self.layer1 = nn.Linear(in_features=input, out_features=16)
        self.layer2 = nn.Linear(in_features=16, out_features=8)
        self.layer3 = nn.Linear(in_features=8, out_features=4)
        self.layer4 = nn.Linear(in_features=4, out_features=output)

    def forward(self,X):
        """Run ``X`` through the three ReLU-activated layers, then ``layer4``."""
        hidden = X
        for layer in (self.layer1, self.layer2, self.layer3):
            hidden = F.relu(layer(hidden))
        return self.layer4(hidden)

#hyperparameters
# Size the network from the prepared data instead of hard-coding 30 and 2,
# so the cell keeps working if the CSV's columns or classes change.
n_features = X_train.shape[1]   # columns of the scaled training matrix
n_classes = len(le.classes_)    # distinct labels seen by the LabelEncoder
net = model(n_features, n_classes)
n_epochs=100
learning_rate=0.01
loss=nn.CrossEntropyLoss()
optimizer=optim.Adam(net.parameters(),lr=learning_rate)

In [73]:
#training loop
# The evaluation cells call net.eval(); switch back to training mode so
# re-running this cell after them still trains in the right mode.
net.train()

for epoch in range(n_epochs):
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    n_seen = 0          # number of samples seen this epoch

    for batch_x,batch_y in dataloader:
        # Clear gradients *before* the backward pass so leftovers (e.g. from
        # an interrupted earlier run of this cell) never leak into this step.
        optimizer.zero_grad()
        y_hat=net(batch_x)
        l=loss(y_hat,batch_y)
        l.backward()
        optimizer.step()

        # `loss` was built with the default reduction, i.e. a per-batch mean;
        # weight by batch size so a smaller final batch is not over-counted.
        batch_size = batch_x.size(0)
        running_loss += l.item() * batch_size
        n_seen += batch_size

    if (epoch+1)%10==0:
        # Report the mean loss over the whole epoch rather than the loss of
        # whichever mini-batch happened to come last.
        epoch_loss = running_loss / max(n_seen, 1)
        print(f"Epoch [{epoch+1}], Loss: {epoch_loss:.4f}")

Epoch [10], Loss: 0.0000
Epoch [20], Loss: 0.0000
Epoch [30], Loss: 0.0000
Epoch [40], Loss: 0.0000
Epoch [50], Loss: 0.0000
Epoch [60], Loss: 0.0000
Epoch [70], Loss: 0.0000
Epoch [80], Loss: 0.0000
Epoch [90], Loss: 0.0000
Epoch [100], Loss: 0.0000


In [70]:
#evaluation for training set
net.eval()  # put the network in evaluation mode

# No gradients are needed for scoring.
with torch.no_grad():
    outputs = net(X_train)
    # index of the largest output per row = predicted class
    _, predicted = outputs.max(dim=1)
    total = Y_train.shape[0]
    correct = predicted.eq(Y_train).sum().item()

    print(f"Training Accuracy: {100 * correct / total:.2f}%")

Training Accuracy: 100.00%


In [71]:
#evaluation for test set
net.eval()  # put the network in evaluation mode

# No gradients are needed for scoring.
with torch.no_grad():
    outputs = net(X_test)
    # index of the largest output per row = predicted class
    _, predicted = outputs.max(dim=1)
    total = Y_test.shape[0]
    correct = predicted.eq(Y_test).sum().item()

    print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 97.37%
