In [1]:
from sklearn.datasets import make_classification
import torch

In [2]:
X, y = make_classification(
    n_samples=10,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)

In [4]:
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

In [5]:
X

tensor([[ 1.0683, -0.9701],
        [-1.1402, -0.8388],
        [-2.8954,  1.9769],
        [-0.7206, -0.9606],
        [-1.9629, -0.9923],
        [-0.9382, -0.5430],
        [ 1.7273, -1.1858],
        [ 1.7774,  1.5116],
        [ 1.8997,  0.8344],
        [-0.5872, -1.9717]])

In [6]:
y

tensor([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

In [9]:
from torch.utils.data import Dataset, DataLoader

In [14]:
class CustomData(Dataset):

  def __init__(self, features, labels):
    self.features = features
    self.labels = labels

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self, index):
    return self.features[index], self.labels[index]

In [15]:
dataset = CustomData(X, y)

In [16]:
len(dataset)

10

In [17]:
dataset[4]

(tensor([-1.9629, -0.9923]), tensor(0))

In [18]:
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [20]:
for batch_feature, batch_lablel  in dataloader:
  print(batch_feature)
  print(batch_lablel)
  print('-'*50)

tensor([[-0.9382, -0.5430],
        [ 1.0683, -0.9701]])
tensor([1, 1])
--------------------------------------------------
tensor([[-1.1402, -0.8388],
        [ 1.7774,  1.5116]])
tensor([0, 1])
--------------------------------------------------
tensor([[-1.9629, -0.9923],
        [ 1.7273, -1.1858]])
tensor([0, 1])
--------------------------------------------------
tensor([[ 1.8997,  0.8344],
        [-0.5872, -1.9717]])
tensor([1, 0])
--------------------------------------------------
tensor([[-0.7206, -0.9606],
        [-2.8954,  1.9769]])
tensor([0, 0])
--------------------------------------------------


**Breast Cancer training with Dataloader**

In [21]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import torch.nn as nn

In [22]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.drop(columns=['id', 'Unnamed: 32'], inplace=True)
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,1:], df.iloc[:, 0], test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)
X_train_tensor = torch.from_numpy(X_train.astype(np.float32))
X_test_tensor = torch.from_numpy(X_test.astype(np.float32))
y_train_tensor = torch.from_numpy(y_train.astype(np.float32))
y_test_tensor = torch.from_numpy(y_test.astype(np.float32))

In [23]:
class CustomData(Dataset):

  def __init__(self, features, labels):
    self.features = features
    self.labels = labels

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self, index):
    return self.features[index], self.labels[index]

In [24]:
train_dataset = CustomData(X_train_tensor, y_train_tensor)
test_dataset = CustomData(X_test_tensor, y_test_tensor)

In [28]:
len(train_dataset)

455

In [29]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [31]:
class MySimpleNN(nn.Module):

  def __init__(self, num_features):
    super().__init__()
    self.linear = nn.Linear(num_features, 1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, features):
    out = self.linear(features)
    out = self.sigmoid(out)
    return out



In [33]:
# Parameters
learning_rate = 0.1
epochs = 25

In [36]:
model = MySimpleNN(X_train_tensor.shape[1])
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
loss_function = torch.nn.BCELoss()



In [37]:
for epoch in range(epochs):
  for batch_feature, batch_lablel in train_loader:
    y_pred = model(batch_feature)
    loss = loss_function(y_pred, batch_lablel.view(-1, 1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  print(f'Epochs: {epoch + 1}, Loss: {loss.item()}')

Epochs: 1, Loss: 0.13492751121520996
Epochs: 2, Loss: 0.06148868426680565
Epochs: 3, Loss: 0.1467972993850708
Epochs: 4, Loss: 0.07200119644403458
Epochs: 5, Loss: 0.17420558631420135
Epochs: 6, Loss: 0.43365272879600525
Epochs: 7, Loss: 0.04663615673780441
Epochs: 8, Loss: 0.2739345133304596
Epochs: 9, Loss: 0.16266460716724396
Epochs: 10, Loss: 0.08301521092653275
Epochs: 11, Loss: 0.07841888815164566
Epochs: 12, Loss: 0.16899462044239044
Epochs: 13, Loss: 0.4203960597515106
Epochs: 14, Loss: 0.11374886333942413
Epochs: 15, Loss: 0.016370853409171104
Epochs: 16, Loss: 0.03050500713288784
Epochs: 17, Loss: 0.6986904740333557
Epochs: 18, Loss: 0.013031721115112305
Epochs: 19, Loss: 0.13340796530246735
Epochs: 20, Loss: 0.03985001519322395
Epochs: 21, Loss: 0.0021466983016580343
Epochs: 22, Loss: 0.0045988233759999275
Epochs: 23, Loss: 0.018925894051790237
Epochs: 24, Loss: 0.6425909996032715
Epochs: 25, Loss: 0.6901746988296509


In [41]:
# Evaluate
model.eval()
accuracy_list = []
with torch.no_grad():

  for batch_features, batch_labels in test_loader:
    y_pred = model(batch_features)
    y_pred = (y_pred > 0.8).float()

    batch_accuracy = (y_pred.view(-1) == batch_labels).float().mean()
    accuracy_list.append(batch_accuracy)
  overall_accuracy = sum(accuracy_list) / len(accuracy_list)
  print(f'Overall Accuracy: {overall_accuracy}')


Overall Accuracy: 0.96875
