In [1]:
import numpy as np

import pandas as pd
import torch
import torch.nn as nn
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, \
    f1_score

In [2]:
# Data loader class

class BreastCancerDataset:
    """Prepares the scikit-learn breast cancer dataset as PyTorch tensors.

    ``load_data`` returns the raw arrays; ``make_split`` produces standardized
    train / test / validation tensors.
    """

    # StandardScaler fitted on the training split by the most recent
    # make_split() call; None until make_split() has been called. Kept so that
    # data seen later can be normalized with the same training statistics.
    scaler = None

    def load_data(self):
        """Loads raw data from the breast cancer dataset.

        Returns:
            tuple: ``(X, y)`` -- the ``data`` and ``target`` fields of the
            object returned by ``load_breast_cancer()``.
        """
        dataset = load_breast_cancer()
        return dataset.data, dataset.target

    def make_split(self, test_size=0.2, random_state=42, val_fraction=0.5):
        """Splits the data into standardized training, test and validation sets.

        The data is first split into a training part and a held-out part; the
        held-out part is then split again into test and validation sets. With
        the defaults this gives roughly an 80% / 10% / 10% split.

        Args:
            test_size: Passed to the first ``train_test_split`` call as
                ``test_size``; the size of the held-out part (test and
                validation combined).
            random_state: Seed passed to both ``train_test_split`` calls.
            val_fraction: Passed to the second ``train_test_split`` call as
                ``test_size``; the share of the held-out part that becomes the
                validation set. The remainder becomes the test set.

        Returns:
            tuple: ``(X_train, X_test, X_val, y_train, y_test, y_val)``. Note
            that the test tensors come before the validation tensors. Feature
            tensors are ``torch.float32``; label tensors are ``torch.long``.

        Side effects:
            Stores the scaler fitted on the training split in ``self.scaler``.
        """
        X, y = self.load_data()
        X_train, X_rest, y_train, y_rest = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )
        # In this second split the "train" outputs are used as the test set and
        # the "test" outputs as the validation set.
        X_test, X_val, y_test, y_val = train_test_split(
            X_rest, y_rest, test_size=val_fraction, random_state=random_state
        )

        # Fit the scaler on the training split only, so no statistics from the
        # validation or test data leak into the normalization.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_val = scaler.transform(X_val)
        X_test = scaler.transform(X_test)
        self.scaler = scaler

        # Order here fixes the order of the returned tuple: train, test, val.
        features = [
            torch.tensor(part, dtype=torch.float32)
            for part in (X_train, X_test, X_val)
        ]
        labels = [
            torch.tensor(part, dtype=torch.long)
            for part in (y_train, y_test, y_val)
        ]
        return (*features, *labels)
