In [9]:
import pandas as pd
from tqdm import tqdm
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score

from minigrad.loss import BCELoss
from minigrad.nn import MLP
from minigrad.optim import ADAM

In [None]:
# Load the breast-cancer dataset that ships with scikit-learn and wrap the
# feature matrix / target vector in DataFrames so columns keep their names.
breast_cancer = load_breast_cancer()

X = pd.DataFrame(breast_cancer.data, columns=breast_cancer.feature_names)
y = pd.DataFrame(breast_cancer.target, columns=['TARGET'])

# Hold out 10% of the rows for testing. The split is stratified on the target
# and uses a fixed random_state so it is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=8,
    shuffle=True,
    stratify=y
)

# normalize all features
# scaling is important here because math.exp() overflows for arguments>700
scaler = StandardScaler()

# Build fresh frames instead of assigning columns into the frames returned by
# train_test_split: those are derived from X, and writing into them can raise
# pandas' SettingWithCopyWarning. The scaler is fitted on the training rows
# only and then reused, unchanged, for the test rows.
X_train = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)
X_test = pd.DataFrame(
    scaler.transform(X_test),
    columns=X_test.columns,
    index=X_test.index,
)

In [3]:
# Build the network: one input per feature column, sigmoid activations
# throughout. NOTE(review): nouts presumably lists the layer widths
# (16 hidden units, then a single output) -- confirm against minigrad.nn.MLP.
classifier = MLP(
    nin=len(X_train.columns),
    nouts=[16, 1],
    activation='sigmoid',
    final_layer_activation='sigmoid'
)

#setup hyperparameters
epochs = 10

# setup loss function and optimizer
bce_loss = BCELoss()
optimizer = ADAM(
    params=classifier.parameters()
)

# The training inputs and labels are the same in every epoch, so extract them
# once here instead of re-running DataFrame.iterrows() (which builds a Series
# per row) on every pass through the loop.
train_rows = list(X_train.to_numpy())
actuals = [int(label) for label in y_train['TARGET']]

# range() has a length, so tqdm infers the total on its own.
for e in tqdm(range(epochs)):
    # forward pass over the whole training set (full-batch)
    ypreds = [classifier(row) for row in train_rows]
    loss = bce_loss(actuals, ypreds)

    # zero grad -- clear gradients left over from the previous epoch
    classifier.zero_grad()

    # backward prop
    loss.backward()

    # recalculate new values for all params
    optimizer.step()

    print(f'Epoch: {e}, Loss: {loss}')

 10%|█         | 1/10 [00:04<00:40,  4.55s/it]

Epoch: 0, Loss: Value(data=0.675142696337288, grad=1, label=)


 20%|██        | 2/10 [00:09<00:38,  4.77s/it]

Epoch: 1, Loss: Value(data=0.49757587929753555, grad=1, label=)


 30%|███       | 3/10 [00:16<00:41,  5.90s/it]

Epoch: 2, Loss: Value(data=0.35650581724856284, grad=1, label=)


 40%|████      | 4/10 [00:22<00:35,  5.91s/it]

Epoch: 3, Loss: Value(data=0.2623154630928383, grad=1, label=)


 50%|█████     | 5/10 [00:29<00:31,  6.32s/it]

Epoch: 4, Loss: Value(data=0.19853770522388492, grad=1, label=)


 60%|██████    | 6/10 [00:37<00:27,  6.76s/it]

Epoch: 5, Loss: Value(data=0.1522742688996242, grad=1, label=)


 70%|███████   | 7/10 [00:43<00:19,  6.51s/it]

Epoch: 6, Loss: Value(data=0.11933238988980067, grad=1, label=)


 80%|████████  | 8/10 [00:50<00:13,  6.70s/it]

Epoch: 7, Loss: Value(data=0.09743193580896609, grad=1, label=)


 90%|█████████ | 9/10 [00:57<00:06,  6.97s/it]

Epoch: 8, Loss: Value(data=0.08568154114174314, grad=1, label=)


100%|██████████| 10/10 [01:05<00:00,  6.59s/it]

Epoch: 9, Loss: Value(data=0.08131089652750098, grad=1, label=)





In [5]:
# Give both test frames a fresh 0..n-1 index (in place) so features and
# labels line up row for row.
X_test.reset_index(drop=True, inplace=True)
y_test.reset_index(drop=True, inplace=True)

# Run the classifier on every held-out row and threshold its output at 0.5:
# below 0.5 is recorded as class 0, anything else as class 1. Each record pairs
# the predicted class with the true label from the TARGET column.
all_res = [
    {
        'PREDICTED': 0 if classifier(features).data < 0.5 else 1,
        'ACTUAL': int(label),
    }
    for features, label in zip(X_test.to_numpy(), y_test['TARGET'].to_numpy())
]

In [11]:
# Tabulate the per-row results and flag each row where the predicted class
# equals the actual one (1 = correct, 0 = incorrect).
res_df = pd.DataFrame.from_records(all_res)
res_df['IS_CORRECT'] = (res_df['PREDICTED'] == res_df['ACTUAL']).astype('int64')

# Accuracy is the share of rows flagged as correct.
n_correct = int(res_df['IS_CORRECT'].sum())
print(f'Accuracy: {n_correct/len(res_df)}')

# Precision and recall come from scikit-learn, called as (true, predicted).
y_true = res_df['ACTUAL'].to_numpy()
y_hat = res_df['PREDICTED'].to_numpy()
precision = precision_score(y_true, y_hat)
recall = recall_score(y_true, y_hat)
print(f'Precision: {precision}, Recall: {recall}')

Accuracy: 0.9649122807017544
Precision: 1.0, Recall: 0.9444444444444444
