In [1]:
with open("wdbc.names") as f:
    print(f.read())

1. Title: Wisconsin Diagnostic Breast Cancer (WDBC)

2. Source Information

a) Creators: 

	Dr. William H. Wolberg, General Surgery Dept., University of
	Wisconsin,  Clinical Sciences Center, Madison, WI 53792
	wolberg@eagle.surgery.wisc.edu

	W. Nick Street, Computer Sciences Dept., University of
	Wisconsin, 1210 West Dayton St., Madison, WI 53706
	street@cs.wisc.edu  608-262-6619

	Olvi L. Mangasarian, Computer Sciences Dept., University of
	Wisconsin, 1210 West Dayton St., Madison, WI 53706
	olvi@cs.wisc.edu 

b) Donor: Nick Street

c) Date: November 1995

3. Past Usage:

first usage:

	W.N. Street, W.H. Wolberg and O.L. Mangasarian 
	Nuclear feature extraction for breast tumor diagnosis.
	IS&T/SPIE 1993 International Symposium on Electronic Imaging: Science
	and Technology, volume 1905, pages 861-870, San Jose, CA, 1993.

OR literature:

	O.L. Mangasarian, W.N. Street and W.H. Wolberg. 
	Breast cancer diagnosis and prognosis via linear programming. 
	Operations Research, 43(4), pag

In [2]:
import numpy as np

in_arr1 = np.array([1, 2, 3])
in_arr2 = np.array([3, 4, 5])


out_arr = np.subtract(1, in_arr1) 

print(out_arr)
print(in_arr1 * in_arr2)

[ 0 -1 -2]
[ 3  8 15]


In [3]:
import math


def concat(X):
    return pd.DataFrame(np.c_[np.ones((X.shape[0], 1)), X])




class MiniB:
    def __init__(self, X, y):
        self.w = np.random.normal(0, 0.000005, size = 31)
    
    def prediction(self, X):
        r = 1/(1 + np.exp((-1.0)*(X.dot(self.w))))
        return r
    
    
    
    
    def minib(self, train, n_k=0.000001, num_iterations=100, batch_size = 20):
        for iteration in range(num_iterations):
            t_shuf = train.sample(frac = 1)          
            y = t_shuf.iloc[:,1]
            y = y.replace(to_replace=['B', 'M'], value=['0', '1'])
            y = y.convert_dtypes()
            y = pd.to_numeric(y, errors='coerce')
            X = t_shuf.iloc[:,2:32]
            X = concat(X)
            for i in range(int(X.shape[0]/batch_size)):
                for j in range(batch_size):
                    z_neg = (-1.0)*(X.iloc[j,:].dot(self.w))
                    ex = np.exp(z_neg)
                    pred = 1/(1 + ex)
                    e = y.iloc[j] - pred
                    error_grad = np.dot(X.iloc[j,:].T, e)
                    self.w += n_k * error_grad * X.iloc[j,:]
        print(self.w)
        return self.w


In [4]:
import pandas as pd
BC = pd.read_csv('wdbc.data')
print(BC)
print(BC.shape)


       842302  M  17.99  10.38   122.8    1001   0.1184   0.2776   0.3001  \
0      842517  M  20.57  17.77  132.90  1326.0  0.08474  0.07864  0.08690   
1    84300903  M  19.69  21.25  130.00  1203.0  0.10960  0.15990  0.19740   
2    84348301  M  11.42  20.38   77.58   386.1  0.14250  0.28390  0.24140   
3    84358402  M  20.29  14.34  135.10  1297.0  0.10030  0.13280  0.19800   
4      843786  M  12.45  15.70   82.57   477.1  0.12780  0.17000  0.15780   
..        ... ..    ...    ...     ...     ...      ...      ...      ...   
563    926424  M  21.56  22.39  142.00  1479.0  0.11100  0.11590  0.24390   
564    926682  M  20.13  28.25  131.20  1261.0  0.09780  0.10340  0.14400   
565    926954  M  16.60  28.08  108.30   858.1  0.08455  0.10230  0.09251   
566    927241  M  20.60  29.33  140.10  1265.0  0.11780  0.27700  0.35140   
567     92751  B   7.76  24.54   47.92   181.0  0.05263  0.04362  0.00000   

      0.1471  ...   25.38  17.33   184.6    2019   0.1622   0.6656  0.7119 

In [5]:
import numpy as np
from sklearn.model_selection import train_test_split


BC_train, BCtest= train_test_split(BC, train_size = 0.6)
BC_val, BC_test = train_test_split(BCtest, train_size = 0.5)

print(BC_train.head())
print(BC_train.shape)
print(BC_val.head())
print(BC_val.shape)
print(BC_test.head())
print(BC_test.shape)

       842302  M  17.99  10.38   122.8   1001   0.1184   0.2776   0.3001  \
238  88330202  M  17.46  39.28  113.40  920.6  0.09812  0.12980  0.14170   
324  89511502  B  12.67  17.30   81.25  489.9  0.10280  0.07664  0.03193   
294    891923  B  13.77  13.27   88.06  582.7  0.09198  0.06221  0.01063   
557    925277  B  14.59  22.68   96.39  657.1  0.08473  0.13300  0.10290   
117    864877  M  15.78  22.91  105.70  782.6  0.11550  0.17520  0.21330   

      0.1471  ...  25.38  17.33   184.6    2019  0.1622  0.6656   0.7119  \
238  0.08811  ...  22.51  44.87  141.20  1408.0  0.1365  0.3735  0.32410   
324  0.02107  ...  13.71  21.10   88.70   574.4  0.1384  0.1212  0.10200   
294  0.01917  ...  14.67  16.93   94.17   661.1  0.1170  0.1072  0.03732   
557  0.03736  ...  15.48  27.27  105.90   733.5  0.1026  0.3171  0.36620   
117  0.09479  ...  20.19  30.50  130.30  1272.0  0.1855  0.4925  0.73560   

      0.2654  0.4601   0.1189  
238  0.20660  0.2853  0.08496  
324  0.05602  0.2688  

In [6]:
X_train = BC_train.iloc[:,2:32]
y_train = BC_train.iloc[:,1]

X_train = concat(X_train)

MB = MiniB(X_train, y_train)
MB.w



MB.minib(BC_train)

  ex = np.exp(z_neg)


0    -2.294926e-03
1    -2.521217e-01
2    -5.480264e-01
3    -9.676222e+00
4    -5.757896e+00
5    -1.464960e-05
6    -1.690144e-06
7     3.437403e-05
8     9.817034e-06
9    -6.343154e-05
10   -7.991130e-06
11    1.039179e-04
12   -2.413823e-03
13    8.700722e-03
14    3.513592e+00
15    6.788376e-06
16    1.181313e-06
17   -9.523249e-07
18    6.076550e-06
19    2.402488e-06
20   -3.578656e-06
21   -3.033625e-01
22   -9.370363e-01
23   -1.152195e+01
24    7.080719e+00
25   -2.419213e-05
26    8.092544e-05
27    1.495984e-04
28    8.609826e-06
29   -1.168810e-04
30   -1.707911e-05
Name: 0, dtype: float64


0    -2.294926e-03
1    -2.521217e-01
2    -5.480264e-01
3    -9.676222e+00
4    -5.757896e+00
5    -1.464960e-05
6    -1.690144e-06
7     3.437403e-05
8     9.817034e-06
9    -6.343154e-05
10   -7.991130e-06
11    1.039179e-04
12   -2.413823e-03
13    8.700722e-03
14    3.513592e+00
15    6.788376e-06
16    1.181313e-06
17   -9.523249e-07
18    6.076550e-06
19    2.402488e-06
20   -3.578656e-06
21   -3.033625e-01
22   -9.370363e-01
23   -1.152195e+01
24    7.080719e+00
25   -2.419213e-05
26    8.092544e-05
27    1.495984e-04
28    8.609826e-06
29   -1.168810e-04
30   -1.707911e-05
Name: 0, dtype: float64

In [7]:
p = MB.prediction(X_train)

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [8]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score


p = np.where(p > 0.5, 1, 0)
print(p)
print(p.shape)
y_train = y_train.replace(to_replace=['B', 'M'], value=['0', '1'])
y_train = y_train.convert_dtypes()
y_train = pd.to_numeric(y_train, errors='coerce')
print(y_train)
print(accuracy_score(y_train, p))
print(precision_score(y_train, p))
print(recall_score(y_train, p))
print(f1_score(y_train, p))

[1 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1 0 0 1 0 0 0 1
 1 0 0 0 0 1 1 0 0 0 1 1 0 0 1 1 0 1 0 0 0 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 0
 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 1 1 1 1 0 0 0 0 1 1 0 0 0 1 0
 0 0 0 0 1 1 1 1 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 1 1 0 0 0 0 0
 1 1 0 0 1 0 1 0 1 0 1 0 0 1 0 0 0 0 1 1 1 1 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 1 0 0 0 1 1 0 1 1 0 0 0 0 1 0 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 1
 0 0 0 1 0 1 1 0 0 1 0 1 0 0 1 1 0 1 0 0 0 0 0 1 0 1 1 0 0 0 1 1 0 0 1 0 0
 0 0 0 0 1 1 0 1 0 0 0 1 0 1 1 1 1 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 1 0 0 0 0
 0 0 1 1 1 1 0 0 0 1 0 0 1 0 1 1 0 1 0 1 0 1 1 1 1 1 0 1 0 1 0 0 0 0 0 0 0
 0 0 1 0 0 0 0]
(340,)
238    1
324    0
294    0
557    0
117    1
      ..
93     1
302    0
245    0
205    0
54     0
Name: M, Length: 340, dtype: Int64
0.9264705882352942
0.9338842975206612
0.8692307692307693
0.9003984063745021


In [9]:
X_train = BC_train.iloc[:,2:32]
y_train = BC_train.iloc[:,1]

X_train = concat(X_train)


In [10]:
import pandas as pd

y_train
y_train = y_train.replace(to_replace=['B', 'M'], value=['0', '1'])
y_train = y_train.convert_dtypes()
y_train = pd.to_numeric(y_train, errors='coerce')
y_train.iloc[3]

0