In [1]:
import numpy as np

In [2]:
def read_training_data(fname, D=None):
    """Read a comma-separated patient file and return labels, IDs, features and diagnoses.

    Each non-blank line of the file is expected to hold, in order:
      - field 0: the integer patient ID,
      - field 1: the diagnosis letter ('B' is treated as benign, anything
        else as malignant),
      - fields 2..31: the 30 feature values, laid out as the ten "(mean)"
        values, then the ten "(stderr)" values, then the ten "(worst)"
        values, each group in the order of `params` below.

    Parameters
    ----------
    fname : path of the data file.
    D : optional iterable of feature names such as "radius(mean)".  When
        given, only those features are extracted; when None (the default)
        all 30 features are used.

    Returns
    -------
    (feature_labels, patient_ids, A, b) where
      - feature_labels is the set of feature names that were extracted,
      - patient_ids is the list of patient IDs in file order,
      - A is a numpy matrix with one row per patient; its columns follow the
        iteration order of the returned `feature_labels` set object (that
        order is arbitrary and may differ between interpreter runs),
      - b is a numpy array holding -1 for benign and +1 for malignant patients.

    Raises
    ------
    ValueError if D names a feature that is not one of the 30 known features.
    """
    params = ["radius", "texture", "perimeter", "area", "smoothness",
              "compactness", "concavity", "concave points", "symmetry",
              "fractal dimension"]
    stats = ["(mean)", "(stderr)", "(worst)"]
    # Feature name -> 0-based position among the 30 feature fields of a line.
    feature_map = {param + stat: j * len(params) + i
                   for j, stat in enumerate(stats)
                   for i, param in enumerate(params)}

    if D is None:
        feature_labels = set(feature_map)
    else:
        feature_labels = set(D)
        unknown = feature_labels - set(feature_map)
        if unknown:
            raise ValueError("unknown feature(s): " + ", ".join(sorted(map(str, unknown))))

    # Resolve the file columns once; +2 skips the ID and diagnosis fields.
    columns = [feature_map[key] + 2 for key in feature_labels]

    patient_ids = []
    feature_vectors = []
    patient_diagnoses = []
    # The context manager guarantees the file is closed, even on a parse error.
    with open(fname) as data_file:
        for line in data_file:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            row = line.split(",")
            patient_ids.append(int(row[0]))
            patient_diagnoses.append(-1 if row[1].strip() == 'B' else +1)
            feature_vectors.append([float(row[col]) for col in columns])
    return feature_labels, patient_ids, np.matrix(feature_vectors), np.array(patient_diagnoses)

In [3]:
# Load the training file and unpack the four values returned by
# read_training_data: feature-name set, patient IDs, feature matrix, diagnoses.
labels, ids, matrix, vectors = read_training_data('train.data')

In [4]:
# Show the feature names that were loaded.
print(labels)

{'compactness(worst)', 'concavity(worst)', 'radius(worst)', 'perimeter(mean)', 'radius(mean)', 'area(stderr)', 'texture(worst)', 'fractal dimension(stderr)', 'concave points(stderr)', 'smoothness(worst)', 'concave points(mean)', 'concavity(mean)', 'smoothness(mean)', 'texture(stderr)', 'concave points(worst)', 'radius(stderr)', 'perimeter(worst)', 'perimeter(stderr)', 'concavity(stderr)', 'area(mean)', 'compactness(stderr)', 'symmetry(mean)', 'compactness(mean)', 'area(worst)', 'symmetry(worst)', 'fractal dimension(worst)', 'fractal dimension(mean)', 'smoothness(stderr)', 'symmetry(stderr)', 'texture(mean)'}


In [5]:
def signum(u):
    """Map every element of `u` to +1 (if it is >= 0) or -1 (otherwise).

    Elements are visited with np.ndenumerate, so the result is a flat array
    with one sign per element of `u`.
    """
    signs = []
    for _index, value in np.ndenumerate(u):
        if value >= 0:
            signs.append(1)
        else:
            signs.append(-1)
    return np.array(signs)

In [6]:
# Quick check of signum on a small vector with mixed signs.
signum(np.array([10, -20, 5, -3, 6]))

array([ 1, -1,  1, -1,  1])

In [7]:
def fraction_wrong(A, b, w):
    """Return the fraction of rows of A whose predicted sign disagrees with b.

    The prediction for each row is signum(A.dot(w)).  The formula below
    assumes the entries of b are +1 or -1 (as produced by
    read_training_data): the dot product of two such sign vectors equals
    (#agreements - #disagreements), so (n - dot) / 2 counts the disagreements.
    """
    n_samples = len(b)
    predicted_signs = signum(A.dot(w))
    agreement = predicted_signs.dot(b)
    n_wrong = (n_samples - agreement) * 0.5
    return n_wrong / n_samples
    

In [8]:
# Random starting weights: a list of 30 one-element arrays from np.random.rand.
w = [v for v in np.random.rand(30, 1)]
print(w)
# Alternative all-ones starting point, left disabled:
# w = [1 for v in range(30)]
# Evaluate fraction_wrong on the training data for these random weights.
fraction_wrong(matrix, vectors, w)

[array([ 0.84279347]), array([ 0.48365235]), array([ 0.69272041]), array([ 0.59582593]), array([ 0.51002136]), array([ 0.19013661]), array([ 0.35595869]), array([ 0.39070283]), array([ 0.71333961]), array([ 0.68382108]), array([ 0.16572063]), array([ 0.22781691]), array([ 0.5291366]), array([ 0.49040142]), array([ 0.13934652]), array([ 0.49695942]), array([ 0.98564049]), array([ 0.50295054]), array([ 0.37055127]), array([ 0.16036179]), array([ 0.31695341]), array([ 0.90839072]), array([ 0.90192716]), array([ 0.07379723]), array([ 0.4813886]), array([ 0.07421961]), array([ 0.40510735]), array([ 0.06278907]), array([ 0.63082736]), array([ 0.0980717])]


0.51333333333333331

In [9]:
def loss(A, b, w):
    """Return the sum of squared residuals between A.dot(w) and b.

    The product A.dot(w) is converted to a plain array and squeezed before b
    is subtracted, so a matrix-shaped product is compared element by element
    with the flat vector b.
    """
    predictions = np.squeeze(np.asarray(A.dot(w)))
    residual = predictions - b
    return np.dot(residual, residual)

In [10]:
# Loss for a fresh random weight vector; no seed is set in the cells shown
# here, so the value differs from run to run.
w = [v for v in np.random.rand(30, 1)]
loss(matrix, vectors, w)

18317690.434633065

In [11]:
# Repeat with another random draw to see how much the loss varies.
w = [v for v in np.random.rand(30, 1)]
loss(matrix, vectors, w)

590811920.96350086

In [12]:
# A third random draw of the weights, evaluated with the same loss.
w = [v for v in np.random.rand(30, 1)]
loss(matrix, vectors, w)

521381048.17933881

In [13]:
def find_grad(A, b, w):
    """Return 2 * ((A w) A - b A) as a flat array, one entry per column of A.

    This matches the gradient with respect to w of the squared-error loss
    computed by `loss`.  The two projections onto the columns of A are
    computed separately and then subtracted.
    """
    predictions = np.squeeze(np.asarray(A.dot(w)))
    prediction_proj = np.squeeze(predictions.dot(A))
    target_proj = b.dot(A)
    # Transpose, convert to a plain array and squeeze so that a matrix-typed
    # difference collapses to a flat vector.
    difference = np.transpose(prediction_proj - target_proj)
    return np.squeeze(np.asarray(difference)) * 2

In [14]:
# Evaluate the gradient at a random weight vector.
w = [v for v in np.random.rand(30, 1)]
find_grad(matrix, vectors, w)

array([  9.32209648e+04,   1.06226732e+05,   5.74612402e+06,
         3.16351854e+07,   4.81864908e+06,   1.85529377e+07,
         8.14290591e+06,   1.18150156e+03,   4.00185077e+03,
         4.10865142e+04,   2.08793130e+04,   3.76956828e+04,
         2.96079073e+04,   3.59483498e+05,   4.44488436e+04,
         1.61442360e+05,   3.81470490e+07,   1.14367552e+06,
         1.10528102e+04,   2.54981437e+08,   8.75535962e+03,
         5.60243486e+04,   3.74025807e+04,   3.65225693e+08,
         9.16167857e+04,   2.58582338e+04,   1.85545473e+04,
         2.04991584e+03,   6.20669164e+03,   6.05756498e+06])

In [15]:
def gradient_descent_step(A, b, w, sigma):
    """Return the weights after one gradient-descent step of size `sigma`.

    The update is w - sigma * find_grad(A, b, w); `w` itself is not modified.
    """
    scaled_gradient = find_grad(A, b, w) * sigma
    return w - scaled_gradient

In [16]:
def gradient_descent(A, b, w, sigma, T):
    """Run `T` gradient-descent steps of size `sigma` starting from `w`.

    Returns the weights after the last step; with T == 0 the starting
    weights are returned unchanged.
    """
    current = w
    for _ in range(T):
        current = gradient_descent_step(A, b, current, sigma)
    return current

In [17]:
# 30 gradient-descent steps from all-ones weights with step size 2e-9.
# NOTE(review): the recorded loss (~3.2e13) is far larger than the run with
# step size 1e-9 in the next cell, which suggests this step size diverges.
w = [1 for v in range(30)]
w = gradient_descent(matrix, vectors, w, 2 * 10 ** -9, 30)
print(loss(matrix, vectors, w))

3.18014007384e+13


In [18]:
# Same all-ones start and 30 steps, with the step size halved to 1e-9.
w = [1 for v in range(30)]
w = gradient_descent(matrix, vectors, w, 10 ** -9, 30)
print(loss(matrix, vectors, w))

1867476.16499


In [19]:
# Start from all-zero weights instead; step size 1e-9, 30 steps.
w = [0 for v in range(30)]
w = gradient_descent(matrix, vectors, w, 10 ** -9, 30)
print(loss(matrix, vectors, w))

251.010760796


In [20]:
# All-zero start again, with twice as many steps (60) at step size 1e-9.
w = [0 for v in range(30)]
w = gradient_descent(matrix, vectors, w, 10 ** -9, 60)
print(loss(matrix, vectors, w))

239.281498848


In [21]:
# NOTE(review): this import sits mid-notebook; it would conventionally live
# in the first (imports) cell.
import random

# Starting weights drawn with random.random() (values in [0, 1)),
# then 30 steps at step size 1e-9 on the unscaled features.
w = [random.random() for v in range(30)]
w = gradient_descent(matrix, vectors, w, 10 ** -9, 30)
print(loss(matrix, vectors, w))

2104965.43511


In [22]:
def MinMaxScaler(data):
    """Rescale `data` along axis 0 using (x - min) / (max - min + 1e-7).

    The minimum and maximum are taken along axis 0.  The small constant
    added to the range avoids division by zero where the minimum and
    maximum coincide.
    """
    lowest = np.min(data, axis=0)
    spread = np.max(data, axis=0) - lowest
    shifted = data - lowest
    return shifted / (spread + 1e-7)

In [23]:
# Min-max scale the feature matrix, then wrap the result back into np.matrix.
# NOTE(review): "nomalized" is a typo for "normalized"; the name is kept
# because the later cells refer to it.
nomalized_matrix = np.matrix(MinMaxScaler(np.array(matrix)))

In [24]:
# Scaled features, starting weights drawn from [-0.5, 0.5):
# 10000 steps at step size 1e-9.
w = [random.random() -0.5 for v in range(30)]
w = gradient_descent(nomalized_matrix, vectors, w, 10 ** -9, 10000)
print(loss(nomalized_matrix, vectors, w))

405.561260277


In [25]:
# Same setup with a larger step size: 10000 steps at 1e-6.
w = [random.random() -0.5 for v in range(30)]
w = gradient_descent(nomalized_matrix, vectors, w, 10 ** -6, 10000)
print(loss(nomalized_matrix, vectors, w))

220.5131476


In [26]:
# Step size 1e-6 again, with three times as many steps (30000).
w = [random.random() -0.5 for v in range(30)]
w = gradient_descent(nomalized_matrix, vectors, w, 10 ** -6, 30000)
print(loss(nomalized_matrix, vectors, w))

151.537511188


In [27]:
# Larger step size still: 30000 steps at 1e-3.
w = [random.random() -0.5 for v in range(30)]
w = gradient_descent(nomalized_matrix, vectors, w, 10 ** -3, 30000)
print(loss(nomalized_matrix, vectors, w))

92.6413208476


In [28]:
# Step size 1e-3 with fewer steps (10000), for comparison with the 30000-step run.
w = [random.random() -0.5 for v in range(30)]
w = gradient_descent(nomalized_matrix, vectors, w, 10 ** -3, 10000)
print(loss(nomalized_matrix, vectors, w))

95.0693938159
