In [89]:
# Songgaojun Deng 10442772
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.utils import shuffle
filename = 'data1'

df = pd.read_csv(filename,header = None) # 569 rows × 32 columns

# Missing attribute values: none

### data processing

# Drop the first column ”id” which has no meaning for model training.
df.drop(df.columns[0], axis=1, inplace=True)
df.rename(columns = {1:'class'}, inplace = True)

#Set class label malignant positive 1 and benigh negative 0 for more convenient operations.
df.loc[df['class'] == 'M', 'class'] = 1 # malignant 
df.loc[df['class'] == 'B', 'class'] = 0 # 

# Divide dataset into training 0.8 and testing 0.2 sets randomly
X_train, X_test, y_train, y_test = train_test_split(df.values[:,1:], df.values[:,0], test_size=0.2, random_state=1)


#Determine the dimention of features and then initialize feature vector theta based on Normal Distribution with a mean of 0 and a standard deviation of 1.
M,N = df.values[:,1:].shape # M : number of samples, N : number of features
init_theta = np.random.normal(0, 1, N)

#Because K-Fold validation is used, so I set the number of folds in advance.
kfold = KFold(8, True, 1)

In [90]:
def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

# averaged loss value
def avg_logistic_loss(hypothesis, y):
    return (- y * np.log(hypothesis) * np.log(1.0-hypothesis)).mean()

# calculate the loss and gradient of feature
def evaluate_gradient(loss, X,y, theta):
    m = len(y) if y is list else 1
    XT = X.transpose()
    hypothesis = sigmoid(np.dot(X,theta))
    avg_loss = loss(hypothesis, y)
    
    gradient = np.dot(XT, (hypothesis - y))/ m * 1.0
    return gradient, avg_loss

def predict(theta, X):
    return sigmoid(np.dot(X,theta))

def accuracy(pred, true):
    correct = 0
    for i in range(len(pred)):
        if pred[i] == true[i] :
            correct += 1
    accuracy = (correct)*1.0/ len(pred)
    return accuracy

# return recall, precision and accuracy
def evaluate(pred, true):
    TP = 0
    FN = 0
    FP = 0
    TN = 0
    for i in range(len(pred)):
        if pred[i] == 1 and true[i] ==1:
            TP += 1
        elif pred[i] == 0 and true[i] ==1:
            FN += 1
        elif pred[i] == 1 and true[i] ==0:
            FP += 1
        else:
            TN += 1
    recall = TP*1.0/(TP+FN)
    precision = TP*1.0/(TP+FP)
    accuracy = (TP+TN)*1.0/(TP+FN+FP+TN)
    return recall, precision, accuracy

def stochastic_gradient_descent(X,y, theta, alpha = 0.0001, epoch = 100):
    for e in range(epoch):
        X, y = shuffle(X, y)
        for i in range(len(y)):
            gradient, avg_loss = evaluate_gradient(avg_logistic_loss,X[i],y[i],theta)
            theta -= alpha * gradient
    return theta

def mini_gradient_descent(X,y, theta, alpha = 0.0001, batch_size=20, epoch = 100):
    m,n = X.shape
    
    for e in range(epoch):
        for i in range(int(m/batch_size)):
            X, y = shuffle(X, y,n_samples = batch_size)
            gradient, avg_loss = evaluate_gradient(avg_logistic_loss,X,y,theta)
            theta -= alpha * gradient
    return theta

In [91]:
def GRADIENT_DESCENT_TRAINING(gradient_algor, X_train, y_train, X_test, y_test, theta):
    print gradient_algor.__name__
    
    alpha_list = []
    for i in range(15):
        alpha_list.append(i*0.01)
    val_acc_dict = {}
    val_acc_list = []
    theta_dict = {} # save trained theta model
    data = df       # training data

    for alpha in alpha_list:

        for train, val in kfold.split(X_train):
            # training
            theta = gradient_algor(X_train[train], y_train[train], theta, alpha)
            theta_dict[alpha] = theta
            pred =  predict(theta, X_train[val])
            acc = accuracy(pred, y_train[val])
            val_acc_list.append(acc)
        val_acc_dict[alpha] = np.mean(val_acc_list)
        print("alpha %f \t accuracy %f" % (alpha, acc))   
    
    # select model
    alpha = max(val_acc_dict, key=val_acc_dict.get)
    theta = theta_dict[alpha]
    
    # test model
    recall, precision, acc = evaluate(predict(theta,X_test),y_test)
    print
    print("selected alpha %f \t recall %f \t precision %f \t accuracy %f" % (alpha, recall, precision, acc))

In [92]:
# call mini_gradient_descent and report results
GRADIENT_DESCENT_TRAINING(mini_gradient_descent, X_train, y_train, X_test, y_test, init_theta)

mini_gradient_descent


  """
  """


alpha 0.000000 	 accuracy 0.392857


  


alpha 0.010000 	 accuracy 0.553571
alpha 0.020000 	 accuracy 0.875000
alpha 0.030000 	 accuracy 0.892857
alpha 0.040000 	 accuracy 0.839286
alpha 0.050000 	 accuracy 0.892857
alpha 0.060000 	 accuracy 0.928571
alpha 0.070000 	 accuracy 0.892857
alpha 0.080000 	 accuracy 0.839286
alpha 0.090000 	 accuracy 0.821429
alpha 0.100000 	 accuracy 0.857143
alpha 0.110000 	 accuracy 0.892857
alpha 0.120000 	 accuracy 0.875000
alpha 0.130000 	 accuracy 0.892857
alpha 0.140000 	 accuracy 0.875000

selected alpha 0.140000 	 recall 0.904762 	 precision 0.791667 	 accuracy 0.877193


In [93]:
# call stochastic_gradient_descent and report results
GRADIENT_DESCENT_TRAINING(stochastic_gradient_descent, X_train, y_train, X_test, y_test, init_theta)

stochastic_gradient_descent


  """
  """
  


alpha 0.000000 	 accuracy 0.875000
alpha 0.010000 	 accuracy 0.892857
alpha 0.020000 	 accuracy 0.875000
alpha 0.030000 	 accuracy 0.892857
alpha 0.040000 	 accuracy 0.892857
alpha 0.050000 	 accuracy 0.892857
alpha 0.060000 	 accuracy 0.839286
alpha 0.070000 	 accuracy 0.910714
alpha 0.080000 	 accuracy 0.892857
alpha 0.090000 	 accuracy 0.892857
alpha 0.100000 	 accuracy 0.875000
alpha 0.110000 	 accuracy 0.910714
alpha 0.120000 	 accuracy 0.910714
alpha 0.130000 	 accuracy 0.839286
alpha 0.140000 	 accuracy 0.857143

selected alpha 0.080000 	 recall 1.000000 	 precision 0.777778 	 accuracy 0.894737
