# In [31]:
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8*"
# and 3.8 removed the old names, so prefer the new name and fall back to the
# legacy one on older installations.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    The value is computed in a numerically stable way: ``exp`` is only ever
    applied to non-positive numbers, so large-magnitude negative inputs give
    ``0.0`` without triggering an overflow warning.

    Parameters
    ----------
    x : scalar or array_like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid; a scalar for scalar input, otherwise an array
        with the same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) lies in [0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays untouched.
    return result[()]

def make_prediction(w, x, classify=False):
    """Score one sample with a logistic model.

    Applies ``sigmoid`` to the dot product of the weights ``w`` and the
    features ``x``. With ``classify`` set, the score is thresholded at 0.5
    and returned as ``0`` or ``1``; otherwise the raw sigmoid output is
    returned.
    """
    probability = sigmoid(np.dot(w, x))
    return int(probability > 0.5) if classify else probability
def cross_entropy(y_true, y_pred):
    """Return the mean binary cross-entropy between targets and predictions.

    Predictions are clipped to ``[eps, 1 - eps]`` (``eps`` being the float64
    machine epsilon) before taking logarithms, so saturated predictions of
    exactly 0 or 1 produce a large but finite loss instead of ``inf``/``nan``.

    Parameters
    ----------
    y_true : array_like
        Target values; the formula assumes they lie in ``[0, 1]``.
    y_pred : array_like
        Predicted probabilities, one per entry of ``y_true``.

    Returns
    -------
    float
        Summed cross-entropy divided by ``len(y_true)``.
    """
    data_num = len(y_true)
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # log(0) is -inf and 0 * -inf is nan; keep predictions strictly inside (0, 1).
    eps = np.finfo(float).eps
    y_pred = np.clip(y_pred, eps, 1 - eps)
    total = -np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    return total / data_num
def gradient_descent(alpha, epoch, weight, X, y, threshold, print_option=True, get_cost=False):
    """Fit logistic-regression weights with batch gradient descent.

    Parameters
    ----------
    alpha : float
        Learning rate.
    epoch : int
        Maximum number of weight updates.
    weight : array_like
        Initial weight vector, one entry per feature.
    X : array_like
        Training samples, one row per sample.
    y : array_like
        Training labels, one per row of ``X``.
    threshold : float
        Training stops once the cost drops by less than this amount between
        two consecutive iterations (checked from the fifth update onwards).
    print_option : bool, optional
        Print the cost every 50 iterations.
    get_cost : bool, optional
        Return the cost history instead of the final weights.

    Returns
    -------
    list or numpy.ndarray
        The per-iteration costs when ``get_cost`` is true, otherwise the
        weight vector after the last update.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    data_num = len(y)
    # One matrix-vector product scores every sample at once instead of
    # looping over the rows in Python.
    y_pred = sigmoid(np.dot(X, weight))
    cost = []
    for i in range(epoch):
        dw = np.dot(y_pred - y, X) / data_num
        weight = weight - alpha * dw
        y_pred = sigmoid(np.dot(X, weight))
        new_cost = cross_entropy(y, y_pred)
        cost.append(new_cost)
        if print_option and i % 50 == 0:
            print("Iteration {0}, Cost: {1}".format(i, new_cost))
        # A negative difference (the cost went up) also satisfies this test,
        # so training stops on a cost increase as well as on a plateau.
        if i > 3 and cost[-2] - cost[-1] < threshold:
            break
    return cost if get_cost else weight
    
def logistic_regression(training_set, label, test_set, alpha, epoch, threshold=0.0001, print_option=False, get_cost=False):
    """Train a logistic model from random weights and apply it to ``test_set``.

    The weights start as random values drawn with ``np.random.rand``, one per
    feature of the first training row, and are fitted by
    ``gradient_descent``. When ``get_cost`` is set the training cost history
    is returned; otherwise the result is an array holding the 0/1 class
    predicted for every row of ``test_set``.
    """
    initial_weight = np.random.rand(len(training_set[0]))
    if not get_cost:
        fitted = gradient_descent(alpha, epoch, initial_weight, training_set, label, threshold, print_option)
        labels = [make_prediction(fitted, row, classify=True) for row in test_set]
        return np.array(labels)
    return gradient_descent(alpha, epoch, initial_weight, training_set, label, threshold, print_option, get_cost)
    
def accuracy_score(y_true, y_pred):
    """Return the fraction of positions where ``y_true`` and ``y_pred`` agree.

    Pairs are formed with ``zip``, so comparison stops at the shorter input;
    the match count is always divided by ``len(y_true)``.
    """
    matches = sum(1 for expected, predicted in zip(y_true, y_pred) if expected == predicted)
    return matches / len(y_true)
def plot_accuracy(alpha, epoch):
    """Plot test accuracy against the epoch budget given to training.

    For every budget in ``range(epoch)`` a fresh model is trained with
    ``logistic_regression`` on the module-level ``X_train``/``y_train`` and
    scored on ``X_test``/``y_test``; the accuracies are drawn as a line plot.

    Parameters
    ----------
    alpha : float
        Learning rate forwarded to ``logistic_regression``.
    epoch : int
        Number of epoch budgets to evaluate (0 up to ``epoch - 1``).
    """
    iter_range = range(epoch)
    accuracy = []
    for iter_num in iter_range:
        y_hat = logistic_regression(X_train, y_train, X_test, alpha, iter_num)
        # accuracy_score takes (y_true, y_pred); the labels come first.
        accuracy.append(accuracy_score(y_test, y_hat))
    plt.plot(iter_range, accuracy, color="skyblue")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.show()
