## Libraries

In [48]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.datasets
import idx2numpy
from sklearn.model_selection import train_test_split

## Functions

In [49]:
#     Features:(n_examples, n_features)
#     Labels: (n_examples, 1)
#     Weights:(n_features, 1)


# -- Sigmoid function -- #
def sigmoid(z):
    " Return the sigmoid function "
    return 1/(1 + np.exp(-z))

# -- Predictor function -- #
def predict(features, weights):
    '''
    Returns probabilities
    of class label
    '''
    z = np.dot(features, weights)
    return sigmoid(z)

# -- Cost function -- #
def cost_function(features, labels, weights):
    '''    
    Write down the average cross-entropy loss
    '''
    observations = len(labels)

    predictions = predict(features, weights)

    # You may use this format or define in your own way
    # Note: This is for binary cros-entropy loss.
    
    # We can divide the binary cross entropy loss into two parts
    # One for p(y=1) and another for p(y=0)

    # Take the error when label=1
    class1_cost = np.dot(labels, np.log(predictions)) 

    # Take the error when label=0
    class2_cost = np.dot((1 - labels), np.log(1 - predictions))

    # Take the sum of both costs
    cost = np.sum(class1_cost,class2_cost)

    # Take the average cost
    cost = - (cost/observations)

    return cost


def update_weights(features, labels, weights, lr):
    '''
   Gradient Descent
    '''
    N = len(features.columns)# number of features

    #1 - Get Predictions
    predictions = predict(features, weights)

    gradient = np.dot((predictions - labels) , features) # Write down the gradient of the cost

    # 3 Take the average cost derivative for each feature
    gradient /= N

    # 4 Update the weights using the learning rate lr
    weights = weights - lr * gradient # Write down the update rule here

    return weights

def decision_boundary(prob):
    return 1 if prob >= .5 else 0 # You may change the threshold from 0.5 

def train(features, labels, weights, lr, iters):
    cost_history = [np.float64('inf')]

    for i in range(iters):
        weights = update_weights(features, labels, weights, lr)

        #Calculate error for auditing purposes
        cost = cost_function(features, labels, weights)
        
        cost_history.append(cost)

        print("iter: "+str(i) + " cost: "+str(cost))

    return weights, cost_history

def accuracy(predicted_labels, actual_labels):
    diff = predicted_labels - actual_labels
    return 1.0 - (float(np.count_nonzero(diff)) / len(diff))

## Data

In [50]:
# Read the data set here
data = sklearn.datasets.make_classification(n_samples=1000, n_classes=10,n_clusters_per_class=1, n_features= 10,n_informative=10, n_redundant=0, n_repeated=0) 

data # print a snippet of the data

(array([[-2.58878035, -1.83866169, -1.91523246, ..., -1.89015625,
         -0.43791834, -3.02046427],
        [-0.4544144 , -1.26805567,  1.56154775, ...,  2.73278536,
         -2.99483166,  1.55683739],
        [ 2.0700028 ,  0.87351957, -0.23769278, ..., -1.13280434,
         -4.24606079, -1.40908083],
        ...,
        [-4.10247057,  2.25632609, -4.84383965, ...,  2.50806318,
          0.28860853, -2.70506016],
        [ 1.34312391, -0.68034037, -0.83950483, ...,  0.38360686,
         -0.51662404,  0.22532075],
        [ 1.87693361, -1.98231942,  2.22928456, ..., -4.32192539,
         -0.72617635, -0.84724951]]),
 array([2, 6, 0, 9, 4, 8, 7, 7, 6, 6, 4, 8, 6, 2, 3, 4, 8, 6, 4, 3, 7, 4,
        2, 4, 7, 7, 0, 5, 8, 4, 3, 0, 3, 5, 8, 4, 0, 7, 7, 8, 3, 6, 8, 1,
        4, 9, 0, 3, 7, 0, 2, 9, 9, 0, 0, 1, 0, 3, 4, 2, 8, 5, 9, 1, 8, 2,
        6, 6, 9, 1, 0, 2, 8, 4, 3, 7, 4, 0, 2, 8, 2, 3, 4, 2, 9, 5, 3, 7,
        1, 7, 1, 0, 6, 2, 0, 7, 8, 1, 2, 9, 0, 7, 1, 5, 7, 6, 3, 1, 1, 2,
   

In [51]:
X = pd.DataFrame(data[0])# Features
y = data[1]# Target labels

In [52]:
# Train Test Split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state= 42)

In [53]:
train_data = []
for i in np.unique(y_train) :
    l1 = np.zeros(len(y_train))
    l1[np.where(y_train == i)] = 1
    train_data.append(l1)
    
    
###################

test_data = []
for i in np.unique(y_test) :
    l1 = np.zeros(len(y_test))
    l1[np.where(y_test == i)] = 1
    test_data.append(l1)

In [54]:
# Initialize the weights
weights = []
losses = []
for i in train_data :
    wts = np.random.normal(0, 1, len(x_train.columns))
    # Training
    optimal_wts, loss = train(x_train, i, wts, 1e-5, 1000)
    weights.append(optimal_wts)
    losses.append(loss)

TypeError: 'numpy.float64' object cannot be interpreted as an integer

In [None]:
# Plot the loss against the number of epochs
plt.plot(np.arange(1, len(loss)+1, 1), loss)
plt.show()

In [None]:
predictions = []
for elem in predict(X, optimal_wts):
    predictions.append(decision_boundary(elem))

In [None]:
# # Plot the decision boundary for two features. For that, we will assign a color to each
# # point in the mesh [x_min, x_max]x[y_min, y_max].
# x_min, x_max = X['Feature_1'].min() - 0.5, X['Feature_1'].max() + 0.5
# y_min, y_max = X['Feature_2'].min() - 0.5, X['Feature_2'].max() + 0.5
# h = 0.02  # step size in the mesh
# xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# probs = predict(np.c_[xx.ravel(), yy.ravel()], optimal_wts)
# mesh_preds = []
# for elem in probs:
#     mesh_preds.append(decision_boundary(elem))
# Z = np.array(mesh_preds)
# # Put the result into a color plot
# Z = Z.reshape(xx.shape)
# plt.figure(1, figsize=(12, 5))
# plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

# # Plot also the training points
# plt.scatter(X['Feature_1'], X['Feature_2'], c=y, edgecolors="k", cmap=plt.cm.Paired)
# plt.xlabel("Feature_1")
# plt.ylabel("Feature_2")
# plt.title('Gradient Descent')
# plt.xlim(xx.min(), xx.max())
# plt.ylim(yy.min(), yy.max())
# #plt.xticks(())
# #plt.yticks(())

# plt.show()

In [None]:
# Accuracy
print("Accuracy using gradient descent: {}".format('Write the accuracy function wrt predictions and true labels'))

## Scikit-Learn module

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# L-BFGS
clf_lbfgs = LogisticRegression(solver=).fit(X, y)

# Newton-CG
clf_newt = LogisticRegression(solver=).fit(X, y)

In [None]:
# # Plot the decision boundary. For that, we will assign a color to each
# # point in the mesh [x_min, x_max]x[y_min, y_max].
# x_min, x_max = X['Feature_1'].min() - 0.5, X['Feature_1'].max() + 0.5
# y_min, y_max = X['Feature_2'].min() - 0.5, X['Feature_2'].max() + 0.5
# h = 0.02  # step size in the mesh
# xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Z_lbfgs = clf_lbfgs.predict(np.c_[xx.ravel(), yy.ravel()])
# Z_newt = clf_newt.predict(np.c_[xx.ravel(), yy.ravel()])

# # Put the result into a color plot
# Z_lbfgs = Z_lbfgs.reshape(xx.shape)
# Z_newt = Z_newt.reshape(xx.shape)

# fig = plt.figure(figsize=(12, 10))

# ax1 = plt.subplot(2, 1, 1)
# ax1.pcolormesh(xx, yy, Z_lbfgs, cmap=plt.cm.Paired)

# # Plot also the training points
# ax1.scatter(X['Feature_1], X['Feature_2'], c=y, edgecolors="k", cmap=plt.cm.Paired)
# ax1.set_xlabel("Feature_1")
# ax1.set_ylabel("Feature_2")

# ax1.set_xlim(xx.min(), xx.max())
# ax1.set_ylim(yy.min(), yy.max())
# ax1.set_title('L-BFGS')
# #ax1.set_xticks(())
# #ax1.set_yticks(())

# ax2 = plt.subplot(2, 1, 2)
# ax2.pcolormesh(xx, yy, Z_newt, cmap=plt.cm.Paired)

# # Plot also the training points
# ax2.scatter(X['Feature_1'], X['Feature_2'], c=y, edgecolors="k", cmap=plt.cm.Paired)
# ax2.set_xlabel("Feature_1")
# ax2.set_ylabel("Feature_2")

# ax2.set_xlim(xx.min(), xx.max())
# ax2.set_ylim(yy.min(), yy.max())
# ax2.set_title('Newton-CG')
# #ax1.set_xticks(())
# #ax1.set_yticks(())
# plt.tight_layout()
# plt.show()

In [None]:
# Accuracy
print("Accuracy using L-BFGS: {}".format('Write the accuracy function wrt predictions and true labels'))
print('\n')
print("Accuracy using Newton-CG: {}".format('Write the accuracy function wrt predictions and true labels'))