### Imports

In [None]:
import numpy as np
from numpy.random import normal
import scipy
import pandas as pd
import seaborn as sns
import sys #sys.maxsize

### Data Generation

#### Helper Functions

In [None]:
def gen_points(n_points, x_center, y_center, spread, cluster_id):
    """Sample one Gaussian cluster of 2-D points.

    Returns a list of ``n_points`` tuples ``(1, x, y, cluster_id)``. The
    leading constant 1 is the value of the ``x0`` column; ``x`` and ``y``
    are drawn independently from normal distributions centred on
    ``x_center`` / ``y_center`` with standard deviation ``spread``.
    """
    # Draw all x coordinates before any y coordinate so the sequence of
    # random numbers consumed is the same for a given seed.
    xs = normal(loc=x_center, scale=spread, size=n_points)
    ys = normal(loc=y_center, scale=spread, size=n_points)
    return [(1, px, py, cluster_id) for px, py in zip(xs, ys)]

#### Configuration

In [None]:
# Number of points generated for each cluster.
cl_size = 50
# Standard deviation of each cluster, applied to both the x and y coordinates.
cl_spread = 1
# (x, y) centre of each cluster; a centre's position in this list is used
# as its cluster_id label.
cl_centers = [(5, 5), (10, 10)]

#### DataFrame Generation

In [None]:
# Build one DataFrame holding every cluster: one row per point, labelled
# with the index of the centre it was sampled around.
cols = ['x0', 'x', 'y', 'cluster_id']
cluster_frames = [
    pd.DataFrame(gen_points(cl_size, c_x, c_y, cl_spread, i), columns = cols)
    for i, (c_x, c_y) in enumerate(cl_centers)
]
# DataFrame.append was removed in pandas 2.0; a single concat is the
# supported replacement and avoids re-copying the frame per cluster.
# pd.concat raises on an empty list, so fall back to an empty frame.
if cluster_frames:
    df = pd.concat(cluster_frames, ignore_index = True)
else:
    df = pd.DataFrame(columns = cols)

In [None]:
# Plot the generated points, coloured by cluster. x/y/hue are passed by
# keyword: current seaborn accepts only `data` positionally.
sns.scatterplot(x = "x", y = "y", hue = "cluster_id", data = df)

### Program

In [None]:
#inputs
# Design matrix: the constant x0 column (multiplies the bias weight)
# followed by the two coordinates.
X = df.loc[:, ['x0', 'x', 'y']].values
# Labels as a column vector; -1 lets NumPy infer the row count so this
# keeps working when the number of clusters or cl_size changes.
y = np.reshape(df.loc[:, 'cluster_id'].values, (-1, 1))
#initialize weights and biases to random values
#W[0] is the bias
# One weight per column of X, so the shapes always agree.
W = normal(loc = 0, scale = 1, size = (X.shape[1], 1))
print('X:', X.shape, 'y:', y.shape, 'W:', W.shape)

In [None]:
def cost_and_gradient(X, y, W):
    """Return the logistic-regression loss and its (unnormalised) gradient.

    Parameters
    ----------
    X : array of shape (n_samples, n_features); the caller in this file
        puts a constant 1 in the first column so W[0] acts as the bias.
    y : array of shape (n_samples, 1) with 0/1 labels.
    W : array of shape (n_features, 1) with the current weights.

    Returns
    -------
    L : mean binary cross-entropy over the samples.
    gradient : array of shape (1, n_features) equal to (a - y)^T X, where
        a = sigmoid(X W). This is the *summed* gradient: it is not divided
        by n_samples, unlike L, so it is n_samples times the gradient of L.

    Raises
    ------
    ZeroDivisionError if X has no rows.
    """
    # Imported explicitly: a bare `import scipy` does not guarantee that
    # the `scipy.special` submodule has been loaded.
    from scipy.special import expit

    Z = np.dot(X, W)
    a = expit(Z)
    gradient = np.dot(np.transpose(a - y), X)
    # Evaluate the log terms straight from Z rather than from a:
    #   log(sigmoid(Z))     = -log(1 + exp(-Z)) = -logaddexp(0, -Z)
    #   log(1 - sigmoid(Z)) = -log(1 + exp(Z))  = -logaddexp(0,  Z)
    # When a saturates to exactly 0 or 1, log(a) / log(1 - a) hit log(0);
    # this form stays finite and needs no epsilon fudge, which would bias
    # the loss and cap it for confidently wrong predictions.
    log_a = -np.logaddexp(0, -Z)
    log_one_minus_a = -np.logaddexp(0, Z)
    L = (-1 / X.shape[0]) * np.sum(y * log_a + (1 - y) * log_one_minus_a)
    return L, gradient

In [None]:
#learning based on gradient descent
def gradient_descent(X, y, W, lrate = 0.01, tol = 0.0001, max_iter = None):
    """Fit the weights by batch gradient descent.

    Repeatedly steps W against the gradient returned by
    ``cost_and_gradient`` until the loss changes by at most ``tol``
    between two consecutive iterations. The loss is printed after every
    step.

    Parameters
    ----------
    X, y : data and labels, passed through to ``cost_and_gradient``.
    W : initial weights, shape (n_features, 1). Not modified in place.
    lrate : step size applied to the gradient.
    tol : stop once ``abs(old_loss - new_loss) <= tol``.
    max_iter : optional cap on the number of update steps. ``None`` (the
        default) means no cap, so a diverging run will not terminate.

    Returns
    -------
    The updated weight array, same shape as ``W``.
    """
    # Infinite "previous loss" guarantees the first comparison passes for
    # any finite starting loss.
    old_loss = float("inf")
    new_loss, grads = cost_and_gradient(X, y, W)
    n_steps = 0
    while abs(old_loss - new_loss) > tol:
        if max_iter is not None and n_steps >= max_iter:
            break
        #update the weights and bias in the direction of the gradient
        # grads has shape (1, n_features); transpose to match W.
        W = W - np.transpose(lrate * grads)
        #calculate the new loss
        old_loss = new_loss
        new_loss, grads = cost_and_gradient(X, y, W)
        n_steps += 1
        print ('Loss:', new_loss)
    return W

In [None]:
# Train from the random initial weights with a small step size; W itself
# is left untouched so both can be compared afterwards.
W_new = gradient_descent(X, y, W, lrate = 0.0001)

In [None]:
# Show the initial weights followed by the trained weights.
print (W, W_new, sep = '\n')

In [None]:
# Explicit submodule import: a bare `import scipy` does not guarantee
# that `scipy.special` is loaded.
from scipy.special import expit

# Predicted probability of cluster 1 for every sample, shape (n_samples, 1).
# The linear score X.W must go through the sigmoid before it is compared
# with 0.5; thresholding the raw score at 0.5 shifts the decision boundary.
probs = expit(np.dot(X, W_new))
labels = (probs >= 0.5).astype(int) #predicted label
for prob, actual, label in zip(probs, y, labels):
    # Columns: predicted probability, true label, predicted label.
    print (prob, actual, label[0])