In [None]:
import math

def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^-x) of a scalar.

    Args:
        x: a real number (the raw score / logit).

    Returns:
        A float in [0, 1].

    The textbook form is only evaluated for x >= 0. For negative x the
    algebraically equal form e^x / (1 + e^x) is used instead, because
    math.exp(-x) raises OverflowError once -x exceeds roughly 709.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # exp(x) is in (0, 1) here, so it can underflow to 0.0 but never overflow.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

In [None]:
def dot_product(a_vector, b_vector):
    """Return the scalar a1*b1 + a2*b2 + ... + an*bn.

    The two sequences are paired with zip, so when their lengths differ the
    extra trailing elements of the longer one are ignored.
    """
    pairs = zip(a_vector, b_vector)
    return sum(left * right for left, right in pairs)

# Two equal-length vectors fed to dot_product (x looks like the feature vector
# and Y like the weights -- TODO confirm the intended roles).
x = [2.5, -5, -1.2, 0.5, 2, 0.7]
Y = [3, 2, 1, 3, 0, 4.19]

# Weighted sum, rounded to 5 decimals for display and for the next step.
weighted_sum = round(dot_product(x, Y), 5)
print(weighted_sum)

# Add the constant 0.1 to the score and squash it through the sigmoid.
classified_1 = weighted_sum + 0.1
prob_1 = sigmoid(classified_1)
print(round(prob_1, 2))

In [None]:
import numpy as np

# Same two vectors as the pure-Python cell, this time multiplied with NumPy.
x = [2.5,-5,-1.2,0.5,2,0.7]
Y = [3,2,1,3,0,4.19]

result = np.dot(x, Y)
print(round(result, 5))

# Score for this cell: rounded dot product plus the constant 0.1.
classified_2 = round(result, 5) + 0.1
# Use this cell's own score (classified_2). The previous version read
# classified_1, a variable that only exists if another cell ran first.
prob_2 = sigmoid(classified_2)
# Probability of the complementary (negative) class.
n_prob_2 = 1 - prob_2
print(round(prob_2, 2))
print(round(n_prob_2, 2))

In [None]:
# Loss
import math
import numpy

def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^-x) of a scalar.

    Args:
        x: a real number (the raw score / logit).

    Returns:
        A float in [0, 1].

    The textbook form is only evaluated for x >= 0. For negative x the
    algebraically equal form e^x / (1 + e^x) is used instead, because
    math.exp(-x) raises OverflowError once -x exceeds roughly 709.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # exp(x) is in (0, 1) here, so it can underflow to 0.0 but never overflow.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

# Score = rounded dot product of x and Y plus the constant 0.1.
result = np.dot(x, Y)
classified_2 = round(result, 5) + 0.1
# Use the score computed in this cell (classified_2); the previous version read
# classified_1, which is defined in a different cell.
p_prob = sigmoid(classified_2)
n_prob = 1 - p_prob
print(f"Positive probability: {round(p_prob, 2)}")
print(f"Negative probability: {round(n_prob, 2)}")

# y = 1
# LCE = -[y * log(sigmoid(np.dot(x, Y) + 0.1)) + (1 - y) * log(1 - sigmoid(np.dot(x, Y) + 0.1))]
# LCE = -[1 * log(sigmoid(np.dot(x, Y) + 0.1)) + (1 - 1) * log(1 - sigmoid(np.dot(x, Y) + 0.1))]
# LCE = -[log(sigmoid(np.dot(x, Y) + 0.1)) + 0]
# LCE = -[log(sigmoid(np.dot(x, Y) + 0.1))]

# Take the log of the unrounded probability: rounding first distorts the loss
# and turns a very confident prediction into log(0), which math.log rejects.
# Rounding is applied only when printing.
p_lce = -math.log(p_prob)
print(f"Cross-entropy loss (positive prediction y=1): {round(p_lce, 2)}")

# y = 0
# LCE = -[y * log(sigmoid(np.dot(x, Y) + 0.1)) + (1 - y) * log(1 - sigmoid(np.dot(x, Y) + 0.1))]
# LCE = -[0 * log(sigmoid(np.dot(x, Y) + 0.1)) + (1 - 0) * log(1 - sigmoid(np.dot(x, Y) + 0.1))]
# LCE = -[0 + (1) * log(1 - sigmoid(np.dot(x, Y) + 0.1))]
# LCE = -[log(1 - sigmoid(np.dot(x, Y) + 0.1))]
n_lce = -math.log(n_prob)
print(f"Cross-entropy loss (negative prediction y=0): {round(n_lce, 2)}")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def sigmoid_range(x):
    """Apply the logistic sigmoid to every element of an iterable.

    Args:
        x: iterable of real numbers (a list or a 1-D NumPy array).

    Returns:
        A list of floats, one per input element, in input order.

    Negative inputs are evaluated as e^v / (1 + e^v) so that math.exp is never
    called with a large positive argument, which would raise OverflowError.
    """
    def _stable_sigmoid(value):
        # Non-negative inputs: the textbook form is already safe.
        if value >= 0:
            return 1 / (1 + math.exp(-value))
        # Negative inputs: exp(value) < 1, so it cannot overflow.
        exp_v = math.exp(value)
        return exp_v / (1 + exp_v)

    return [_stable_sigmoid(item) for item in x]

# Evaluate the sigmoid on an evenly spaced grid from -10 up to (not including) 10.
x = np.arange(-10., 10., 0.1)
sig = sigmoid_range(x)
plt.plot(x, sig)
# Label the figure so it can be understood without reading the code.
plt.title('Sigmoid function')
plt.xlabel('x')
plt.ylabel('sigmoid(x)')
plt.show()

In [None]:
import csv
import numpy as np
import matplotlib.pyplot as plt


def loadCSV(filename):
    '''
    Load a purely numeric CSV file into a NumPy array.

    filename: path of the CSV file to read.
    returns:  np.array built from the rows, every cell converted with float().
    raises:   ValueError if a cell cannot be converted to float
              (for example a header row).

    Blank lines are skipped: csv.reader yields them as empty lists, which
    would otherwise make the rows ragged.
    '''
    # newline="" is the mode the csv module documentation asks for, so the
    # reader handles line endings itself.
    with open(filename, "r", newline="") as csvfile:
        rows = [
            [float(value) for value in row]
            for row in csv.reader(csvfile)
            if row
        ]
    return np.array(rows)


def normalize(x):
    '''
    Min-max normalize the feature matrix x column by column.

    x:       array of numeric features; minima and maxima are taken along axis 0.
    returns: array of the same shape with each column rescaled so its minimum
             maps to 0 and its maximum to 1.

    A column whose values are all equal has a zero range; it is mapped to 0.0
    instead of producing NaN from a 0/0 division.
    '''
    mins = np.min(x, axis = 0)
    maxs = np.max(x, axis = 0)
    rng = maxs - mins
    constant = rng == 0
    # Divide by 1 in the constant columns so the division itself is safe;
    # those columns are overwritten with 0.0 right after.
    safe_rng = np.where(constant, 1.0, rng)
    norm_x = 1 - ((maxs - x)/safe_rng)
    return np.where(constant, 0.0, norm_x)


def logistic_func(theta, x):
    '''
    Logistic (sigmoid) function: 1 / (1 + e^(-x . theta^T)).

    The linear score is the dot product of x with the transpose of theta;
    the sigmoid is then applied element-wise to that score.
    '''
    linear_score = np.dot(x, theta.T)
    denominator = 1 + np.exp(-linear_score)
    return 1.0 / denominator


def log_gradient(theta, x, y):
    '''
    Gradient of the logistic cost: (prediction - actual)^T . x

    y is reshaped to one row per row of x before it is subtracted from the
    predictions returned by logistic_func.
    '''
    n_rows = x.shape[0]
    actual = y.reshape(n_rows, -1)
    residual = logistic_func(theta, x) - actual
    return np.dot(residual.T, x)


def cost_func(theta, x, y):
    '''
    cost function, J: cross-entropy (log-loss) of theta on the data (x, y).

    theta: coefficients, passed unchanged to logistic_func
    x:     feature matrix, passed unchanged to logistic_func
    y:     labels; length-1 axes are squeezed away before use
    returns np.mean of -y*log(h) - (1 - y)*log(1 - h),
            where h = logistic_func(theta, x)
    '''
    # h: predicted probabilities from the logistic model
    log_func_v = logistic_func(theta, x)
    y = np.squeeze(y)
    # NOTE(review): the meaning of `*` below depends on the input types. The
    # driver cell passes np.matrix objects, for which `*` is presumably a
    # matrix product (so step1/step2 are already summed over the samples and
    # np.mean just unwraps that sum). With plain ndarrays the shapes would
    # broadcast instead -- confirm before switching away from np.matrix.
    step1 = y * np.log(log_func_v)
    step2 = (1 - y) * np.log(1 - log_func_v)
    final = -step1 - step2
    return np.mean(final)


def grad_desc(x, y, theta, lr=.001, converge_change=.001):
    '''
    Batch gradient descent on the logistic cost.

    Repeats theta <- theta - lr * gradient until one step lowers the cost by
    no more than converge_change. Returns the final theta together with the
    iteration counter, which starts at 1 and is incremented once per update.
    '''
    previous_cost = cost_func(theta, x, y)
    improvement = 1  # initial value used only for the first loop test
    iterations = 1

    while improvement > converge_change:
        step = lr * log_gradient(theta, x, y)
        theta = theta - step
        current_cost = cost_func(theta, x, y)
        improvement = previous_cost - current_cost
        previous_cost = current_cost
        iterations += 1

    return theta, iterations


def pred_values(theta, x):
    '''
    Predict 0/1 labels: 1 where the logistic probability is at least 0.5,
    otherwise 0. Length-1 axes are squeezed out of the result.
    '''
    is_positive = logistic_func(theta, x) >= .5
    return np.squeeze(np.where(is_positive, 1, 0))


def plot_reg(x, y, theta):
    '''
    Scatter the observations of both classes and draw the decision boundary.

    Columns 1 and 2 of x are used as the plotted coordinates; theta is indexed
    as theta[0, j], so it must be two-dimensional.
    '''
    # one scatter call per label value, each with its own colour and legend entry
    for label_value, colour, legend_text in ((0.0, 'b', 'y = 0'),
                                             (1.0, 'r', 'y = 1')):
        members = x[np.where(y == label_value)]
        plt.scatter([members[:, 1]], [members[:, 2]], c=colour, label=legend_text)

    # decision boundary: theta0 + theta1*x1 + theta2*x2 = 0, solved for x2
    intercept, slope_x1, slope_x2 = theta[0,0], theta[0,1], theta[0,2]
    boundary_x1 = np.arange(0, 1, 0.1)
    boundary_x2 = -(intercept + slope_x1*boundary_x1)/slope_x2
    plt.plot(boundary_x1, boundary_x2, c='k', label='reg line')

    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()

# Driver: load the data, fit logistic regression by gradient descent,
# report the fit and plot the decision boundary.

# load the dataset (path is relative to the notebook's working directory)
dataset = loadCSV('./datasets/logistic.csv')

# normalize the feature matrix: every column except the last one
x = normalize(dataset[:, :-1])
print(x)

# prepend a column of ones to the feature matrix (intercept term);
# np.matrix is used here, so x is a matrix from this point on
x = np.hstack((np.matrix(np.ones(x.shape[0])).T, x))

# response vector: the last column of the dataset
y = dataset[:, -1]
print (y)

# initial coefficient values: a 1 x (number of columns of x) matrix of zeros
theta = np.matrix(np.zeros(x.shape[1]))
print (theta )
# coefficient values after running gradient descent
theta, num_iter = grad_desc(x, y, theta)

# estimated coefficients and the iteration counter returned by grad_desc
print("Estimated regression coefficients:", theta)
print("No. of iterations:", num_iter)

# predicted labels on the same data the model was fitted to
y_pred = pred_values(theta, x)

# number of correctly predicted labels
print("Correctly predicted labels:", np.sum(y == y_pred))

# plotting the decision boundary over the observations
plot_reg(x, y, theta)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn import datasets

# Compare the class-probability maps of several classifiers on two iris features.
iris = datasets.load_iris()
x = iris.data[:, 0:2]  # we only take the first two features for visualization
y = iris.target

n_features = x.shape[1]

C = 10
kernel = 1.0 * RBF([1.0, 1.0])  # for GPC

# Create different classifiers.
# NOTE(review): `multi_class` is reported as deprecated in recent scikit-learn
# releases -- confirm against the installed version before upgrading.
classifiers = {
    'L1 logistic': LogisticRegression(C=C, penalty='l1',
                                      solver='saga',
                                      multi_class='multinomial',
                                      max_iter=10000),
    'L2 logistic (Multinomial)': LogisticRegression(C=C, penalty='l2',
                                                    solver='saga',
                                                    multi_class='multinomial',
                                                    max_iter=10000),
    'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2',
                                            solver='saga',
                                            multi_class='ovr',
                                            max_iter=10000),
    'Linear SVC': SVC(kernel='linear', C=C, probability=True,
                      random_state=0),
    'GPC': GaussianProcessClassifier(kernel)
}

n_classifiers = len(classifiers)

# predict_proba returns one column per class present in the training labels,
# so the number of panels per row is taken from y. Counting the distinct
# *predicted* labels instead would break the subplot grid as soon as a
# classifier never predicts one of the classes.
n_classes = np.unique(y).size

plt.figure(figsize=(3 * 2, n_classifiers * 2))
plt.subplots_adjust(bottom=.2, top=.95)

# 100 x 100 evaluation grid covering the plotted feature ranges.
xx = np.linspace(3, 9, 100)
yy = np.linspace(1, 5, 100)
xx, yy = np.meshgrid(xx, yy)
xfull = np.c_[xx.ravel(), yy.ravel()]

for index, (name, classifier) in enumerate(classifiers.items()):
    classifier.fit(x, y)

    y_pred = classifier.predict(x)
    accuracy = accuracy_score(y, y_pred)
    print("Accuracy (train) for %s: %0.1f%% " % (name, accuracy * 100))

    # View probabilities:
    probas = classifier.predict_proba(xfull)
    for k in range(n_classes):
        # one row of panels per classifier, one column per class
        plt.subplot(n_classifiers, n_classes, index * n_classes + k + 1)
        plt.title("Class %d" % k)
        if k == 0:
            plt.ylabel(name)
        imshow_handle = plt.imshow(probas[:, k].reshape((100, 100)),
                                   extent=(3, 9, 1, 5), origin='lower')
        plt.xticks(())
        plt.yticks(())
        # overlay the training points the classifier assigns to class k
        idx = (y_pred == k)
        if idx.any():
            plt.scatter(x[idx, 0], x[idx, 1], marker='o', c='w', edgecolor='k')

# Shared horizontal colorbar, driven by the last image drawn in the loop.
ax = plt.axes([0.15, 0.04, 0.7, 0.05])
plt.title("Probability")
plt.colorbar(imshow_handle, cax=ax, orientation='horizontal')

plt.show()

In [None]:
from time import time
import numpy as np
import matplotlib.pyplot as plt

from sklearn import linear_model
from sklearn import datasets
from sklearn.svm import l1_min_c

# L1-regularized logistic regression path on a two-class subset of iris.
iris = datasets.load_iris()

# Drop every sample whose label is 2, leaving a binary problem.
not_class_two = iris.target != 2
x = iris.data[not_class_two]
y = iris.target[not_class_two]

# Scale all features by the global maximum to speed up convergence.
x = x / x.max()

# Grid of C values: 16 log-spaced multiples (10^0 .. 10^7) of l1_min_c's bound.
cs = l1_min_c(x, y, loss='log') * np.logspace(0, 7, 16)

print("Computing regularization path ...")
start = time()
clf = linear_model.LogisticRegression(
    penalty='l1',
    solver='liblinear',
    tol=1e-6,
    max_iter=int(1e6),
    warm_start=True,
    intercept_scaling=10000.,
)

# Refit the same estimator for each C and keep a copy of its coefficients.
coefs_ = []
for c in cs:
    clf.set_params(C=c).fit(x, y)
    coefs_.append(clf.coef_.ravel().copy())
elapsed = time() - start
print("This took %0.3fs" % elapsed)

coefs_ = np.array(coefs_)

# One line per coefficient, plotted against log10 of C.
plt.plot(np.log10(cs), coefs_, marker='o')
ymin, ymax = plt.ylim()
plt.xlabel('log(C)')
plt.ylabel('Coefficients')
plt.title('Logistic Regression Path')
plt.axis('tight')
plt.show()