This notebook implements softmax regression. It is still under development.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import math
import sklearn.metrics as metrics
import sklearn.datasets as sk_datasets
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [None]:
def softmax_function(theta, X, k):
    """Return the softmax probability of class ``k``.

    Evaluates ``exp(theta[k] . X) / sum_c exp(theta[c] . X)``.

    Parameters
    ----------
    theta : dict or sequence
        Per-class weight vectors, either a dict keyed by class label or a
        sequence / 2-D array indexed by class position.
    X : array_like
        Right-hand operand of ``np.dot(theta[c], X)``: a single feature
        vector of shape ``(n_features,)`` or a matrix of shape
        ``(n_features, n_samples)``.
    k : hashable or int
        Key (dict ``theta``) or index (sequence ``theta``) of the class whose
        probability is requested.

    Returns
    -------
    float or numpy.ndarray
        Probability of class ``k``; one value per sample when ``X`` is a
        matrix.

    Raises
    ------
    KeyError or IndexError
        If ``k`` does not identify a class in ``theta``.
    """
    score_k = np.dot(theta[k], X)

    # Iterating a dict yields its keys, so pull the weight vectors explicitly.
    weights = theta.values() if isinstance(theta, dict) else theta
    scores = np.asarray([np.dot(w, X) for w in weights])

    # Subtracting the per-sample maximum leaves the ratio unchanged but keeps
    # every exponent <= 0, so np.exp cannot overflow.
    shift = scores.max(axis=0)
    return np.exp(score_k - shift) / np.exp(scores - shift).sum(axis=0)

In [None]:
#-----------------------------------
#   Evaluation Metrics and Loss Functions
#-----------------------------------

def cross_entropy_loss(h, y):
    """Return the element-wise binary log-likelihood of ``y`` under ``h``.

    Evaluates ``y * log(h) + (1 - y) * log(1 - h)`` for every element. The
    result is NOT negated or averaged, so every value is <= 0; callers that
    need a quantity to minimise must negate and reduce it themselves.

    Parameters
    ----------
    h : array_like
        Predicted probabilities. Values are clipped to ``[eps, 1 - eps]``
        (``eps`` = float64 machine epsilon) so the logarithms stay finite.
        The caller's array is left untouched.
    y : array_like
        Target values, broadcastable against ``h``.

    Returns
    -------
    numpy.ndarray
        Element-wise log-likelihood terms.
    """
    # np.float was removed from NumPy; the builtin float is the same type.
    eps = np.finfo(float).eps
    # np.clip returns a new array, so the caller's h is not modified.
    clipped = np.clip(h, eps, 1.0 - eps)
    y = np.asarray(y)
    return np.multiply(np.log(clipped), y) + np.multiply(np.log(1 - clipped), 1 - y)

def accuracy_score(Y, predY, mode='binary'):
    """Return the fraction of samples whose prediction equals the target.

    Parameters
    ----------
    Y : numpy.ndarray
        Ground-truth labels.
    predY : numpy.ndarray
        Predicted labels, same shape as ``Y``.
    mode : str
        ``'binary'`` counts correct predictions whose value is 1 or 0;
        ``'multi'`` counts every correct prediction. Any other value yields
        ``0.0``.

    Returns
    -------
    float
        Number of counted correct predictions divided by ``Y.shape[0]``.
    """
    if mode == 'binary':
        correct = predY == Y
        true_pos = (correct & (predY == 1.)).sum()
        true_neg = (correct & (predY == 0.)).sum()
        return (true_pos + true_neg) / Y.shape[0]

    if mode == 'multi':
        n_correct = (predY == Y).sum()
        return n_correct / Y.shape[0]

    # Unrecognised mode: nothing is counted.
    return 0.0

def precision_score(Y, predY, mode='binary'):
    """Return precision, ``TP / (TP + FP)``.

    Parameters
    ----------
    Y : numpy.ndarray
        Ground-truth labels.
    predY : numpy.ndarray
        Predicted labels, same shape as ``Y``.
    mode : str
        ``'binary'`` scores the positive class (label 1). ``'multi'`` returns
        the unweighted mean of the per-class precision over the classes
        present in ``Y`` (macro average). Any other value yields ``0.0``.

    Returns
    -------
    float
        Precision in ``[0, 1]``. A class that is never predicted contributes
        ``0.0`` instead of the NaN that ``0 / 0`` would produce, so one
        unpredicted class cannot poison the whole average.
    """
    if mode == 'binary':
        labels = [1]
    elif mode == 'multi':
        labels = np.unique(Y)
    else:
        return 0.0

    if len(labels) == 0:
        return 0.0

    total = 0.0
    for label in labels:
        TP = ((predY == Y) & (predY == label)).sum()
        FP = ((predY != Y) & (predY == label)).sum()
        # TP + FP is the number of times this label was predicted.
        if TP + FP:
            total += TP / (TP + FP)
    return total / len(labels)

def recall_score(Y, predY, mode='binary'):
    """Return recall, ``TP / (TP + FN)``.

    Parameters
    ----------
    Y : numpy.ndarray
        Ground-truth labels.
    predY : numpy.ndarray
        Predicted labels, same shape as ``Y``.
    mode : str
        ``'binary'`` scores the positive class (label 1), counting a false
        negative wherever a wrong prediction equals 0. ``'multi'`` returns
        the unweighted mean of the per-class recall over the classes present
        in ``Y`` (macro average). Any other value yields ``0.0``.

    Returns
    -------
    float
        Recall in ``[0, 1]``. When ``TP + FN`` is zero the score is ``0.0``
        rather than the NaN that ``0 / 0`` would produce.
    """
    if mode == 'binary':
        TP = ((predY == Y) & (predY == 1)).sum()
        FN = ((predY != Y) & (predY == 0)).sum()
        return TP / (TP + FN) if TP + FN else 0.0

    if mode == 'multi':
        labels = np.unique(Y)
        if len(labels) == 0:
            return 0.0
        total = 0.0
        for label in labels:
            TP = ((predY == Y) & (predY == label)).sum()
            FN = ((predY != Y) & (Y == label)).sum()
            # TP + FN is the number of samples that truly carry this label.
            if TP + FN:
                total += TP / (TP + FN)
        return total / len(labels)

    return 0.0

def fb_score(Y, predY, beta, mode='binary'):
    """Return the F-beta score built from precision and recall.

    Computes ``(1 + beta^2) * P * R / (beta^2 * P + R)`` where ``P`` and
    ``R`` come from ``precision_score`` and ``recall_score`` called with the
    same ``mode``.

    Parameters
    ----------
    Y : numpy.ndarray
        Ground-truth labels.
    predY : numpy.ndarray
        Predicted labels, same shape as ``Y``.
    beta : float
        Weight of recall relative to precision; ``beta = 1`` gives F1.
    mode : str
        Forwarded unchanged to ``precision_score`` and ``recall_score``.

    Returns
    -------
    float
        The F-beta score, or ``0.0`` when the denominator is zero (both
        precision and recall are 0, which includes an unrecognised ``mode``).
    """
    # Targets first, predictions second: the order both helpers expect.
    precision = precision_score(Y, predY, mode)
    recall = recall_score(Y, predY, mode)

    beta_sq = beta * beta
    denominator = beta_sq * precision + recall
    if denominator == 0:
        return 0.0
    return (1 + beta_sq) * (precision * recall) / denominator


In [None]:
#-----------------------------------
#   Gradient Descent
#-----------------------------------

def _class_one_probability(theta, X):
    """Two-class softmax against a zero reference class: exp(z) / (1 + exp(z))."""
    z = np.dot(X, theta)
    # Only exponentiate non-positive values so np.exp cannot overflow.
    e = np.exp(-np.abs(z))
    return np.where(z >= 0, 1 / (1 + e), e / (1 + e))


def _mean_cross_entropy(h, y):
    """Mean negative log-likelihood of 0/1 targets ``y`` under probabilities ``h``."""
    eps = np.finfo(h.dtype).eps
    h = np.clip(h, eps, 1 - eps)
    return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))


def BGD(X, y, alpha, iterations):
    """Fit a two-class softmax model with batch gradient descent.

    A column of ones is prepended to ``X`` so that ``theta[0]`` acts as the
    bias. Every iteration uses the whole data set:
    ``theta <- theta - alpha * X^T (h - y) / n_samples``. The cost after each
    update is recorded and plotted with matplotlib once training finishes.

    NOTE(review): the original body called ``softmax_function(theta, X)``
    (which requires a third argument) and an undefined ``Cost``. Because
    ``theta`` is a single weight vector and the gradient is ``X^T (h - y)``,
    this version assumes ``y`` holds 0/1 targets and models the positive
    class with a two-class softmax -- confirm this matches the intended
    design.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        Feature matrix without a bias column. It is not modified.
    y : array_like, shape (n_samples,)
        Binary targets (0 or 1).
    alpha : float
        Learning rate.
    iterations : int
        Number of gradient steps.

    Returns
    -------
    theta : numpy.ndarray, shape (n_features + 1,)
        Learned weights, bias first.
    cost : float
        Mean cross-entropy after the last step (the cost of the all-zero
        initial weights when ``iterations`` is 0).
    """
    design = np.insert(np.asarray(X), 0, 1, axis=1)
    # Flatten so a column-vector y cannot broadcast h - y into a matrix.
    targets = np.asarray(y).reshape(-1)

    nsamples, nfeatures = design.shape
    # np.longdouble is the portable spelling of the extended-precision type;
    # np.float128 does not exist on every platform.
    theta = np.zeros(nfeatures, dtype=np.longdouble)
    J = []

    for _ in range(iterations):
        h = _class_one_probability(theta, design)
        grad = np.dot(design.transpose(), h - targets) / nsamples
        theta = theta - alpha * grad
        J.append(_mean_cross_entropy(_class_one_probability(theta, design), targets))

    plt.plot(J)
    plt.ylabel('Error')
    plt.xlabel('iterations')
    plt.show()

    if J:
        final_cost = J[-1]
    else:
        final_cost = _mean_cross_entropy(_class_one_probability(theta, design), targets)
    return theta, final_cost

In [None]:
#-----------------------------------
# MultiClass Classification
#-----------------------------------

# Toy example


# Build a synthetic 4-class problem and hold out 20% of it for testing.
X, y = sk_datasets.make_classification(n_samples=500, n_features=20, n_classes=4,
                                       n_clusters_per_class=1, n_informative=4,
                                       n_redundant=0)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

print(X_train)
print(y_train)

classes = np.unique(y)
print(classes)
theta = {}

# One-vs-rest training: fit one weight vector per class against 0/1 targets
# and keep it under the class label.
for c in classes:
    cy = (y_train == c).astype(float)
    theta[c], _ = BGD(X_train, cy, 0.0001, 50000)

# Classify each test sample as the class with the highest linear score.
# BGD stores the bias first, so prepend the matching column of ones.
X_test_bias = np.insert(X_test, 0, 1, axis=1)
scores = np.column_stack([np.dot(X_test_bias, theta[c]) for c in classes])
predY = classes[np.argmax(scores, axis=1)]

print("\n--- Classification")
print(predY)
print("\n--- Expected Output")
print(y_test)

# The metrics defined in this notebook average per-class scores without
# weighting, so the scikit-learn references use average='macro'.
acc = accuracy_score(y_test, predY, mode='multi')
sk_acc = metrics.accuracy_score(y_test, predY)
pre = precision_score(y_test, predY, mode='multi')
sk_pre = metrics.precision_score(y_test, predY, average='macro')
recall = recall_score(y_test, predY, mode='multi')
sk_recall = metrics.recall_score(y_test, predY, average='macro')
# fb_score combines the averaged precision and recall, whereas scikit-learn
# averages the per-class F1 values, so these two can differ slightly.
f = fb_score(y_test, predY, 1, mode='multi')
sk_f = metrics.f1_score(y_test, predY, average='macro')

print()
print("myAccuracy: ", str(acc))
print("skAccuracy: ", str(sk_acc))
print()
print("myPrecision: ",str(pre))
print("skPrecision: ",str(sk_pre))
print()
print("myRecall: ",str(recall))
print("skRecall: ",str(sk_recall))
print()
print("myF1Score: ",str(f))
print("skF1Score: ",str(sk_f))