In [1]:
from scipy.optimize import  fmin_l_bfgs_b
import numpy
import sklearn.datasets
import ML_support as ml

def load_iris_binary():
    D, L = sklearn.datasets.load_iris()['data'].T, sklearn.datasets.load_iris()['target']
    D = D[:, L != 0] # We remove setosa from D
    L = L[L!=0] # We remove setosa from L
    L[L==2] = 0 # We assign label 0 to virginica (was label 2)
    return D, L


def split_db_2to1(D, L, seed=0):

    nTrain = int(D.shape[1]*2.0/3.0)
    numpy.random.seed(seed) #resetto generatore numeri casuali
    idx = numpy.random.permutation(D.shape[1]) #crea una lista CASUALE di valori compresi tra 0 (compreso) e 149 (compreso)
    idxTrain = idx[0:nTrain] #prendo i primi nTrain (=100) valori casuali
    idxTest = idx[nTrain:] #prendo i restanti valori casuali (cioè i restanti 50)

    DTR = D[:, idxTrain] #dati di training #matrice dei dati di training composta dai campioni indicati dalla lista di indici
    DTE = D[:, idxTest] #dati di test #matrice dei dati di test composta dai campioni indicati dalla lista di indici
    LTR = L[idxTrain] #etichette di training #matrice di etichette di training indicate dalla lista di indici
    LTE = L[idxTest] #etichette di test #matrice di etichette di test indicate dalla lista di indici

    return (DTR, LTR), (DTE, LTE)


def logreg_obj_wrap(DTR, LTR, l):

    def logreg_obj(v):
        n = DTR.shape[1]
        w, b = v[0:-1], v[-1]
        w = w.reshape((len(w), 1))
        J = 0
        x = DTR
        for idx in range(n):
            if LTR[idx] == 0:
                c = 0
            else:
                c = 1

            J += (c * numpy.log1p(numpy.exp(numpy.dot(-w.T, x[:, idx]) - b)) + (1 - c) * numpy.log1p(numpy.exp(numpy.dot(w.T, x[:, idx]) + b)))

        return l/2 * ((numpy.linalg.norm(w))**2) + 1/n * J

    return logreg_obj


def compute_score(w, b, DTE):
    w = w.reshape((len(w), 1))
    S = numpy.zeros((DTE.shape[1]))
    for i in range(len(S)):
        S[i] = numpy.dot(w.T, DTE[:, i]) + b

    return S


def compute_error(S, LTE):
    PL = numpy.zeros((len(LTE)))

    for i in range(len(S)):
        if(S[i] > 0):
            PL[i] = 1

    PL = PL == LTE
    correctPred = sum(PL)

    return (len(LTE)-correctPred)/len(LTE)


D, L = ml.loadFile('../Train.txt')
(DTR, LTR), (DTE, LTE) = split_db_2to1(D, L)
lambdaParam = [0, 1e-6, 1e-3, 1.0]
v = numpy.zeros((DTR.shape[0]+1), dtype='int32')
print('\t\t\t***TABLE**\n')
print('--------------------------------------\n')
print('|\t\t|   J(w*,b*)\t | Error rate  |\n')
print('--------------------------------------\n')
for l in lambdaParam:
    logreg_obj = logreg_obj_wrap(DTR, LTR, l)
    x, f, d = fmin_l_bfgs_b(logreg_obj, v, approx_grad=True)
    w, b = x[0:-1], x[-1]
    S = compute_score(w, b, DTE)
    e = compute_error(S, LTE)
    print(f'| l = {l} |{logreg_obj(x)}|   {round(e*100, 1)}%     |\n')

print('***END***')



			***TABLE**

--------------------------------------

|		|   J(w*,b*)	 | Error rate  |

--------------------------------------



  J += (c * numpy.log1p(numpy.exp(numpy.dot(-w.T, x[:, idx]) - b)) + (1 - c) * numpy.log1p(numpy.exp(numpy.dot(w.T, x[:, idx]) + b)))
  J += (c * numpy.log1p(numpy.exp(numpy.dot(-w.T, x[:, idx]) - b)) + (1 - c) * numpy.log1p(numpy.exp(numpy.dot(w.T, x[:, idx]) + b)))


| l = 0 |[0.69314718]|   9.9%     |

| l = 1e-06 |[0.69314718]|   9.9%     |

| l = 0.001 |[0.69314718]|   9.9%     |

| l = 1.0 |[0.69314718]|   9.9%     |

***END***
