In [21]:
# This script reads randomly generated data points from text files and applies the AdaBoost method to classify them.


#from numpy import *
#import data
#import adaboost

import numpy as py

# Read data from Training or Testing file
def func_readData(filename,option):
    """Read a comma-separated data file into a numpy matrix.

    Parameters
    ----------
    filename : str
        Path of the text file to read.
    option : str
        'train' -- every line looks like ``label:v1,v2,...``; the integer
                   before the colon is the label, the rest are features.
        'test'  -- every line looks like ``v1,v2,...`` (features only).

    Returns
    -------
    'train' : tuple ``(data, label)`` where ``data`` is a float matrix with
              one row per line and ``label`` is a list of ints.
    'test'  : the float matrix ``data`` only.
    ``data`` is None when the file holds no data lines.  For any other
    ``option`` a message is printed and None is returned.

    Raises
    ------
    ValueError
        If a label or a value cannot be parsed, or rows differ in length.
    """
    if option not in ('train', 'test'):
        print('Wrong input parameter!')
        return None

    rows = []
    label = []
    # 'with' guarantees the file is closed even when parsing raises.
    with open(filename,'r') as fid:
        for fline in fid:
            # strip() removes the newline if there is one, so a last line
            # without a trailing newline no longer loses its final character.
            fline = fline.strip()
            if not fline:
                continue    # tolerate blank lines
            if option == 'train':
                head, _, fline = fline.partition(':')
                label.append(int(head))
            rows.append([float(token) for token in fline.split(',')])

    # Build the matrix once instead of vstack-ing row by row.
    # asmatrix is used because the py.mat alias was removed in NumPy 2.0.
    data = py.asmatrix(rows, dtype=float) if rows else None

    if option == 'train':
        return data,label
    return data


# function for building weak classifiers, i.e.:  stump function

def buildWeakStump(d,l,D): # (data, label, weight)
    """Find the decision stump with the lowest weighted training error.

    Every feature column is scanned with 12 evenly spaced thresholds (one
    step below the column minimum up to the column maximum) and both
    inequality directions ('lt' and 'gt').

    Parameters
    ----------
    d : array-like, shape (m, n)
        Training samples, one row per sample.
    l : sequence of m labels
        Compared for equality against the +1.0 / -1.0 stump predictions.
    D : (m, 1) column of sample weights.

    Returns
    -------
    bestStump : dict
        Keys 'dim', 'threshold', 'ineq' of the winning stump; empty when no
        candidate produced an error below infinity (e.g. no feature columns).
    minErr : 1x1 matrix
        Weighted error of the winning stump (``py.inf`` if none was found).
    bestClass : (m, 1) array
        Predictions of the winning stump on ``d`` (all zeros if none found).
    """
    # asmatrix replaces py.mat, which was removed in NumPy 2.0.
    dataMatrix = py.asmatrix(d)
    labelmatrix = py.asmatrix(l).T
    m,n = py.shape(dataMatrix)
    numstep = 10.0
    bestStump = {}
    # Fallback prediction sized to the data (was hard-coded to 5 rows).
    bestClass = py.asmatrix(py.zeros((m,1)))
    minErr = py.inf
    for i in range(n):
        datamin = dataMatrix[:,i].min()
        datamax = dataMatrix[:,i].max()
        stepSize = (datamax - datamin) / numstep
        for j in range(-1,int(numstep)+1):
            # The threshold does not depend on the inequality direction.
            threshold = datamin + float(j) * stepSize
            for inequal in ['lt','gt']:
                predict = stumpClassify(dataMatrix,i,threshold,inequal)
                # 1 where the stump is wrong, 0 where it is right
                err = py.asmatrix(py.ones((m,1)))
                err[predict == labelmatrix] = 0
                weighted_err = D.T * err
                # Strict '<' keeps the first candidate among ties.
                if weighted_err < minErr:
                    minErr = weighted_err
                    bestClass = predict.copy()
                    bestStump['dim'] = i
                    bestStump['threshold'] = threshold
                    bestStump['ineq'] = inequal
    return bestStump, minErr, bestClass

# Use a weak classifier, i.e. a decision stump, to classify data

def stumpClassify(datamat,dim,threshold,inequal):
    """Label every row of ``datamat`` with a single-feature threshold test.

    Rows start at +1.0.  With ``inequal == 'lt'`` the rows whose feature
    ``dim`` is <= ``threshold`` become -1.0; with any other value of
    ``inequal`` the rows whose feature is > ``threshold`` become -1.0.

    Returns an array of shape (number of rows, 1).
    """
    feature = datamat[:,dim]
    # Select the rows that fall on the "negative" side of the stump.
    negative = feature <= threshold if inequal == 'lt' else feature > threshold
    votes = py.ones((py.shape(datamat)[0],1))
    votes[negative] = -1.0
    return votes

In [22]:
# Boosting Algorithm

def train(data,label,numIt = 1000,verbose = True):
    """Train an AdaBoost ensemble of decision stumps.

    Parameters
    ----------
    data : array-like, shape (m, n)
        Training samples, one row per sample.
    label : sequence of m labels (+1 / -1).
    numIt : int
        Maximum number of boosting rounds.
    verbose : bool
        When True (default, the historical behaviour) print the sample
        weights, the stump predictions and the ensemble error every round.

    Returns
    -------
    list of dict
        One dict per round as returned by buildWeakStump, extended with the
        key 'alpha' (the weight of that stump in the ensemble).  Training
        stops early once the ensemble error on the training set is 0.
    """
    weakClassifiers = []
    # m is the number of samples
    m = py.shape(data)[0]
    # sample weights, 1/m at the beginning
    # (asmatrix replaces py.mat, which was removed in NumPy 2.0)
    D = py.asmatrix(py.ones((m,1))/m)
    # label column, built once instead of on every use inside the loop
    labelCol = py.asmatrix(label).T

    estStrong = py.asmatrix(py.zeros((m,1)))
    for i in range(numIt):
        # bestStump: weak classifier; error: weighted error rate
        bestStump, error, classEstimate = buildWeakStump(data,label,D)
        if verbose:
            print("D:",D.T)

        # buildWeakStump hands back a 1x1 matrix; pull out the scalar
        # explicitly rather than relying on float() of an array.
        error = float(py.asarray(error).ravel()[0])

        # weight of the selected stump; the floor avoids division by zero
        alpha = float(0.5*py.log((1.0-error)/max(error,1e-16)))

        # add one more field to bestStump, i.e. classifier weight
        bestStump['alpha'] = alpha
        weakClassifiers.append(bestStump)
        if verbose:
            print("classEstimate: ",classEstimate.T)

        # re-weight samples: exp(-alpha * label * prediction), then normalize
        expon = py.multiply(-1*alpha*labelCol,classEstimate)
        D = py.multiply(D,py.exp(expon))
        D = D/D.sum()

        # running weighted vote of all stumps chosen so far
        estStrong += classEstimate*alpha

        # multiply by ones to turn the boolean mismatch mask into floats
        EnsembleErrors = py.multiply(py.sign(estStrong)!=labelCol,
                                     py.ones((m,1)))
        errorRate = EnsembleErrors.sum()/m

        if verbose:
            print("current error:  ",errorRate)
        if errorRate == 0.0:
            break
    return weakClassifiers


In [23]:

# Applying an adaboost classifier for a single data sample

def adaboostClassify(dataTest,classifier):
    """Classify samples with a trained AdaBoost ensemble.

    Parameters
    ----------
    dataTest : array-like, shape (m, n)
        One sample per row (a single sample is a 1 x n row).
    classifier : list of dict
        Weak classifiers with keys 'dim', 'threshold', 'ineq' and 'alpha'.

    Returns
    -------
    (m, 1) matrix
        Sign of the alpha-weighted sum of all stump predictions
        (+1 / -1, or 0 when the votes cancel or the ensemble is empty).
    """
    # asmatrix replaces py.mat, which was removed in NumPy 2.0.
    dataMatrix = py.asmatrix(dataTest)
    m = py.shape(dataMatrix)[0]
    estStrong = py.asmatrix(py.zeros((m,1)))
    for stump in classifier:
        # call the function stumpClassify()
        classEstimate = stumpClassify(dataMatrix,
                                      stump['dim'],
                                      stump['threshold'],
                                      stump['ineq'])
        # accumulate all predictions
        estStrong += stump['alpha']*classEstimate

    # Return only after every weak classifier has voted; the original
    # returned from inside the loop and so used the first stump alone.
    return py.sign(estStrong)

# Applying an adaboost classifier for all testing samples
def test(dataSet,classifier):
    """Run adaboostClassify on each row of ``dataSet`` in turn.

    Returns a list with one entry per row, in row order; each entry is
    whatever adaboostClassify returns for that single row.
    """
    sampleCount = py.shape(dataSet)[0]
    return [adaboostClassify(dataSet[row,:],classifier)
            for row in range(sampleCount)]



In [59]:

############# main ##################
# The data files "train.txt" and "test.txt" are randomly generated by the
# function randomData() (not part of this notebook) and are used to test
# the code above.

trainData,label = func_readData('train.txt','train')
testData = func_readData('test.txt','test')

#training
classifier = train(trainData,label,150)
print('done training\n')
#testing
# Keep the predictions instead of discarding the return value of test().
predictions = test(testData,classifier)
print('done testing\n')
# test() yields one 1x1 sign matrix per sample; flatten them for display.
print('predicted labels:',[float(p[0,0]) for p in predictions])

D: [[ 0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005
   0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.

classEstimate:  [[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
  -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
  -1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
  -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
  -1. -1.  1. -1. -1. -1.  1. -1.  1. -1.  1. -1. -1. -1. -1. -1. -1. -1.
  -1. -1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1.  1. -1. -1. -1. -1. -1.
  -1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
   1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1.
  -1.  1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.  1.
  -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
  -1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
   1. -1.]]
current error:   0.375
D: [[ 0.00374016  0.00501902  0.00360099  0.005213    0.00619985  0.00393261
   0.0046825   0.0044164   0.00737224  0.00450226  0.00380

current error:   0.36
D: [[ 0.00322452  0.00494449  0.00310453  0.00510894  0.00507855  0.00338711
   0.00494701  0.00443437  0.00792147  0.00463819  0.00362871  0.00492443
   0.01053612  0.00443045  0.00561712  0.00615601  0.00425345  0.006996
   0.00413114  0.00430659  0.00525175  0.0044485   0.00543575  0.00653627
   0.00337025  0.00398811  0.00413114  0.00402927  0.00525175  0.0052649
   0.00532417  0.00470621  0.00456792  0.00432824  0.00525175  0.00348358
   0.00345131  0.00446305  0.00314461  0.00589251  0.00627901  0.0036485
   0.00587567  0.00358037  0.00517304  0.00450088  0.00371182  0.00541443
   0.00582551  0.004185    0.0043025   0.0044188   0.0034728   0.00858757
   0.00374074  0.00583549  0.00343704  0.00497699  0.0038699   0.00415586
   0.00479212  0.00575651  0.0064969   0.0055032   0.00596836  0.00430659
   0.00397742  0.00272611  0.00611084  0.00620398  0.00521198  0.00607754
   0.00312657  0.0055032   0.00306592  0.00503499  0.0039617   0.00631243
   0.00357187  0.

classEstimate:  [[-1. -1.  1.  1. -1.  1. -1.  1. -1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  -1. -1. -1. -1. -1. -1. -1. -1.  1. -1.  1. -1.  1.  1. -1.  1. -1.  1.
  -1.  1.  1.  1.  1. -1. -1.  1. -1.  1.  1. -1.  1.  1. -1.  1.  1.  1.
  -1. -1.  1. -1.  1.  1.  1.  1.  1.  1.  1. -1.  1.  1.  1.  1.  1. -1.
  -1. -1.  1. -1. -1.  1.  1. -1.  1. -1.  1. -1. -1. -1.  1. -1. -1.  1.
   1.  1. -1. -1.  1.  1.  1.  1. -1.  1. -1.  1. -1. -1.  1. -1.  1. -1.
   1.  1. -1. -1.  1.  1.  1.  1.  1.  1.  1. -1.  1. -1.  1.  1. -1.  1.
  -1.  1. -1. -1.  1. -1. -1.  1.  1. -1. -1.  1.  1.  1. -1.  1. -1.  1.
   1.  1.  1.  1. -1. -1.  1. -1. -1. -1.  1. -1.  1.  1.  1. -1.  1.  1.
   1. -1. -1.  1.  1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1.  1.  1.
  -1. -1. -1. -1. -1.  1. -1. -1. -1.  1. -1.  1.  1. -1. -1.  1.  1. -1.
   1. -1.]]
current error:   0.345
D: [[ 0.00310399  0.005235    0.00330835  0.00508767  0.00496516  0.00281588
   0.00505945  0.00469824  0.00833043  0.00425169  0.00388

D: [[ 0.00280868  0.00483677  0.00317766  0.0049318   0.00444463  0.00256245
   0.00501939  0.0044869   0.00852344  0.00435394  0.00374442  0.0047866
   0.0125402   0.00431528  0.00507811  0.00692034  0.0038124   0.00911214
   0.00400067  0.00443919  0.00564451  0.00430274  0.00475576  0.00641879
   0.00308962  0.00335134  0.00339093  0.00355875  0.00404601  0.00498968
   0.00491757  0.00520984  0.00467315  0.00410692  0.00529534  0.00332732
   0.00300529  0.00411918  0.00317398  0.00625653  0.00641917  0.00308459
   0.00563419  0.00335634  0.00484234  0.00473808  0.00376596  0.00509839
   0.00666781  0.00314552  0.00431066  0.00487298  0.00313205  0.01120446
   0.00378935  0.0066106   0.00337199  0.00453603  0.0032905   0.00416099
   0.00452528  0.00538506  0.00613988  0.00678126  0.00603159  0.00310593
   0.00373895  0.0019661   0.00795428  0.0074881   0.00544621  0.00551354
   0.00258117  0.00600778  0.00232254  0.00496152  0.00450826  0.00634314
   0.00305496  0.00681413  0.0020767

classEstimate:  [[ 1.  1. -1. -1.  1. -1.  1. -1.  1.  1. -1. -1. -1. -1. -1. -1. -1.  1.
   1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1. -1.  1.  1. -1.  1. -1.
   1. -1.  1. -1. -1.  1.  1.  1.  1. -1.  1.  1. -1. -1.  1.  1.  1.  1.
   1.  1.  1.  1.  1. -1.  1. -1.  1.  1. -1.  1. -1. -1. -1. -1. -1.  1.
   1.  1. -1.  1.  1. -1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1. -1.
  -1. -1.  1.  1. -1. -1.  1.  1.  1. -1.  1.  1.  1.  1.  1.  1. -1.  1.
   1.  1.  1.  1.  1. -1. -1. -1. -1. -1. -1.  1.  1.  1.  1.  1.  1.  1.
   1. -1.  1.  1. -1.  1.  1.  1.  1.  1.  1.  1. -1.  1.  1. -1.  1. -1.
  -1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1. -1.  1.  1. -1.
  -1.  1.  1.  1. -1.  1.  1.  1.  1. -1.  1.  1.  1.  1.  1.  1. -1. -1.
   1.  1.  1.  1.  1. -1.  1.  1.  1. -1.  1. -1. -1.  1.  1. -1.  1.  1.
   1.  1.]]
current error:   0.305
D: [[ 0.00291773  0.0044569   0.00332252  0.005079    0.00427077  0.00250896
   0.00493999  0.00433528  0.00878724  0.00409188  0.00392

current error:   0.295
D: [[ 0.00286663  0.00436458  0.00338     0.00500981  0.0045565   0.00239225
   0.00486583  0.00409827  0.00897996  0.00390153  0.00398798  0.00545577
   0.012201    0.00428259  0.00526343  0.00638796  0.00342938  0.00920832
   0.00429157  0.00465484  0.00549968  0.00437472  0.00420016  0.00707392
   0.00269243  0.00293694  0.00339441  0.00329561  0.003861    0.00502677
   0.00441283  0.00527145  0.00496897  0.0037388   0.00498156  0.003084
   0.00269681  0.00394244  0.00327423  0.00640021  0.00648295  0.00309575
   0.00527518  0.00313975  0.00492869  0.00520472  0.00401924  0.00510988
   0.00719289  0.0028941   0.00407032  0.00463763  0.00284834  0.0107862
   0.00414072  0.00724924  0.00361636  0.0042483   0.00325272  0.00422797
   0.00460918  0.00510272  0.00581752  0.00740533  0.0054028   0.00283338
   0.00396233  0.00183678  0.00851058  0.00739049  0.00595179  0.00549417
   0.00240342  0.00656066  0.00243671  0.00528959  0.00468265  0.00664663
   0.00298875  