Skip to content

Commit

Permalink
restructured, extensive use of numpy, some scipy online learners added
Browse files Browse the repository at this point in the history
  • Loading branch information
Charles Marsh committed Apr 30, 2014
1 parent d163155 commit e8f0eba
Show file tree
Hide file tree
Showing 25 changed files with 251 additions and 344 deletions.
52 changes: 0 additions & 52 deletions adaboost.py

This file was deleted.

52 changes: 0 additions & 52 deletions adaboost2.py

This file was deleted.

58 changes: 0 additions & 58 deletions adaboost3.py

This file was deleted.

Empty file added ensemblers/__init__.py
Empty file.
18 changes: 14 additions & 4 deletions adaboost4.py → ensemblers/adaboost.py
@@ -1,14 +1,18 @@
"""
An online AdaBoost implementation based on Oza & Russell.
"""

from collections import defaultdict
from math import log
from numpy.random import poisson


class AdaBooster(object):

def __init__(self, Learner, M=10):
def __init__(self, Learner, classes, M=10):
self.M = M
self.N = 0
self.learners = [Learner() for i in range(self.M)]
self.learners = [Learner(classes) for i in range(self.M)]
self.wrongWeight = [0 for i in range(self.M)]
self.correctWeight = [0 for i in range(self.M)]
self.epsilon = [0 for i in range(self.M)]
Expand All @@ -18,8 +22,11 @@ def update(self, features, label):
lam = 1.0
for i, learner in enumerate(self.learners):
k = poisson(lam)
if not k:
continue

for _ in range(k):
learner.update(features, label)
learner.partial_fit(features, label)

if learner.predict(features) == label:
self.correctWeight[i] += lam
Expand All @@ -38,12 +45,15 @@ def predict(self, features):
for i, learner in enumerate(self.learners):

def get_classifier_weight(i):
if not (self.wrongWeight[i] + self.correctWeight[i]):
return 0.0

epsilon = float(self.wrongWeight[i]) / \
(self.wrongWeight[i] + self.correctWeight[i])
if epsilon > 0.5:
return 0.0
elif epsilon == 0.0:
epsilon = 0.0001
epsilon = 0.00001

beta = epsilon / (1.0 - epsilon)
return log(1.0 / beta)
Expand Down
15 changes: 11 additions & 4 deletions expbooster.py → ensemblers/expboost.py
@@ -1,19 +1,26 @@
"""
An online boosting algorithm which mixes SmoothBoost with
"Learning from Expert Advice" from Chen '12.
"""


class EXPBooster(object):
def __init__(self, Learner, M=10):

def __init__(self, Learner, classes, M=10):
self.M = M
self.learners = [Learner() for _ in range(self.M)]
self.learners = [Learner(classes) for _ in range(self.M)]
self.w = [1.0 for _ in range(self.M)]

def composite_prediction(self, i, x):
return sum(l.predict(x) for l in self.learners[:i]) / (i+1)
return sum(l.predict(x) for l in self.learners[:i]) / (i + 1)

def update(self, features, label):
for i in range(self.M):
pred = self.composite_prediction(i, features)
if pred * label <= 0:
self.w[i] /= 2
for i in range(self.M):
self.learners[i].update(features, label)
self.learners[i].partial_fit(features, label)

def predict(self, features):
total = sum(self.w[i] * self.composite_prediction(i, features)
Expand Down
15 changes: 11 additions & 4 deletions ocpbooster.py → ensemblers/ocpboost.py
@@ -1,3 +1,9 @@
"""
An online boosting algorithm which mixes SmoothBoost with
Online Convex Programming from Chen '12.
"""


def project(v, z=1.0):
U = set(range(len(v)))
s = 0.0
Expand Down Expand Up @@ -30,14 +36,14 @@ class OCPBooster(object):
theta = gamma / (2 + gamma)
eta = 1.0

def __init__(self, Learner, M=None):
def __init__(self, Learner, classes, M=None):
if not M:
self.M = int(1.0 / (delta * gamma * gamma))
self.M = int(1.0 / (self.delta * self.gamma * self.gamma))
else:
self.M = M
self.z = [0.0 for _ in range(self.M)]
self.alpha = [1.0 / self.M for _ in range(self.M)]
self.learners = [Learner() for _ in range(self.M)]
self.learners = [Learner(classes) for _ in range(self.M)]

def update(self, features, label):

Expand All @@ -63,7 +69,8 @@ def normalize(x):

self.z[i] = initial + label * \
self.learners[i].predict(features) - self.theta
self.learners[i].update(features, label, w=w[i])
self.learners[i].partial_fit(
features, label, sample_weight=w[i])
w.append(min((1 - self.gamma) ** (self.z[i] / 2), 1))

def predict(self, features):
Expand Down
11 changes: 7 additions & 4 deletions ogboost.py → ensemblers/ogboost.py
@@ -1,3 +1,7 @@
"""
An Online Gradient Boost implementation from Leistner.
"""

from math import log, e
from sys import maxint

Expand All @@ -12,10 +16,10 @@ def loss(x):
def dloss(x):
return - 1.0 / (1.0 + e ** x)

def __init__(self, Learner, M=10, K=10):
def __init__(self, Learner, classes, M=10, K=10):
self.M = M
self.K = K
self.learners = [[Learner() for _ in range(self.K)]
self.learners = [[Learner(classes) for _ in range(self.K)]
for _ in range(self.M)]
self.errors = [[0.0 for _ in range(self.K)]
for _ in range(self.M)]
Expand All @@ -32,8 +36,7 @@ def update(self, features, label):
min_error = maxint
for k in range(self.K):
h = self.learners[m][k]
for _ in range(1 + int(10 * w)):
h.update(features, label)
h.partial_fit(features, label, sample_weight=w)

# Update error weight
if h.predict(features) != label:
Expand Down
Empty file added learners/__init__.py
Empty file.
79 changes: 79 additions & 0 deletions learners/ce_knn.py
@@ -0,0 +1,79 @@
"""
An online (error-correct) kNN algorithm based on Foerster.
"""

from collections import defaultdict
import heapq


def _inc(w):
return 2 - (w - 2) * (w - 2) / 2


def _dec(w):
return w * w / 2


class kNN(object):
    """Online, error-correcting weighted k-nearest-neighbour classifier.

    Every example passed to ``update`` is stored in ``self.weights`` under
    the key ``(tuple(example), label)`` with an initial weight of 1.0.
    Predictions are a weighted vote among the nearest stored points; the
    weights of the neighbours are raised or decayed depending on whether the
    current prediction was right, and points whose weight falls below
    ``self.threshold`` are dropped.
    """

    def __init__(self):
        """Create an empty classifier with the default hyper-parameters."""
        self.num_points = 0      # number of update() calls processed
        self.LR = 0.1            # k used in update() is LR * num_points
        self.r = 0.05            # k used in predict() is r * num_points
        self.threshold = 0.1     # stored points with a lower weight are pruned
        self.delta = 13          # a mistake only decays weights while the
                                 # label has been seen fewer than delta times
        self.weights = {}        # (point tuple, label) -> weight
        self.label_counts = defaultdict(int)

    def get_k_nearest(self, x, k):
        """Return the (at most) *k* stored points closest to *x*.

        *k* may be fractional; it is clamped to at least 1. Distance is the
        squared Euclidean distance over the zipped coordinates.

        Returns a dict mapping ``(point, label)`` to the stored weight.
        """
        k = max(k, 1)

        def dist(point):
            return sum((a - b) * (a - b) for a, b in zip(x, point))

        # Heap of (-distance, point, label, weight): the root is always the
        # farthest candidate kept so far, so heappushpop evicts it whenever a
        # closer point arrives.
        heap = []
        for (point, label), weight in self.weights.items():
            entry = (-dist(point), point, label, weight)
            if len(heap) < k:
                heapq.heappush(heap, entry)
            else:
                heapq.heappushpop(heap, entry)

        return {(point, label): weight for (_, point, label, weight) in heap}

    def update(self, example, label):
        """Learn from one labelled example.

        The weights of the example's nearest neighbours are adjusted based on
        whether the current prediction matches *label*; afterwards the example
        itself is stored with weight 1.0.
        """
        k_nearest = self.get_k_nearest(
            example, self.LR * self.num_points)

        if self.predict(example, k_nearest=k_nearest) == label:
            # Correct prediction: reinforce the neighbours that agree.
            for (y, c) in k_nearest:
                if c == label:
                    self.weights[(y, c)] = _inc(self.weights[(y, c)])
        elif self.label_counts[label] < self.delta:
            # NOTE(review): on a mistake this decays neighbours that carry
            # the *true* label (c == label), mirroring the branch above;
            # confirm this is intended rather than ``c != label``.
            for (y, c) in k_nearest:
                if c == label:
                    self.weights[(y, c)] = _dec(self.weights[(y, c)])

            # Drop stored points whose weight has fallen below the threshold.
            self.weights = {key: w for key, w in self.weights.items()
                            if w >= self.threshold}

        self.label_counts[label] += 1
        # Stored as a float so weight arithmetic never truncates under
        # Python 2 integer division.
        self.weights[(tuple(example), label)] = 1.0
        self.num_points += 1

    def predict(self, x, k_nearest=None):
        """Return the label with the largest summed neighbour weight.

        Returns 1 when no example has been seen yet. If *k_nearest* is not
        given (or is empty) the neighbours are looked up with
        ``k = self.r * self.num_points``.
        """
        if not self.num_points:
            return 1

        if not k_nearest:
            k_nearest = self.get_k_nearest(x, self.r * self.num_points)

        label_weights = defaultdict(int)
        for (y, c) in k_nearest:
            label_weights[c] += self.weights[(y, c)]

        # Iterating the dict directly works on both Python 2 and 3
        # (``iterkeys`` exists only on Python 2).
        return max(label_weights, key=label_weights.get)

0 comments on commit e8f0eba

Please sign in to comment.