Skip to content

Commit

Permalink
More weak learners, cleanup of files, etc.
Browse files Browse the repository at this point in the history
  • Loading branch information
Charles Marsh committed May 7, 2014
1 parent 82c6680 commit 40448fd
Show file tree
Hide file tree
Showing 13 changed files with 67 additions and 168 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -1,7 +1,7 @@
# Custom files
report/
data/
results/
scratch/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
3 changes: 3 additions & 0 deletions ensemblers/ogboost.py
Expand Up @@ -57,6 +57,9 @@ def update(self, features, label):

def predict(self, features):
F = sum(h.predict(features) for h in self.f)
if F > 0:
return 1.0
return -1.0
p1 = (e ** F) / (1 + e ** F)
if p1 >= 0.5:
return 1.0
Expand Down
102 changes: 0 additions & 102 deletions learners/decision_stump.py

This file was deleted.

2 changes: 1 addition & 1 deletion learners/histogram.py
Expand Up @@ -13,7 +13,7 @@ class Histogram(object):

def __init__(self):
self.range = [-1, 1]
self.bins = 100
self.bins = 20
self.bin_size = float(self.range[1] - self.range[0]) / self.bins
self.bin_counts = [defaultdict(int) for i in range(self.bins)]
self.label_counts = defaultdict(int)
Expand Down
3 changes: 1 addition & 2 deletions learners/mlp2.py
Expand Up @@ -46,6 +46,5 @@ def partial_fit(self, x, y, sample_weight=1.0):
self.num_features, 1, nb_classes=self.num_classes)
temp_data.addSample(x.toarray()[0], y)
temp_data._convertToOneOfMany()
trainer = BackpropTrainer(
self.fnn, dataset=temp_data, momentum=0.1, weightdecay=0.01)
trainer = BackpropTrainer(self.fnn, dataset=temp_data)
trainer.trainEpochs(10)
6 changes: 3 additions & 3 deletions learners/random_stump.py
Expand Up @@ -4,11 +4,11 @@

class RandomStump(object):

def __init__(self, classes):
def __init__(self, classes, feature=None):
self.labels = classes
self.label_counts = defaultdict(int)
self.label_sums = defaultdict(int)
self.feature = None
self.feature = feature

def partial_fit(self, example, label, sample_weight=1.0):
if self.feature is None:
Expand All @@ -18,7 +18,7 @@ def partial_fit(self, example, label, sample_weight=1.0):
self.label_counts[label] += sample_weight

def predict(self, x):
if self.feature is None:
if not self.label_counts:
return self.labels[0]

def mean(y):
Expand Down
9 changes: 0 additions & 9 deletions learners/sk_perceptron.py

This file was deleted.

9 changes: 6 additions & 3 deletions main_evaluate.py → main.py
Expand Up @@ -8,10 +8,9 @@
if __name__ == "__main__":
seed(0)

dataset = "breast-cancer_scale.txt"

parser = argparse.ArgumentParser(
description='Test error for a combination of ensembler and weak learner.')
parser.add_argument('dataset', help='dataset filename')
parser.add_argument('ensembler', help='chosen ensembler')
parser.add_argument('weak_learner', help='chosen weak learner')
parser.add_argument('M', metavar='# weak_learners',
Expand All @@ -24,12 +23,14 @@

ensembler = get_ensembler(args.ensembler)
weak_learner = get_weak_learner(args.weak_learner)
data = load_data("data/" + dataset)
data = load_data("data/" + args.dataset)

accuracy, baseline = test(
ensembler, weak_learner, data, args.M, trials=args.trials)

print "Accuracy:"
print accuracy
print "Baseline:"
print baseline[-1]

if args.record:
Expand All @@ -39,6 +40,8 @@
'baseline': baseline[-1],
'booster': args.ensembler,
'weak_learner': args.weak_learner,
'trials': args.trials,
'seed': 0
}
filename = args.ensembler + "_" + \
args.weak_learner + "_" + str(args.M) + ".yml"
Expand Down
27 changes: 18 additions & 9 deletions main_choice.py
Expand Up @@ -6,11 +6,9 @@


if __name__ == "__main__":

dataset = "breast-cancer_scale.txt"

parser = argparse.ArgumentParser(
description='Test error for a combination of ensembler and weak learner.')
description='Test error for every combination of ensembler and weak learner.')
parser.add_argument('dataset', help='dataset filename')
parser.add_argument('ensembler', help='chosen ensembler')
parser.add_argument('M', metavar='# weak_learners',
help='number of weak learners', type=int)
Expand All @@ -24,17 +22,28 @@
performance = {}
performance_baseline = {}
for weak_learner in weak_learners:
data = load_data("data/" + dataset)
data = load_data(args.dataset)
seed(0)

accuracy, baseline = test(
ensembler, weak_learners[weak_learner], data, args.M, trials=args.trials)
performance[weak_learner] = (accuracy[-1], baseline[-1])
try:
accuracy, baseline = test(
ensembler, weak_learners[weak_learner], data, args.M, trials=args.trials)
performance[weak_learner] = (accuracy[-1], baseline[-1])
except AttributeError:
pass

print "Accuracy:"
print performance

if args.record:
results = performance
results['m'] = args.M
results['booster'] = args.ensembler
filename = args.ensembler + "_ALL_" + str(args.M) + ".yml"
results['dataset'] = args.dataset
results['trials'] = args.trials
results['seed'] = 0
dataset_abbrev = args.dataset.split('/')[-1].split('.')[-2]
filename = args.ensembler + "_ALL_" + \
str(args.M) + "_" + dataset_abbrev + ".yml"
f = open(filename, 'w+')
f.write(dump(results))
15 changes: 9 additions & 6 deletions main_num_learners.py
@@ -1,13 +1,15 @@
import argparse
from random import seed
from yaml import dump
from utils.experiment import testNumLearners
from utils.utils import *

if __name__ == "__main__":
dataset = "breast-cancer_scale.txt"
seed(0)

parser = argparse.ArgumentParser(
description='Test error for a combination of ensembler and weak learner.')
parser.add_argument('dataset', help='dataset filename')
parser.add_argument('ensembler', help='chosen ensembler')
parser.add_argument('weak_learner', help='chosen weak learner')
parser.add_argument(
Expand All @@ -17,17 +19,16 @@
'inc', help='increment for number of weak learners', type=int)
parser.add_argument('--record', action='store_const',
const=True, default=False, help='export the results in YAML format')
parser.add_argument(
'trials', help='number of trials (each with different shuffling of the data); defaults to 1', type=int, default=1, nargs='?')
args = parser.parse_args()

ensembler = get_ensembler(args.ensembler)
weak_learner = get_weak_learner(args.weak_learner)
data = load_data("data/" + dataset)
data = load_data("data/" + args.dataset)

from random import shuffle, seed
seed(0)
shuffle(data)
accuracy = testNumLearners(
ensembler, weak_learner, data, args.start, args.end, args.inc)
ensembler, weak_learner, data, args.start, args.end, args.inc, trials=args.trials)

print accuracy

Expand All @@ -36,6 +37,8 @@
'accuracy': accuracy,
'booster': args.ensembler,
'weak_learner': args.weak_learner,
'trials': args.trials,
'seed': 0
}
filename = args.ensembler + "_" + \
args.weak_learner + "_" + \
Expand Down
27 changes: 19 additions & 8 deletions utils/experiment.py
@@ -1,20 +1,22 @@
from random import shuffle
from collections import defaultdict
import numpy as np


def test(Booster, Learner, data, m, trials=1, should_shuffle=True):
results = [run_test(Booster, Learner, data, m, should_shuffle=should_shuffle)
for _ in range(trials)]
results = []
for t in range(trials):
if should_shuffle:
shuffle(data)
results.append(run_test(Booster, Learner, data, m))
results = zip(*results)

def avg(x):
return sum(x) / len(x)
return (map(avg, zip(*results[0])), map(avg, zip(*results[1])))


def run_test(Booster, Learner, data, m, should_shuffle=True):
if should_shuffle:
shuffle(data)

def run_test(Booster, Learner, data, m):
classes = np.unique(np.array([y for (x, y) in data]))
baseline = Learner(classes)
predictor = Booster(Learner, classes=classes, M=m)
Expand All @@ -37,5 +39,14 @@ def run_test(Booster, Learner, data, m, should_shuffle=True):
return performance_booster, performance_baseline


def testNumLearners(Booster, Learner, data, start, end, inc):
return {m: test(Booster, Learner, data, m, should_shuffle=False)[0][-1] for m in range(start, end + 1, inc)}
def testNumLearners(Booster, Learner, data, start, end, inc, trials=1):
    """Average booster accuracy for each ensemble size m in range(start, end + 1, inc).

    Runs `trials` passes (each preceded by a fresh shuffle of `data`); for every
    m it records the booster's accuracy after the final training example, then
    averages over trials.  Returns a defaultdict mapping m -> mean accuracy.
    """
    results = defaultdict(int)
    for t in range(trials):
        # New ordering of the data for each trial.
        # NOTE(review): test() itself also shuffles when should_shuffle is left
        # at its default True, so the data may be shuffled twice — confirm intended.
        shuffle(data)
        for m in range(start, end + 1, inc):
            # test() returns (booster_accuracies, baseline_accuracies);
            # [0][-1] is the booster's accuracy at the end of the stream.
            accuracy = test(Booster, Learner, data, m)[0][-1]
            print m, accuracy
            results[m] += accuracy
    # Turn accumulated sums into per-m means.
    for m in results:
        results[m] /= trials
    return results
20 changes: 0 additions & 20 deletions utils/plot.py

This file was deleted.

10 changes: 6 additions & 4 deletions utils/utils.py
Expand Up @@ -20,6 +20,7 @@
from learners.histogram import RNB
from learners.winnow import Winnow
from learners.mlp import MLP
from learners.mlp2 import MLP as MLP2


def load_data(filename):
Expand Down Expand Up @@ -47,13 +48,14 @@ def get_ensembler(ensembler_name):
"GaussianNB": GaussianNB,
"BinaryNB": BinaryNB,
"kNN": kNN,
"MLP": MLP,
"DecisionStump": DecisionStump,
#"MLP": MLP,
#"MLP2": MLP2,
#"DecisionStump": DecisionStump,
"DecisionTree": DecisionTree,
"Perceptron": Perceptron,
"RandomStump": RandomStump,
"Winnow": Winnow,
"Histogram": RNB
#"Winnow": Winnow,
#"Histogram": RNB
}


Expand Down

0 comments on commit 40448fd

Please sign in to comment.