Skip to content

Commit

Permalink
Added the code used in the speed tests (this is more complicated to read due to optimization)
Browse files Browse the repository at this point in the history
  • Loading branch information
bwhite committed Aug 2, 2010
1 parent c1e3b14 commit 8df9041
Show file tree
Hide file tree
Showing 11 changed files with 401 additions and 0 deletions.
6 changes: 6 additions & 0 deletions performance/SConstruct
@@ -0,0 +1,6 @@
# SCons build script: compiles the background-subtraction kernels into a
# shared library that the Python wrapper (bgsub_fast.py) loads via ctypes.
# NOTE(review): distutils.sysconfig is imported but never used here.
import distutils.sysconfig
env = Environment()
# Force g++ with aggressive optimization and SSE auto-vectorization flags
# (-ftree-vectorizer-verbose reports which loops actually vectorized).
env.Replace(CXX = 'g++')
env.Append(CCFLAGS = '-O3 -Wall -msse -msse2 -ffast-math -ftree-vectorizer-verbose=2 -msse3 -ftree-vectorize')

# Produces the library that bgsub_fast.py loads as 'libbgsub_fast'
# (SCons adds the platform library prefix/suffix to the 'bgsub_fast' name).
env.SharedLibrary('bgsub_fast', ['bgsub_fast.c'])
62 changes: 62 additions & 0 deletions performance/bgsub.py
@@ -0,0 +1,62 @@
#!/usr/bin/env python
import numpy as np
import StringIO
import Image
import hadoopy

import bgsub_fast

class Mapper(object):
    """Hadoop mapper: tags each video frame with its block id and emits two
    copies — flag '1' for the background-model pass and flag '2' for the
    classification pass handled by Reducer."""

    @staticmethod
    def _compute_blockid(image_id):
        """Return the block id for a frame index (500 consecutive frames per block)."""
        images_in_block = 500
        # Floor division: identical to Python 2's '/' on ints, and also
        # correct under Python 3 (where '/' would yield a float).
        return str(int(image_id) // images_in_block)

    def map(self, video_id, value):
        """Emit (key, (video-frame id, encoded image)) twice per frame.

        value is a (image_id, encoded_image) pair; the key embeds the video
        id, block id, a tab separator and a trailing flag character.
        """
        image_id, image = value
        block_id = self._compute_blockid(image_id)
        video_image_id = '-'.join((video_id, str(image_id)))
        video_block_flag_id = '%s-%s\t' % (video_id, block_id)
        # Flag '1' copies feed the model-building phase, flag '2' the
        # classification phase (see Reducer.reduce).
        yield video_block_flag_id + '1', (video_image_id, image)
        yield video_block_flag_id + '2', (video_image_id, image)

def reducer(key, values):
    """Identity reducer: forward every (key, value) pair unchanged."""
    for item in values:
        yield key, item

class Reducer(object):
    """Hadoop reducer: builds a per-block background model from flag-'1'
    groups, then classifies the frames of the matching flag-'2' groups.

    NOTE(review): relies on the flag-'1' key for a block sorting before its
    flag-'2' key and reaching the same reducer — confirm hadoopy's
    partitioning guarantees this.
    """

    def reduce(self, key, values):
        # The key's trailing flag character selects the phase.  The model
        # phase produces no output (returns None); the classify phase
        # returns a generator of (image_id, mask) pairs.
        return self._handle_flag1(values) if key[-1] == '1' else self._handle_flag2(values)

    @staticmethod
    def _load_image(image):
        """Decode an encoded image string to raw 8-bit grayscale bytes."""
        return Image.open(StringIO.StringIO(image)).convert('L').tostring()

    def _handle_flag1(self, values):
        """Accumulate per-pixel sum/sum-of-squares and store the model in
        self.m (mean) and self.v (pre-thresholded variance)."""
        c, s, ss = 0, None, None
        for image_id, image in values:
            image = self._load_image(image)
            c += 1
            # 'is None' instead of '== None': once s is an ndarray the '=='
            # comparison is elementwise and breaks the 'if'.
            if s is None:
                s = np.zeros(len(image), dtype=np.float32)
                ss = np.zeros(len(image), dtype=np.float32)
            bgsub_fast.accum(image, s, ss)
        self.m = np.zeros(s.shape, dtype=np.float32)
        self.v = np.zeros(s.shape, dtype=np.float32)
        bgsub_fast.mean_var(s, ss, c, self.m, self.v)

    def _handle_flag2(self, values):
        """Yield (image_id, foreground mask bytes) using the stored model."""
        fg = None
        for image_id, image in values:
            image = self._load_image(image)
            # 'is None' for the same reason as in _handle_flag1.
            if fg is None:
                fg = np.zeros(len(image), dtype=np.float32)
            fg_mask = bgsub_fast.classify(image, self.m, self.v, fg)
            yield image_id, fg_mask.tostring()


if __name__ == "__main__":
    # hadoopy dispatches to Mapper/Reducer based on the task type; a truthy
    # return means the invocation was wrong, so print usage and exit.
    if hadoopy.run(Mapper, Reducer):
        hadoopy.print_doc_quit(__doc__)
24 changes: 24 additions & 0 deletions performance/bgsub_fast.c
@@ -0,0 +1,24 @@
/* Accumulate the per-pixel sum (s) and sum of squares (ss) of one
 * 8-bit grayscale image over the running totals. */
void bgsub_accum(unsigned char *image, int size, float *s, float *ss) {
    int i;
    for (i = 0; i < size; ++i) {
        int pix = image[i];
        s[i] += pix;
        ss[i] += pix * pix;
    }
}

/* Compute the per-pixel mean (m) and PRE-THRESHOLDED variance (v) from the
 * accumulated sum (s), sum of squares (ss) and frame count (c).
 * v[i] stores 6.25 * variance, where 6.25 = 2.5^2 is the decision
 * threshold folded in here so the classifier only needs a single compare:
 * (x - m)^2 > v  <=>  |x - m| > 2.5 * sigma. */
void bgsub_mean_var(int size, float *s, float *ss, int c, float *m, float *v) {
    /* Hoisted loop invariants; the 6.25 threshold rides along with 1/c^2. */
    float inv_c_sqr = 6.25f / (c * c);
    float inv_c = 1.f / c;
    int i;
    for (i = 0; i < size; ++i) {
        m[i] = s[i] * inv_c;
        /* (ss*c - s*s) / c^2 == ss/c - (s/c)^2, the biased variance. */
        v[i] = (ss[i] * c - s[i] * s[i]) * inv_c_sqr;
    }
}

/* Write each pixel's squared deviation from the background mean into
 * bgsub; thresholding against the variance happens on the Python side. */
void bgsub_classify(unsigned char *image, int size, float *m, float *bgsub) {
    int i;
    for (i = 0; i < size; ++i) {
        float d = image[i] - m[i];
        bgsub[i] = d * d;
    }
}
55 changes: 55 additions & 0 deletions performance/bgsub_fast.py
@@ -0,0 +1,55 @@
import numpy as np
import ctypes

# ctypes type aliases used by the prototypes below.
# NOTE(review): _uint8_ptr and _uint32_ptr are currently unused.
_uint8_ptr = ctypes.POINTER(ctypes.c_uint8)
_uint32_ptr = ctypes.POINTER(ctypes.c_uint32)
_float_ptr = ctypes.POINTER(ctypes.c_float)
_int = ctypes.c_int32

# Load the shared library built by the SConstruct in this directory.
_bg = np.ctypeslib.load_library('libbgsub_fast', '.')

# void bgsub_accum(unsigned char *image, int size, float *s, float *ss)
# The image is passed as a raw byte string, hence c_char_p.
# NOTE(review): the C function returns void; restype of c_int is harmless
# but misleading.
_bg.bgsub_accum.restype = ctypes.c_int
_bg.bgsub_accum.argtypes = [ctypes.c_char_p, _int, _float_ptr, _float_ptr]
def accum(image, s, ss):
    """Accumulate per-pixel sum (s) and sum of squares (ss) of a raw
    8-bit image string into the given float32 arrays, in place."""
    s_ptr = s.ctypes.data_as(_float_ptr)
    ss_ptr = ss.ctypes.data_as(_float_ptr)
    _bg.bgsub_accum(image, len(image), s_ptr, ss_ptr)


# void bgsub_mean_var(int size, float *s, float *ss, int c, float *m, float *v)
_bg.bgsub_mean_var.restype = ctypes.c_int
_bg.bgsub_mean_var.argtypes = [_int, _float_ptr, _float_ptr, _int, _float_ptr, _float_ptr]


def mean_var(s, ss, c, m, v):
    """Fill m and v with the per-pixel mean and pre-thresholded variance
    computed from sum s, sum-of-squares ss and frame count c, in place."""
    as_float = lambda arr: arr.ctypes.data_as(_float_ptr)
    _bg.bgsub_mean_var(len(s), as_float(s), as_float(ss), c, as_float(m), as_float(v))


# void bgsub_classify(unsigned char *image, int size, float *m, float *bgsub)
_bg.bgsub_classify.restype = ctypes.c_int
_bg.bgsub_classify.argtypes = [ctypes.c_char_p, _int, _float_ptr, _float_ptr]


def classify(image, m, v, fg):
    """Return a uint8 mask (255 = foreground) for a raw 8-bit image.

    fg is a float32 scratch buffer that receives the squared deviations
    from the C kernel; v already contains the folded-in threshold.
    """
    _bg.bgsub_classify(image, len(image),
                       m.ctypes.data_as(_float_ptr),
                       fg.ctypes.data_as(_float_ptr))
    # Foreground wherever the squared deviation exceeds the threshold.
    mask = np.asarray(fg > v, dtype=np.uint8)
    mask *= 255
    return mask


if __name__ == '__main__':
    # Smoke test: accumulate one 3-byte "image" and print the resulting
    # per-pixel sums and sums of squares.
    image = 'abc'
    # accum
    s = np.zeros(len(image), dtype=np.float32)
    ss = np.zeros(len(image), dtype=np.float32)
    accum(image, s, ss)
    print(s)
    print(ss)
25 changes: 25 additions & 0 deletions performance/bgsub_fast_sse.c
@@ -0,0 +1,25 @@
/* 32-bit unsigned integer variant of bgsub_accum: accumulate per-pixel
 * sum (s) and sum of squares (ss) of a pre-widened image. */
void bgsub_accum(unsigned int *image, int size, unsigned int *s, unsigned int *ss) {
    int i;
    for (i = 0; i < size; ++i) {
        unsigned int pix = image[i];
        s[i] += pix;
        ss[i] += pix * pix;
    }
}

/* Compute the per-pixel mean (m) and PRE-THRESHOLDED variance (v) from the
 * accumulated sum (s), sum of squares (ss) and frame count (c).
 * v[i] stores 6.25 * variance (6.25 = 2.5^2), so the classifier needs only
 * one compare: (x - m)^2 > v  <=>  |x - m| > 2.5 * sigma.
 * Fix: float literals (6.25f, 1.f) to match bgsub_fast.c and avoid
 * promoting the loop-invariant divisions to double. */
void bgsub_mean_var(int size, float *s, float *ss, int c, float *m, float *v) {
    float inv_c_sqr = 6.25f / (c * c);
    float inv_c = 1.f / c;
    int i;
    for (i = 0; i < size; ++i) {
        m[i] = s[i] * inv_c;
        /* (ss*c - s*s) / c^2 == ss/c - (s/c)^2, the biased variance. */
        v[i] = (ss[i] * c - s[i] * s[i]) * inv_c_sqr;
    }
}

/* Set bgsub[i] to 1 where the pixel deviates from the mean by more than
 * the (pre-thresholded) variance, else 0: (x - m)^2 > v.
 * Fix: removed the unused local 'val' (dead declaration, -Wall warning). */
void bgsub_classify(float *image, int size, float *m, float *v, unsigned char *bgsub) {
    int i;
    for (i = 0; i < size; ++i) {
        bgsub[i] = (image[i] - m[i]) * (image[i] - m[i]) > v[i];
    }
}
46 changes: 46 additions & 0 deletions performance/bgsub_paper.py
@@ -0,0 +1,46 @@
#!/usr/bin/env python
import StringIO

import hadoopy
import Image
import numpy as np


class Mapper(object):
    """Hadoop mapper (paper/reference version): tags each frame with its
    block id and emits it twice, once per reducer phase."""

    @staticmethod
    def _compute_blockid(image_id):
        """Return the block id for a frame index (500 frames per block)."""
        images_in_block = 500
        # Floor division: same result as Python 2's '/' on ints, and also
        # correct under Python 3.
        return str(int(image_id) // images_in_block)

    def map(self, image_id, image):
        """Emit ('<block>-1', ...) and ('<block>-2', ...) for each frame.

        Fix: removed the stray `image_id, image = value` line (a leftover
        from bgsub.py's (video_id, value) signature) which referenced the
        undefined name `value` and raised NameError on every call.
        """
        block_id = self._compute_blockid(image_id)
        # '-1' copies feed the background-model pass, '-2' the classify pass.
        yield block_id + '-1', (image_id, image)
        yield block_id + '-2', (image_id, image)


class Reducer(object):
    """Hadoop reducer (paper/reference version): builds the per-block mean
    and variance from '-1' groups, classifies '-2' groups against them."""

    @staticmethod
    def _load_image(image):
        """Decode an encoded frame to a uint32 numpy vector of gray levels."""
        image = Image.open(StringIO.StringIO(image)).convert('L').tostring()
        image = np.fromstring(image, dtype=np.uint8)
        return np.array(image, dtype=np.uint32)

    def reduce(self, key, values):
        values = ((d, self._load_image(i)) for d, i in values)
        # Fix: the mapper tags keys with trailing '1' (model pass) and '2'
        # (classify pass); the original tested for '0', which never matched,
        # so self.m/self.v were never set and classification raised
        # AttributeError.
        if key[-1] == '1':
            c = s = ss = 0
            for d, i in values:
                c += 1
                s += i
                ss += i**2
            # NOTE(review): uint32 arithmetic — s**2 can overflow for large
            # blocks of bright frames; confirm block size keeps it in range.
            self.m = s / c
            self.v = (ss - s**2 / c) / c
        else:
            for d, i in values:
                # Foreground where squared deviation > 6.25 (= 2.5^2) x var.
                b = (i - self.m)**2 > 6.25 * self.v
                yield d, b.tostring()

if __name__ == "__main__":
    # hadoopy dispatches to Mapper/Reducer based on the task type; a truthy
    # return means the invocation was wrong, so print usage and exit.
    if hadoopy.run(Mapper, Reducer):
        hadoopy.print_doc_quit(__doc__)
19 changes: 19 additions & 0 deletions performance/dump_bg.py
@@ -0,0 +1,19 @@
# Debug utility: pull one specific frame out of a hadoopy job's output and
# save it as a JPEG for visual inspection.
# NOTE(review): glob and base64 are imported but never used here.
import glob
import base64
import os

import Image
import hadoopy

# Hadoop output file to scan and the local directory to write into.
FILE = '/tmp/bwhite/output/pets2006.video_frame_data.b/0.903472866947'
OUTPUT = 'out'

# Create the output directory if it does not already exist.
try:
    os.mkdir(OUTPUT)
except OSError:
    pass

for name, data in hadoopy.cat(FILE):
    # Dump only the single frame of interest.
    if name == '1-1-2241':
        print(name)
        # data is raw 8-bit grayscale bytes at 720x576 (PETS2006 frame size
        # — presumably; verify against the producing job).
        Image.fromstring('L', (720, 576), data).save(OUTPUT + '/' + name + '.jpg')
19 changes: 19 additions & 0 deletions performance/image_convert.py
@@ -0,0 +1,19 @@
#!/usr/bin/env python
import base64

import hadoopy


def mapper(key, value):
    """Parse a 'video<TAB>frame<TAB>base64data' line into
    (video, (frame number, decoded image bytes))."""
    video, frame, encoded = value.split('\t')
    yield video, (int(frame), base64.b64decode(encoded))


def reducer(key, values):
    """Identity reducer: forward every value under its key unchanged."""
    for item in values:
        yield key, item


if __name__ == "__main__":
    # hadoopy dispatches to mapper/reducer based on the task type; a truthy
    # return means the invocation was wrong, so print usage and exit.
    if hadoopy.run(mapper, reducer):
        hadoopy.print_doc_quit(__doc__)
71 changes: 71 additions & 0 deletions performance/kmeans_cluster.py
@@ -0,0 +1,71 @@
#!/usr/bin/env python
import os
import time
import cPickle as pickle

import numpy as np

import hadoopy

import profile

class Mapper(profile.ProfileJob):
    """k-means assignment mapper: emits each feature vector, keyed by its
    nearest cluster index, with a trailing 1.0 appended as a count."""

    def __init__(self):
        super(Mapper, self).__init__()
        # Cluster centers are shipped to each task as a pickle whose path
        # arrives through the environment.
        with open(os.environ["CLUSTERS_PKL"]) as fp:
            self.clusters = pickle.load(fp)
        # The nearest-neighbor implementation is pluggable via NN_MODULE.
        nn_module = __import__(os.environ['NN_MODULE'], fromlist=['nn'])
        self.nn = nn_module.nn

    def map(self, key, feat_str):
        """Yield (nearest cluster index, feature string + trailing 1.0)."""
        # '\x00\x00\x80?' is little-endian float32 1.0; the extra component
        # serves as a vector count for normalization in the reducer.
        feat_str += '\x00\x00\x80?'
        feat = np.fromstring(feat_str, dtype=np.float32)
        nearest_ind = self.nn(feat[0:-1], self.clusters)[0]
        yield nearest_ind, feat_str

    def close(self):
        super(Mapper, self).close()


class Combiner(profile.ProfileJob):
    """Local combiner: element-wise sum of the float32 vectors of one key."""

    def __init__(self):
        super(Combiner, self).__init__()

    def reduce(self, key, values):
        """Yield (key, serialized element-wise sum of all value vectors)."""
        total = None
        for vec_str in values:
            vec = np.fromstring(vec_str, dtype=np.float32)
            if total is None:
                total = vec
            else:
                total += vec
        yield key, total.tostring()

    def close(self):
        super(Combiner, self).close()


class Reducer(profile.ProfileJob):
    """Final reducer: averages the summed vectors into a new cluster center."""

    def __init__(self):
        super(Reducer, self).__init__()

    def reduce(self, key, values):
        """Yield (key, serialized center = sum[:-1] / sum[-1])."""
        total = None
        for vec_str in values:
            vec = np.fromstring(vec_str, dtype=np.float32)
            if total is None:
                total = vec
            else:
                total += vec
        # The last component accumulated one 1.0 per input vector, i.e. the
        # vector count, so dividing by it yields the mean.
        center = total[0:-1] / total[-1]
        yield key, center.tostring()

    def close(self):
        super(Reducer, self).close()


if __name__ == "__main__":
    # hadoopy dispatches among Mapper/Reducer/Combiner per task; a truthy
    # return means the invocation was wrong, so print usage and exit.
    if hadoopy.run(Mapper, Reducer, Combiner):
        hadoopy.print_doc_quit(__doc__)
57 changes: 57 additions & 0 deletions performance/kmeans_cluster_imc.py
@@ -0,0 +1,57 @@
#!/usr/bin/env python
import os
import time
import itertools
import cPickle as pickle

import numpy as np

import hadoopy

import profile

class Mapper(profile.ProfileJob):
    """In-mapper-combining k-means mapper: accumulates per-cluster sums
    locally during map() and emits one partial sum per cluster at close()."""

    def __init__(self):
        super(Mapper, self).__init__()
        # Cluster centers arrive as a pickle whose path is in the environment.
        with open(os.environ["CLUSTERS_PKL"]) as fp:
            self.clusters = pickle.load(fp)
        self.out_sums = {}
        # Pluggable nearest-neighbor implementation, selected via NN_MODULE.
        nn_module = __import__(os.environ['NN_MODULE'], fromlist=['nn'])
        self.nn = nn_module.nn

    def map(self, key, feat):
        """Fold the feature into the running sum of its nearest cluster."""
        # '\x00\x00\x80?' is little-endian float32 1.0: the appended
        # component counts vectors, used for normalization in the reducer.
        feat = np.fromstring(feat + '\x00\x00\x80?', dtype=np.float32)
        nearest_ind = self.nn(feat[0:-1], self.clusters)[0]
        if nearest_ind in self.out_sums:
            self.out_sums[nearest_ind] += feat
        else:
            self.out_sums[nearest_ind] = feat

    def close(self):
        # Flush one partial sum per cluster this mapper saw.
        for nearest_ind, out_sum in self.out_sums.iteritems():
            yield nearest_ind, out_sum.tostring()
        super(Mapper, self).close()

class Reducer(profile.ProfileJob):
    """Final reducer: averages the partial sums into a new cluster center."""

    def __init__(self):
        super(Reducer, self).__init__()

    def reduce(self, key, values):
        """Yield (key, serialized center = sum[:-1] / sum[-1])."""
        total = None
        for vec_str in values:
            vec = np.fromstring(vec_str, dtype=np.float32)
            if total is None:
                total = vec
            else:
                total += vec
        # The last component holds the vector count (one 1.0 per input).
        center = total[0:-1] / total[-1]
        yield key, center.tostring()

    def close(self):
        super(Reducer, self).close()


if __name__ == "__main__":
    # hadoopy dispatches to Mapper/Reducer based on the task type; a truthy
    # return means the invocation was wrong, so print usage and exit.
    if hadoopy.run(Mapper, Reducer):
        hadoopy.print_doc_quit(__doc__)

0 comments on commit 8df9041

Please sign in to comment.