Permalink
Browse files

minor optimizations, getting about 200 more fps

  • Loading branch information...
1 parent 5fb9a24 commit 112e2da5ce15030249c360ab359170d5eeb009e1 @bheads committed Nov 13, 2011
View
@@ -2,3 +2,4 @@
src/*.swp
*.swo
*.DS_Store
+rtrt-build-desktop-Desktop_Qt_4_7_3_for_GCC__Qt_SDK__Release/*
View
@@ -0,0 +1,35 @@
+#ifndef COLOR_H
+#define COLOR_H
+
+#include <modules/vec4.h>
+
+
+/**
+ \brief Clamp a color to min and max values
+
+ Clamps the four packed floats of a color vector, in place, to the range
+ [min, max] using SSE instructions. Used to prepare a color for rendering.
+
+ \param c Color to clamp (overwritten with the clamped result)
+ \param min Min clamp value (lower bound, defaults to 0)
+ \param max Max clamp value (upper bound, defaults to 1.0)
+ \returns reference to the clamped color (the same object passed in as c)
+*/
+inline color &clamp( color &c, const float min = 0, const float max = 1.0 )
+{
+ asm( "movaps %1, %%xmm0 \n\t" // load c; NOTE(review): movaps requires c to be 16-byte aligned - confirm the color type guarantees this
+ "movss %2, %%xmm1 \n\t" // %2 is max: load it into lane 0
+ "shufps $0x00, %%xmm1, %%xmm1 \n\t" // broadcast lane 0 to all four lanes
+ "minps %%xmm1, %%xmm0 \n\t" // upper bound: per-lane min(c, max)
+ "movss %3, %%xmm1 \n\t" // %3 is min: load it into lane 0
+ "shufps $0x00, %%xmm1, %%xmm1 \n\t" // broadcast lane 0 to all four lanes
+ "maxps %%xmm1, %%xmm0 \n\t" // lower bound: per-lane max(c, min)
+ "movaps %%xmm0, %0 \n\t" // store the clamped lanes back into c
+ : "=m"(c)
+ : "m"(c), "m"(max), "m"(min)
+ : "xmm0", "xmm1"
+ );
+ return( c );
+}
+
+#endif // COLOR_H
View
@@ -1,5 +1,7 @@
#include "image.h"
+#include <modules/color.h>
+
Image::Image()
: _width(0), _height(0), _data(NULL), ready(false)
{
@@ -56,9 +58,10 @@ void Image::fill_with_color(boost::uint8_t r, boost::uint8_t g, boost::uint8_t b
void Image::fill_with_color(boost::uint32_t c)
{
if(!ready) return;
- for(size_t y = 0; y < _height; ++y)
+#pragma omp parallel for
+ for(ssize_t y = 0; y < _height; ++y)
{
- for(size_t x = 0; x < _width; ++x)
+ for(ssize_t x = 0; x < _width; ++x)
{
_data[at(x,y)].color = c;
}
@@ -77,13 +80,3 @@ void Image::fill_with_random()
}
}
}
-
-void Image::set(size_t x, size_t y, color c)
-{
- if(!ready) return;
- clamp(c);
- _data[at(x,y)].r = c.x*255;
- _data[at(x,y)].g = c.y*255;
- _data[at(x,y)].b = c.z*255;
- _data[at(x,y)].a = 0;
-}
View
@@ -38,7 +38,36 @@ class Image
inline iColor *data() { return(_data); }
inline size_t at(size_t x, size_t y) { return((y * _width) + x); } // linear index of (x, y), row-major with _width entries per row; no bounds check
- void set(size_t , size_t, color);
+ //void set(size_t , size_t, color);
+
+ inline void set(size_t x, size_t y, color c) // clamp c to [0, 1], scale by 255 and write r/g/b of pixel (x, y); c is a by-value copy
+ { // NOTE(review): x and y are not range-checked here - callers must pass valid coordinates
+ float zero = 0.0f, one = 1.0f, conv = 255.0f;
+
+ if(!ready) return; // silently ignore writes while the image is not ready
+ asm( "movaps %1, %%xmm0 \n\t" // load c; NOTE(review): movaps requires c to be 16-byte aligned - confirm the color type guarantees this
+ "movss %2, %%xmm1 \n\t" // %2 is one (1.0f)
+ "shufps $0x00, %%xmm1, %%xmm1 \n\t" // broadcast lane 0 to all four lanes
+ "minps %%xmm1, %%xmm0 \n\t" // upper bound: per-lane min(c, 1.0)
+ "movss %3, %%xmm1 \n\t" // %3 is zero (0.0f)
+ "shufps $0x00, %%xmm1, %%xmm1 \n\t" // broadcast lane 0 to all four lanes
+ "maxps %%xmm1, %%xmm0 \n\t" // lower bound: per-lane max(c, 0.0)
+ "movss %4, %%xmm1 \n\t" // %4 is conv (255.0f)
+ "shufps $0x00, %%xmm1, %%xmm1 \n\t" // broadcast lane 0 to all four lanes
+ "mulps %%xmm1, %%xmm0\n\t" // multiply by 255
+ "movaps %%xmm0, %0 \n\t" // store the scaled lanes back into the local copy c
+ : "=m"(c)
+ : "m"(c), "m"(one), "m"(zero), "m"(conv)
+ : "xmm0", "xmm1"
+ );
+
+ _data[at(x,y)].r = (uint8_t)c.x; // float -> uint8_t cast truncates toward zero (no rounding)
+ _data[at(x,y)].g = (uint8_t)c.y;
+ _data[at(x,y)].b = (uint8_t)c.z;
+ //_data[at(x,y)].a = 0; (alpha write disabled: the pixel's existing alpha value is left untouched)
+ }
+
+
private:
boost::uint32_t _width, _height;
iColor *_data;
View
@@ -621,35 +621,6 @@ vec4 &cross( vec4 &dest, const vec4 &a, const vec4 &b )
return( dest );
}
-
-/**
- \brief Clamp a color to min and max values
-
- Clamps a color vector to a set min and max value. This is used
- to clamp a color to be used for rendering.
-
- \param c Color to clamp
- \param min Min clamp value
- \param max Max clamp value
- \returns refrence to clamped color
-*/
-color &clamp( color &c, const float min, const float max )
-{
- asm( "movaps %1, %%xmm0 \n\t"
- "movss %2, %%xmm1 \n\t"
- "shufps $0x00, %%xmm1, %%xmm1 \n\t"
- "minps %%xmm1, %%xmm0 \n\t"
- "movss %3, %%xmm1 \n\t"
- "shufps $0x00, %%xmm1, %%xmm1 \n\t"
- "maxps %%xmm1, %%xmm0 \n\t"
- "movaps %%xmm0, %0 \n\t"
- : "=m"(c)
- : "m"(c), "m"(max), "m"(min)
- : "xmm0", "xmm1"
- );
- return( c );
-}
-
/**
\brief Clamp a copied color to min and max values
View
@@ -53,8 +53,9 @@ vec4 &cross( vec4 &, const vec4 &, const vec4 & );///< compute cross into input
//color ops
// color clamping
-color &clamp( color &, const float = 0.0, const float = 1.0 ); /// < Clamp a color to a given value
+//color &clamp( color &, const float = 0.0, const float = 1.0 ); /// < Clamp a color to a given value
const color clamped( const color &, const float = 0.0, const float = 1.0 ); /// < Return a clamped copy
+
#endif
View
@@ -1,7 +1,7 @@
#include "window.h"
Window::Window()
- : running(false), framerate(0), framecount(0), framedelta(0.0), frameupdatetime(glfwGetTime() + 1.0), frameprev(glfwGetTime())
+ : running(false), framerate(0), framecount(0), frameaverage(0), framedelta(0.0), frameupdatetime(glfwGetTime() + 1.0), frameprev(glfwGetTime())
{
int major, minor, revision;
CHECK(glfwInit() == GL_TRUE) << "Failed to initilize GLFW";
@@ -12,7 +12,7 @@ Window::Window()
}
Window::Window(uint32_t width, uint32_t height, bool fullscreen, std::string title, int32_t interval)
- : running(false), framerate(0), framecount(0), framedelta(0.0), frameupdatetime(glfwGetTime() + 1.0), frameprev(glfwGetTime())
+ : running(false), framerate(0), framecount(0), frameaverage(0), framedelta(0.0), frameupdatetime(glfwGetTime() + 1.0), frameprev(glfwGetTime())
{
int major, minor, revision;
glfwGetVersion(&major, &minor, &revision);
@@ -160,6 +160,9 @@ void Window::update_frame_rate()
framerate = framecount;
framecount = 0;
frameupdatetime = frametime + 1.0f;
+ if(frameaverage <= 0) frameaverage = framerate;
+ frameaverage += framerate;
+ frameaverage /= 2;
}
framedelta = frametime - frameprev;
View
@@ -47,12 +47,14 @@ class Window
inline uint32_t width() { return(_width); }
inline uint32_t height(){ return(_height); }
+ inline uint32_t average_framerate() { return(frameaverage); } // smoothed frame rate maintained by update_frame_rate(); the int32_t member is converted to uint32_t on return
+
private:
uint32_t _width, _height;
bool fullscreen, running;
std::string title;
- int32_t framerate, framecount;
+ int32_t framerate, framecount, frameaverage;
double framedelta, frameupdatetime, frameprev;
View
@@ -19,7 +19,7 @@
// Projects includes
#include <cmdflags.h>
#include <modules/window.h>
-#include <modules/image.h>
+//#include <modules/image.h>
void render(Image *back_p);
@@ -62,10 +62,10 @@ int main(int argc, char *argv[])
win.update(); // swap the back buffer with the front buffer
}
}
-
}
LOG(INFO) << "shutting down";
+ LOG(INFO) << "Average frame rate was " << win.average_framerate();
front.destroy_image();
back.destroy_image();
win.destroy_window();
@@ -78,9 +78,11 @@ void render(Image *back_p)
#pragma omp parallel for
for(ssize_t y = 0; y < back_p->height(); ++y)
{
+ float red = (float)omp_get_thread_num()/(float)omp_get_num_threads();
+ float blue = 0;
for(ssize_t x = 0; x < back_p->width(); ++x)
{
- back_p->set(x,y,color((float)omp_get_thread_num()/(float)omp_get_num_threads(),0,0));
+ back_p->set(x,y,color(red, blue, 0));
}
}
}
View
@@ -1,4 +1,6 @@
+ CONFIG -= app_bundle
+
QMAKE_CXXFLAGS += -sse -sse2 -fopenmp
LIBS += -lpthread -lgflags -lglog -framework Cocoa -framework AGL -framework OpenGL -lglfw -lgomp
@@ -17,7 +19,9 @@ HEADERS += \
modules/window.h \
modules/image.h \
modules/vec4.h \
- modules/vec_func.h
+ modules/vec_func.h \
+ modules/color.h
+
View
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE QtCreatorProject>
-<!-- Written by Qt Creator 2.3.1, 2011-11-13T11:17:04. -->
+<!-- Written by Qt Creator 2.3.1, 2011-11-13T13:36:00. -->
<qtcreator>
<data>
<variable>ProjectExplorer.Project.ActiveTarget</variable>
@@ -87,7 +87,7 @@
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Desktop</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">Desktop</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Target.DesktopTarget</value>
- <value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">0</value>
+ <value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">1</value>
<value type="int" key="ProjectExplorer.Target.ActiveDeployConfiguration">0</value>
<value type="int" key="ProjectExplorer.Target.ActiveRunConfiguration">0</value>
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.0">

0 comments on commit 112e2da

Please sign in to comment.