Big thread contention speedup -- testing the m_groups_to_compile_count

atomic on every execution was hurting perf for simple shaders called very frequently (as happens for volume shading). Moving that test so it happens only when shaders are compiled, rather than on every execution, and even then guarding it with the m_greedyjit flag (which is usually off), improves perf substantially for a certain kind of scene we're trying to speed up.
AcademySoftwareFoundation · Jul 17, 2013 · e70e7de · e70e7de
1 parent 4d8e26a
commit e70e7de
Showing 1 changed file with 4 additions and 6 deletions.
diff --git a/src/liboslexec/context.cpp b/src/liboslexec/context.cpp
@@ -71,21 +71,19 @@ ShadingContext::execute (ShaderUse use, ShadingAttribState &sas,
                          ShaderGlobals &ssg, bool run)
 {
     DASSERT (use == ShadUseSurface);  // FIXME
-
     m_curuse = use;
     m_attribs = &sas;
 
-    if (shadingsys().m_groups_to_compile_count) {
-        // If we are greedily JITing, optimize/JIT everything now
-        shadingsys().optimize_all_groups ();
-    }
-
     // Optimize if we haven't already
     ShaderGroup &sgroup (sas.shadergroup (use));
     if (sgroup.nlayers()) {
         sgroup.start_running ();
         if (! sgroup.optimized()) {
             shadingsys().optimize_group (sas, sgroup);
+            if (shadingsys().m_greedyjit && shadingsys().m_groups_to_compile_count) {
+                // If we are greedily JITing, optimize/JIT everything now
+                shadingsys().optimize_all_groups ();
+            }
         }
         if (sgroup.does_nothing())
             return false;