Skip to content

Commit

Permalink
increase loop size limit in unrolling (we gained some more intermediate instructions)
Browse files Browse the repository at this point in the history
  • Loading branch information
aras-p committed Feb 20, 2014
1 parent 1f0973c commit 6b07298
Show file tree
Hide file tree
Showing 3 changed files with 219 additions and 116 deletions.
2 changes: 1 addition & 1 deletion src/glsl/loop_unroll.cpp
Expand Up @@ -253,7 +253,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
*/ */
loop_unroll_count count(&ir->body_instructions); loop_unroll_count count(&ir->body_instructions);


if (count.fail || count.nodes * iterations > (int)max_iterations * 15) if (count.fail || count.nodes * iterations > (int)max_iterations * 25)
return visit_continue; return visit_continue;


/* Note: the limiting terminator contributes 1 to ls->num_loop_jumps. /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps.
Expand Down
102 changes: 85 additions & 17 deletions tests/fragment/zunity-MotionBlur-TileMax-out.txt
Expand Up @@ -8,24 +8,92 @@ void main ()
uvCorner_3 = xlv_TEXCOORD0; uvCorner_3 = xlv_TEXCOORD0;
mx_2 = texture2D (_MainTex, xlv_TEXCOORD0).xy; mx_2 = texture2D (_MainTex, xlv_TEXCOORD0).xy;
for (int j_1 = 0; j_1 < 8; j_1++) { for (int j_1 = 0; j_1 < 8; j_1++) {
for (int l_4 = 0; l_4 < 8; l_4++) { vec2 tmpvar_4;
vec2 tmpvar_5; tmpvar_4.x = 0.0;
tmpvar_5.x = float(l_4); tmpvar_4.y = float(j_1);
tmpvar_5.y = float(j_1); vec4 tmpvar_5;
vec4 tmpvar_6; tmpvar_5 = texture2D (_MainTex, (uvCorner_3 + (tmpvar_4 * _MainTex_TexelSize.xy)));
tmpvar_6 = texture2D (_MainTex, (uvCorner_3 + (tmpvar_5 * _MainTex_TexelSize.xy))); mx_2 = mix (mx_2, tmpvar_5.xy, vec2(float((
mx_2 = mix (mx_2, tmpvar_6.xy, vec2(float(( dot (tmpvar_5.xy, tmpvar_5.xy)
dot (tmpvar_6.xy, tmpvar_6.xy) >=
>= dot (mx_2, mx_2)
dot (mx_2, mx_2) ))));
)))); vec2 tmpvar_6;
}; tmpvar_6.x = 1.0;
tmpvar_6.y = float(j_1);
vec4 tmpvar_7;
tmpvar_7 = texture2D (_MainTex, (uvCorner_3 + (tmpvar_6 * _MainTex_TexelSize.xy)));
mx_2 = mix (mx_2, tmpvar_7.xy, vec2(float((
dot (tmpvar_7.xy, tmpvar_7.xy)
>=
dot (mx_2, mx_2)
))));
vec2 tmpvar_8;
tmpvar_8.x = 2.0;
tmpvar_8.y = float(j_1);
vec4 tmpvar_9;
tmpvar_9 = texture2D (_MainTex, (uvCorner_3 + (tmpvar_8 * _MainTex_TexelSize.xy)));
mx_2 = mix (mx_2, tmpvar_9.xy, vec2(float((
dot (tmpvar_9.xy, tmpvar_9.xy)
>=
dot (mx_2, mx_2)
))));
vec2 tmpvar_10;
tmpvar_10.x = 3.0;
tmpvar_10.y = float(j_1);
vec4 tmpvar_11;
tmpvar_11 = texture2D (_MainTex, (uvCorner_3 + (tmpvar_10 * _MainTex_TexelSize.xy)));
mx_2 = mix (mx_2, tmpvar_11.xy, vec2(float((
dot (tmpvar_11.xy, tmpvar_11.xy)
>=
dot (mx_2, mx_2)
))));
vec2 tmpvar_12;
tmpvar_12.x = 4.0;
tmpvar_12.y = float(j_1);
vec4 tmpvar_13;
tmpvar_13 = texture2D (_MainTex, (uvCorner_3 + (tmpvar_12 * _MainTex_TexelSize.xy)));
mx_2 = mix (mx_2, tmpvar_13.xy, vec2(float((
dot (tmpvar_13.xy, tmpvar_13.xy)
>=
dot (mx_2, mx_2)
))));
vec2 tmpvar_14;
tmpvar_14.x = 5.0;
tmpvar_14.y = float(j_1);
vec4 tmpvar_15;
tmpvar_15 = texture2D (_MainTex, (uvCorner_3 + (tmpvar_14 * _MainTex_TexelSize.xy)));
mx_2 = mix (mx_2, tmpvar_15.xy, vec2(float((
dot (tmpvar_15.xy, tmpvar_15.xy)
>=
dot (mx_2, mx_2)
))));
vec2 tmpvar_16;
tmpvar_16.x = 6.0;
tmpvar_16.y = float(j_1);
vec4 tmpvar_17;
tmpvar_17 = texture2D (_MainTex, (uvCorner_3 + (tmpvar_16 * _MainTex_TexelSize.xy)));
mx_2 = mix (mx_2, tmpvar_17.xy, vec2(float((
dot (tmpvar_17.xy, tmpvar_17.xy)
>=
dot (mx_2, mx_2)
))));
vec2 tmpvar_18;
tmpvar_18.x = 7.0;
tmpvar_18.y = float(j_1);
vec4 tmpvar_19;
tmpvar_19 = texture2D (_MainTex, (uvCorner_3 + (tmpvar_18 * _MainTex_TexelSize.xy)));
mx_2 = mix (mx_2, tmpvar_19.xy, vec2(float((
dot (tmpvar_19.xy, tmpvar_19.xy)
>=
dot (mx_2, mx_2)
))));
}; };
vec4 tmpvar_7; vec4 tmpvar_20;
tmpvar_7.zw = vec2(0.0, 0.0); tmpvar_20.zw = vec2(0.0, 0.0);
tmpvar_7.xy = mx_2; tmpvar_20.xy = mx_2;
gl_FragData[0] = tmpvar_7; gl_FragData[0] = tmpvar_20;
} }




// inputs: 1, stats: 16 alu 2 tex 4 flow // inputs: 1, stats: 76 alu 9 tex 2 flow
231 changes: 133 additions & 98 deletions tests/vertex/z-NichsHybridLight-out.txt
@@ -1,25 +1,25 @@
varying vec2 xlv_TEXCOORD2;
varying vec3 xlv_COLOR;
varying vec3 xlv_TEXCOORD1;
varying vec3 xlv_TEXCOORD0;
attribute vec4 TANGENT;
uniform vec4 _MainTex_ST;
uniform vec4 LightCol[30];
uniform vec4 LightPos[30];
uniform vec4 IngameGridLimit;
uniform sampler2D GridLightTexture;
uniform vec3 GridLightTextureCellSize;
uniform vec3 GridLightTextureStartCell;
uniform vec4 _HybridSunCol;
uniform vec3 _HybridSunDir;
uniform vec3 _Tonemap_colorScale2;
uniform vec3 _Tonemap_colorScale1;
uniform vec3 _Tonemap_shoulder;
uniform vec3 _Tonemap_heel;
uniform vec3 _Tonemap_toeLength;
uniform vec3 _Tonemap_blackLevel;
uniform mat4 _World2Object;
uniform mat4 _Object2World; uniform mat4 _Object2World;
uniform mat4 _World2Object;
uniform vec3 _Tonemap_blackLevel;
uniform vec3 _Tonemap_toeLength;
uniform vec3 _Tonemap_heel;
uniform vec3 _Tonemap_shoulder;
uniform vec3 _Tonemap_colorScale1;
uniform vec3 _Tonemap_colorScale2;
uniform vec3 _HybridSunDir;
uniform vec4 _HybridSunCol;
uniform vec3 GridLightTextureStartCell;
uniform vec3 GridLightTextureCellSize;
uniform sampler2D GridLightTexture;
uniform vec4 IngameGridLimit;
uniform vec4 LightPos[30];
uniform vec4 LightCol[30];
uniform vec4 _MainTex_ST;
attribute vec4 TANGENT;
varying vec3 xlv_TEXCOORD0;
varying vec3 xlv_TEXCOORD1;
varying vec3 xlv_COLOR;
varying vec2 xlv_TEXCOORD2;
void main () void main ()
{ {
vec3 tmpvar_1; vec3 tmpvar_1;
Expand All @@ -43,7 +43,9 @@ void main ()
tmpvar_8 = (gl_Color.xyz * 3.0); tmpvar_8 = (gl_Color.xyz * 3.0);
tmpvar_7 = (tmpvar_8 * tmpvar_8); tmpvar_7 = (tmpvar_8 * tmpvar_8);
ivec4 tmpvar_9; ivec4 tmpvar_9;
tmpvar_9 = ivec4((texture2DLod (GridLightTexture, clamp (((tmpvar_1 - GridLightTextureStartCell) * GridLightTextureCellSize).xz, IngameGridLimit.xy, IngameGridLimit.zw), 0.0) * 255.0)); tmpvar_9 = ivec4((texture2DLod (GridLightTexture, clamp (
((tmpvar_1 - GridLightTextureStartCell) * GridLightTextureCellSize)
.xz, IngameGridLimit.xy, IngameGridLimit.zw), 0.0) * 255.0));
vec4 tmpvar_10; vec4 tmpvar_10;
vec4 tmpvar_11; vec4 tmpvar_11;
vec4 tmpvar_12; vec4 tmpvar_12;
Expand All @@ -70,96 +72,129 @@ void main ()
tmpvar_23 = dot (tmpvar_22, tmpvar_22); tmpvar_23 = dot (tmpvar_22, tmpvar_22);
vec3 tmpvar_24; vec3 tmpvar_24;
tmpvar_24 = (tmpvar_22 * inversesqrt(tmpvar_23)); tmpvar_24 = (tmpvar_22 * inversesqrt(tmpvar_23));
atten_19.x = max ((1.0 - ((tmpvar_23 * tmpvar_10.w) * 0.8)), 0.0); vec4 tmpvar_25;
atten_19.x = (atten_19.x * atten_19.x); tmpvar_25 = atten_19; tmpvar_25.x = max ((1.0 - (
float tmpvar_25; (tmpvar_23 * tmpvar_10.w)
tmpvar_25 = ((atten_19.x * tmpvar_11.w) * max (dot (tmpvar_24, tmpvar_3), 0.05)); * 0.8)), 0.0);
lAgg_21 = (tmpvar_25 * tmpvar_24); vec4 tmpvar_26;
wAgg_20 = (0.001 + tmpvar_25); tmpvar_26 = tmpvar_25; tmpvar_26.x = (tmpvar_25.x * tmpvar_25.x);
vec3 tmpvar_26;
tmpvar_26 = (tmpvar_12.xyz - tmpvar_1);
float tmpvar_27; float tmpvar_27;
tmpvar_27 = dot (tmpvar_26, tmpvar_26); tmpvar_27 = ((tmpvar_26.x * tmpvar_11.w) * max (dot (tmpvar_24, tmpvar_3), 0.05));
lAgg_21 = (tmpvar_27 * tmpvar_24);
wAgg_20 = (0.001 + tmpvar_27);
vec3 tmpvar_28; vec3 tmpvar_28;
tmpvar_28 = (tmpvar_26 * inversesqrt(tmpvar_27)); tmpvar_28 = (tmpvar_12.xyz - tmpvar_1);
atten_19.y = max ((1.0 - ((tmpvar_27 * tmpvar_12.w) * 0.8)), 0.0);
atten_19.y = (atten_19.y * atten_19.y);
float tmpvar_29; float tmpvar_29;
tmpvar_29 = ((atten_19.y * tmpvar_13.w) * max (dot (tmpvar_28, tmpvar_3), 0.05)); tmpvar_29 = dot (tmpvar_28, tmpvar_28);
lAgg_21 = (lAgg_21 + (tmpvar_29 * tmpvar_28));
wAgg_20 = (wAgg_20 + tmpvar_29);
vec3 tmpvar_30; vec3 tmpvar_30;
tmpvar_30 = (tmpvar_14.xyz - tmpvar_1); tmpvar_30 = (tmpvar_28 * inversesqrt(tmpvar_29));
float tmpvar_31; vec4 tmpvar_31;
tmpvar_31 = dot (tmpvar_30, tmpvar_30); tmpvar_31 = tmpvar_26; tmpvar_31.y = max ((1.0 - (
vec3 tmpvar_32; (tmpvar_29 * tmpvar_12.w)
tmpvar_32 = (tmpvar_30 * inversesqrt(tmpvar_31)); * 0.8)), 0.0);
atten_19.z = max ((1.0 - ((tmpvar_31 * tmpvar_14.w) * 0.8)), 0.0); vec4 tmpvar_32;
atten_19.z = (atten_19.z * atten_19.z); tmpvar_32 = tmpvar_31; tmpvar_32.y = (tmpvar_31.y * tmpvar_31.y);
float tmpvar_33; float tmpvar_33;
tmpvar_33 = ((atten_19.z * tmpvar_15.w) * max (dot (tmpvar_32, tmpvar_3), 0.05)); tmpvar_33 = ((tmpvar_32.y * tmpvar_13.w) * max (dot (tmpvar_30, tmpvar_3), 0.05));
lAgg_21 = (lAgg_21 + (tmpvar_33 * tmpvar_32)); lAgg_21 = (lAgg_21 + (tmpvar_33 * tmpvar_30));
wAgg_20 = (wAgg_20 + tmpvar_33); wAgg_20 = (wAgg_20 + tmpvar_33);
vec3 tmpvar_34; vec3 tmpvar_34;
tmpvar_34 = (tmpvar_16.xyz - tmpvar_1); tmpvar_34 = (tmpvar_14.xyz - tmpvar_1);
float tmpvar_35; float tmpvar_35;
tmpvar_35 = dot (tmpvar_34, tmpvar_34); tmpvar_35 = dot (tmpvar_34, tmpvar_34);
vec3 tmpvar_36; vec3 tmpvar_36;
tmpvar_36 = (tmpvar_34 * inversesqrt(tmpvar_35)); tmpvar_36 = (tmpvar_34 * inversesqrt(tmpvar_35));
atten_19.w = max ((1.0 - ((tmpvar_35 * tmpvar_16.w) * 0.8)), 0.0); vec4 tmpvar_37;
atten_19.w = (atten_19.w * atten_19.w); tmpvar_37 = tmpvar_32; tmpvar_37.z = max ((1.0 - (
float tmpvar_37; (tmpvar_35 * tmpvar_14.w)
tmpvar_37 = ((atten_19.w * tmpvar_17.w) * max (dot (tmpvar_36, tmpvar_3), 0.05)); * 0.8)), 0.0);
lAgg_21 = (lAgg_21 + (tmpvar_37 * tmpvar_36)); vec4 tmpvar_38;
wAgg_20 = (wAgg_20 + tmpvar_37); tmpvar_38 = tmpvar_37; tmpvar_38.z = (tmpvar_37.z * tmpvar_37.z);
float tmpvar_38; float tmpvar_39;
tmpvar_38 = (dot (tmpvar_7, vec3(0.22, 0.707, 0.071)) * 2.0); tmpvar_39 = ((tmpvar_38.z * tmpvar_15.w) * max (dot (tmpvar_36, tmpvar_3), 0.05));
vec3 tmpvar_39; lAgg_21 = (lAgg_21 + (tmpvar_39 * tmpvar_36));
tmpvar_39 = (lAgg_21 + ((((nn_4.xyz * 2.0) + vec3(0.0, 0.0, -1.0)) * (gl_Color.w * 2.0)) * tmpvar_38)); wAgg_20 = (wAgg_20 + tmpvar_39);
lAgg_21 = tmpvar_39; vec3 tmpvar_40;
float tmpvar_40; tmpvar_40 = (tmpvar_16.xyz - tmpvar_1);
tmpvar_40 = (wAgg_20 + tmpvar_38); float tmpvar_41;
wAgg_20 = tmpvar_40; tmpvar_41 = dot (tmpvar_40, tmpvar_40);
hybridCol_18 = (tmpvar_7 + (tmpvar_11.xyz * atten_19.x));
hybridCol_18 = (hybridCol_18 + (tmpvar_13.xyz * atten_19.y));
hybridCol_18 = (hybridCol_18 + (tmpvar_15.xyz * atten_19.z));
hybridCol_18 = (hybridCol_18 + (tmpvar_17.xyz * atten_19.w));
vec3 x_41;
x_41 = (hybridCol_18 * 0.25);
vec3 tmpvar_42; vec3 tmpvar_42;
tmpvar_42 = max ((x_41 - (_Tonemap_blackLevel * 0.25)), vec3(0.0, 0.0, 0.0)); tmpvar_42 = (tmpvar_40 * inversesqrt(tmpvar_41));
vec3 tmpvar_43; vec4 tmpvar_43;
tmpvar_43 = ((tmpvar_42 * ((_Tonemap_colorScale1 * tmpvar_42) + _Tonemap_heel)) / ((tmpvar_42 * ((_Tonemap_colorScale2 * tmpvar_42) + _Tonemap_shoulder)) + _Tonemap_toeLength)); tmpvar_43 = tmpvar_38; tmpvar_43.w = max ((1.0 - (
x_41 = tmpvar_43; (tmpvar_41 * tmpvar_16.w)
vec3 x_44; * 0.8)), 0.0);
x_44 = ((_HybridSunCol.xyz * dot (tmpvar_3, _HybridSunDir)) * 0.25); vec4 tmpvar_44;
vec3 tmpvar_45; tmpvar_44 = tmpvar_43; tmpvar_44.w = (tmpvar_43.w * tmpvar_43.w);
tmpvar_45 = max ((x_44 - (_Tonemap_blackLevel * 0.25)), vec3(0.0, 0.0, 0.0)); atten_19 = tmpvar_44;
vec3 tmpvar_46; float tmpvar_45;
tmpvar_46 = ((tmpvar_45 * ((_Tonemap_colorScale1 * tmpvar_45) + _Tonemap_heel)) / ((tmpvar_45 * ((_Tonemap_colorScale2 * tmpvar_45) + _Tonemap_shoulder)) + _Tonemap_toeLength)); tmpvar_45 = ((tmpvar_44.w * tmpvar_17.w) * max (dot (tmpvar_42, tmpvar_3), 0.05));
x_44 = tmpvar_46; lAgg_21 = (lAgg_21 + (tmpvar_45 * tmpvar_42));
wAgg_20 = (wAgg_20 + tmpvar_45);
float tmpvar_46;
tmpvar_46 = (dot (tmpvar_7, vec3(0.22, 0.707, 0.071)) * 2.0);
vec3 tmpvar_47; vec3 tmpvar_47;
vec3 tmpvar_48; tmpvar_47 = (lAgg_21 + ((
tmpvar_47 = TANGENT.xyz; ((nn_4.xyz * 2.0) + vec3(0.0, 0.0, -1.0))
tmpvar_48 = (((gl_Normal.yzx * TANGENT.zxy) - (gl_Normal.zxy * TANGENT.yzx)) * TANGENT.w); *
mat3 tmpvar_49; (gl_Color.w * 2.0)
tmpvar_49[0].x = tmpvar_47.x; ) * tmpvar_46));
tmpvar_49[0].y = tmpvar_48.x; lAgg_21 = tmpvar_47;
tmpvar_49[0].z = gl_Normal.x; float tmpvar_48;
tmpvar_49[1].x = tmpvar_47.y; tmpvar_48 = (wAgg_20 + tmpvar_46);
tmpvar_49[1].y = tmpvar_48.y; wAgg_20 = tmpvar_48;
tmpvar_49[1].z = gl_Normal.y; hybridCol_18 = (tmpvar_7 + (tmpvar_11.xyz * tmpvar_44.x));
tmpvar_49[2].x = tmpvar_47.z; hybridCol_18 = (hybridCol_18 + (tmpvar_13.xyz * tmpvar_44.y));
tmpvar_49[2].y = tmpvar_48.z; hybridCol_18 = (hybridCol_18 + (tmpvar_15.xyz * tmpvar_44.z));
tmpvar_49[2].z = gl_Normal.z; hybridCol_18 = (hybridCol_18 + (tmpvar_17.xyz * tmpvar_44.w));
mat3 tmpvar_50; vec3 x_49;
tmpvar_50[0] = _World2Object[0].xyz; x_49 = (hybridCol_18 * 0.25);
tmpvar_50[1] = _World2Object[1].xyz; vec3 tmpvar_50;
tmpvar_50[2] = _World2Object[2].xyz; vec3 cse_51;
cse_51 = (_Tonemap_blackLevel * 0.25);
tmpvar_50 = max ((x_49 - cse_51), vec3(0.0, 0.0, 0.0));
vec3 tmpvar_52;
tmpvar_52 = ((tmpvar_50 * (
(_Tonemap_colorScale1 * tmpvar_50)
+ _Tonemap_heel)) / ((tmpvar_50 *
((_Tonemap_colorScale2 * tmpvar_50) + _Tonemap_shoulder)
) + _Tonemap_toeLength));
x_49 = tmpvar_52;
vec3 x_53;
x_53 = ((_HybridSunCol.xyz * dot (tmpvar_3, _HybridSunDir)) * 0.25);
vec3 tmpvar_54;
tmpvar_54 = max ((x_53 - cse_51), vec3(0.0, 0.0, 0.0));
vec3 tmpvar_55;
tmpvar_55 = ((tmpvar_54 * (
(_Tonemap_colorScale1 * tmpvar_54)
+ _Tonemap_heel)) / ((tmpvar_54 *
((_Tonemap_colorScale2 * tmpvar_54) + _Tonemap_shoulder)
) + _Tonemap_toeLength));
x_53 = tmpvar_55;
vec3 tmpvar_56;
vec3 tmpvar_57;
tmpvar_56 = TANGENT.xyz;
tmpvar_57 = (((gl_Normal.yzx * TANGENT.zxy) - (gl_Normal.zxy * TANGENT.yzx)) * TANGENT.w);
mat3 tmpvar_58;
tmpvar_58[0].x = tmpvar_56.x;
tmpvar_58[0].y = tmpvar_57.x;
tmpvar_58[0].z = gl_Normal.x;
tmpvar_58[1].x = tmpvar_56.y;
tmpvar_58[1].y = tmpvar_57.y;
tmpvar_58[1].z = gl_Normal.y;
tmpvar_58[2].x = tmpvar_56.z;
tmpvar_58[2].y = tmpvar_57.z;
tmpvar_58[2].z = gl_Normal.z;
mat3 tmpvar_59;
tmpvar_59[0] = _World2Object[0].xyz;
tmpvar_59[1] = _World2Object[1].xyz;
tmpvar_59[2] = _World2Object[2].xyz;
gl_Position = (gl_ModelViewProjectionMatrix * gl_Vertex); gl_Position = (gl_ModelViewProjectionMatrix * gl_Vertex);
xlv_TEXCOORD0 = (tmpvar_49 * (tmpvar_50 * (tmpvar_39 / tmpvar_40))); xlv_TEXCOORD0 = (tmpvar_58 * (tmpvar_59 * (tmpvar_47 / tmpvar_48)));
xlv_TEXCOORD1 = (tmpvar_43 * 4.0); xlv_TEXCOORD1 = (tmpvar_52 * 4.0);
xlv_COLOR = (tmpvar_46 * 4.0); xlv_COLOR = (tmpvar_55 * 4.0);
xlv_TEXCOORD2 = ((gl_MultiTexCoord0.xy * _MainTex_ST.xy) + _MainTex_ST.zw); xlv_TEXCOORD2 = ((gl_MultiTexCoord0.xy * _MainTex_ST.xy) + _MainTex_ST.zw);
} }



// inputs: 6, stats: 141 alu 1 tex 0 flow

0 comments on commit 6b07298

Please sign in to comment.