Skip to content

Commit 973303a

Browse files
committed
refactor: avoid copy when drawing tile without any transformation
This reduces CPU usage by ~7%.
1 parent b42fafa commit 973303a

File tree

1 file changed

+38
-34
lines changed

1 file changed

+38
-34
lines changed

src/tiles.cpp

Lines changed: 38 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,13 @@ bool write_tile(tiledata *buf, BITMAP* src, int32_t dest, int32_t x, int32_t y,
555555
return true;
556556
}
557557

558+
// Returns a read-only pointer to the pixel bytes of `tile`.
//
// When no flip is requested, the tile's stored data in newtilebuf is handed
// back directly, so no copy is made. For any other flip value the tile is
// unpacked into unpackbuf via unpack_tile() and unpackbuf is returned instead.
//
// NOTE(review): the no-flip path presumes newtilebuf[tile].data already has
// the same layout unpack_tile() would produce - confirm for every tile format.
// NOTE(review): in the flipped case the result aliases the shared unpackbuf,
// so it is presumably only valid until the next unpack - confirm with callers.
static const byte* get_tile_bytes(int32_t tile, int32_t flip)
{
	if (flip != 0)
	{
		// A transformation is needed, so go through the unpack buffer.
		unpack_tile(newtilebuf, tile, flip, false);
		return unpackbuf;
	}

	// Untransformed: use the stored tile data in place.
	return newtilebuf[tile].data;
}
558565

559566
// unpacks from tilebuf to unpackbuf
560567
void unpack_tile(tiledata *buf, int32_t tile, int32_t flip, bool force)
@@ -829,8 +836,8 @@ void puttiletranslucent8(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t c
829836

830837
cset &= 15;
831838
cset <<= CSET_SHFT;
832-
unpack_tile(newtilebuf, tile>>2, 0, false);
833-
byte *si = unpackbuf + ((tile&2)<<6) + ((tile&1)<<3);
839+
const byte* bytes = get_tile_bytes(tile>>2, 0);
840+
const byte *si = bytes + ((tile&2)<<6) + ((tile&1)<<3);
834841

835842
if(flip&1) //horizontal
836843
{
@@ -931,8 +938,9 @@ void overtiletranslucent8(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t
931938

932939
cset &= 15;
933940
cset <<= CSET_SHFT;
934-
unpack_tile(newtilebuf, tile>>2, 0, false);
935-
byte *si = unpackbuf + ((tile&2)<<6) + ((tile&1)<<3);
941+
942+
const byte* bytes = get_tile_bytes(tile>>2, 0);
943+
const byte *si = bytes + ((tile&2)<<6) + ((tile&1)<<3);
936944

937945
if(flip&1)
938946
{
@@ -1042,8 +1050,8 @@ void puttiletranslucent16(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t
10421050

10431051
cset &= 15;
10441052
cset <<= CSET_SHFT;
1045-
unpack_tile(newtilebuf, tile, 0, false);
1046-
byte *si = unpackbuf;
1053+
1054+
const byte* si = get_tile_bytes(tile, flip&5);
10471055
byte *di;
10481056

10491057
if(flip&1)
@@ -1165,8 +1173,8 @@ void overtiletranslucent16(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t
11651173

11661174
cset &= 15;
11671175
cset <<= CSET_SHFT;
1168-
unpack_tile(newtilebuf, tile,flip&5, false);
1169-
byte *si = unpackbuf;
1176+
1177+
const byte* si = get_tile_bytes(tile, flip&5);
11701178
byte *di;
11711179

11721180
if((flip&2)==0)
@@ -1284,8 +1292,7 @@ void overtilecloaked16(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t fli
12841292
return;
12851293
}
12861294

1287-
unpack_tile(newtilebuf, tile, 0, false);
1288-
byte *si = unpackbuf;
1295+
const byte* si = get_tile_bytes(tile, 0);
12891296
byte *di;
12901297

12911298
if(flip&1)
@@ -1764,7 +1771,7 @@ void overcomboblocktranslucent(BITMAP *dest, int32_t x, int32_t y, int32_t cmbda
17641771
//shnarf
17651772

17661773
// A (slow) function to handle any tile8 draw.
1767-
static void draw_tile8_unified(BITMAP* dest, int cl, int ct, int cr, int cb, byte *si, int32_t x, int32_t y, int32_t cset, int32_t flip)
1774+
static void draw_tile8_unified(BITMAP* dest, int cl, int ct, int cr, int cb, const byte *si, int32_t x, int32_t y, int32_t cset, int32_t flip)
17681775
{
17691776
for (int32_t dy = 0; dy < 8; ++dy)
17701777
{
@@ -1816,7 +1823,7 @@ static void draw_tile8_unified(BITMAP* dest, int cl, int ct, int cr, int cb, byt
18161823
// }
18171824
// }
18181825

1819-
static void draw_tile16_unified(BITMAP* dest, int cl, int ct, int cr, int cb, byte *si, int32_t x, int32_t y, int32_t cset, int32_t flip, bool transparency)
1826+
static void draw_tile16_unified(BITMAP* dest, int cl, int ct, int cr, int cb, const byte *si, int32_t x, int32_t y, int32_t cset, int32_t flip, bool transparency)
18201827
{
18211828
for (int32_t dy = 0; dy < 16; ++dy)
18221829
{
@@ -1852,7 +1859,8 @@ void puttile8(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t cset,int32_t
18521859
cset &= 15;
18531860
cset <<= CSET_SHFT;
18541861
dword lcset = (cset<<24)+(cset<<16)+(cset<<8)+cset;
1855-
unpack_tile(newtilebuf, tile>>2, 0, false);
1862+
1863+
const byte* bytes = get_tile_bytes(tile>>2, 0);
18561864

18571865
// TODO: only title.cpp uses this function, so don't bother with this yet. Following code not verified.
18581866
// 0: fast, no bounds checking
@@ -1872,7 +1880,7 @@ void puttile8(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t cset,int32_t
18721880
{
18731881
case 1: // 1 byte at a time
18741882
{
1875-
byte *si = unpackbuf + ((tile&2)<<6) + ((tile&1)<<3);
1883+
const byte *si = bytes + ((tile&2)<<6) + ((tile&1)<<3);
18761884

18771885
for(int32_t dy=0; dy<8; ++dy)
18781886
{
@@ -1888,7 +1896,7 @@ void puttile8(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t cset,int32_t
18881896

18891897
case 2: // 4 bytes at a time
18901898
{
1891-
dword *si = ((dword*)unpackbuf) + ((tile&2)<<4) + ((tile&1)<<1);
1899+
const dword *si = ((const dword*)bytes) + ((tile&2)<<4) + ((tile&1)<<1);
18921900

18931901
for(int32_t dy=7; dy>=0; --dy)
18941902
{
@@ -1902,7 +1910,7 @@ void puttile8(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t cset,int32_t
19021910

19031911
case 3: // 1 byte at a time
19041912
{
1905-
byte *si = unpackbuf + ((tile&2)<<6) + ((tile&1)<<3);
1913+
const byte *si = bytes + ((tile&2)<<6) + ((tile&1)<<3);
19061914

19071915
for(int32_t dy=7; dy>=0; --dy)
19081916
{
@@ -1918,7 +1926,7 @@ void puttile8(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t cset,int32_t
19181926

19191927
default: // 4 bytes at a time
19201928
{
1921-
dword *si = ((dword*)unpackbuf) + ((tile&2)<<4) + ((tile&1)<<1);
1929+
const dword *si = ((const dword*)bytes) + ((tile&2)<<4) + ((tile&1)<<1);
19221930

19231931
for(int32_t dy=0; dy<8; ++dy)
19241932
{
@@ -1951,8 +1959,8 @@ void oldputtile8(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t cset,int3
19511959

19521960
cset &= 15;
19531961
cset <<= CSET_SHFT;
1954-
unpack_tile(newtilebuf, tile>>2, 0, false);
1955-
byte *si = unpackbuf + ((tile&2)<<6) + ((tile&1)<<3);
1962+
const byte* bytes = get_tile_bytes(tile>>2, 0);
1963+
const byte *si = bytes + ((tile&2)<<6) + ((tile&1)<<3);
19561964

19571965
if(flip&1)
19581966
{
@@ -2062,8 +2070,8 @@ void overtile8(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t cset,int32_
20622070

20632071
cset &= 15;
20642072
cset <<= CSET_SHFT;
2065-
unpack_tile(newtilebuf, tile>>2, 0, false);
2066-
byte *si = unpackbuf + ((tile&2)<<6) + ((tile&1)<<3);
2073+
const byte *bytes = get_tile_bytes(tile>>2, 0);
2074+
const byte *si = bytes + ((tile&2)<<6) + ((tile&1)<<3);
20672075

20682076
// 0: fast, no bounds checking
20692077
// 1: slow, bounds checking
@@ -2181,16 +2189,14 @@ void puttile16(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t cset,int32_
21812189

21822190
cset &= 15;
21832191
cset <<= CSET_SHFT;
2184-
2185-
unpack_tile(newtilebuf, tile, flip&5, false);
2192+
const byte *bytes = get_tile_bytes(tile, flip&5);
21862193

21872194
// 0: fast, no bounds checking
21882195
// 1: slow, bounds checking
21892196
int draw_mode = x < cl || y < ct || x >= cr-16 || y >= cb-16 || x%8 || y%8 ? 1 : 0;
21902197
if (draw_mode == 1)
21912198
{
2192-
byte *si = unpackbuf;
2193-
draw_tile16_unified(dest, cl, ct, cr, cb, si, x, y, cset, flip, false);
2199+
draw_tile16_unified(dest, cl, ct, cr, cb, bytes, x, y, cset, flip, false);
21942200
return;
21952201
}
21962202

@@ -2223,7 +2229,7 @@ void puttile16(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t cset,int32_
22232229
*/
22242230
qword llcset = (((qword)cset)<<56)+(((qword)cset)<<48)+(((qword)cset)<<40)+(((qword)cset)<<32)+(((qword)cset)<<24)+(cset<<16)+(cset<<8)+cset;
22252231
// qword llcset = (((qword)cset)<<56)|(((qword)cset)<<48)|(((qword)cset)<<40)|(((qword)cset)<<32)|(((qword)cset)<<24)|(cset<<16)|(cset<<8)|cset;
2226-
qword *si = (qword*)unpackbuf;
2232+
const qword *si = (const qword*)bytes;
22272233

22282234
for(int32_t dy=15; dy>=0; --dy)
22292235
{
@@ -2264,7 +2270,7 @@ void puttile16(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t cset,int32_
22642270
*/
22652271
qword llcset = (((qword)cset)<<56)+(((qword)cset)<<48)+(((qword)cset)<<40)+(((qword)cset)<<32)+(((qword)cset)<<24)+(cset<<16)+(cset<<8)+cset;
22662272
// qword llcset = (((qword)cset)<<56)|(((qword)cset)<<48)|(((qword)cset)<<40)|(((qword)cset)<<32)|(((qword)cset)<<24)|(cset<<16)|(cset<<8)|cset;
2267-
qword *si = (qword*)unpackbuf;
2273+
const qword *si = (const qword*)bytes;
22682274

22692275
for(int32_t dy=0; dy<16; ++dy)
22702276
{
@@ -2304,8 +2310,8 @@ void oldputtile16(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t cset,int
23042310

23052311
cset &= 15;
23062312
cset <<= CSET_SHFT;
2307-
unpack_tile(newtilebuf, tile, flip&5, false);
2308-
byte *si = unpackbuf;
2313+
2314+
const byte* si = get_tile_bytes(tile, flip&5);
23092315
byte *di;
23102316

23112317
if((flip&2)==0)
@@ -2462,16 +2468,14 @@ void overtile16(BITMAP* dest,int32_t tile,int32_t x,int32_t y,int32_t cset,int32
24622468

24632469
cset &= 15;
24642470
cset <<= CSET_SHFT;
2465-
unpack_tile(newtilebuf, tile, flip&5, false);
2466-
byte *si = unpackbuf;
2471+
const byte *si = get_tile_bytes(tile, flip&5);
24672472
byte *di;
24682473

24692474
// 0: fast, no bounds checking
24702475
// 1: slow, bounds checking
24712476
int draw_mode = x < cl || y < ct || x >= cr-16 || y >= cb-16 ? 1 : 0;
24722477
if (draw_mode == 1)
24732478
{
2474-
byte *si = unpackbuf;
24752479
draw_tile16_unified(dest, cl, ct, cr, cb, si, x, y, cset, flip, true);
24762480
return;
24772481
}
@@ -2564,8 +2568,8 @@ void drawtile16_cs2(BITMAP *dest,int32_t tile,int32_t x,int32_t y,int32_t cset[]
25642568
cset[0]=cset[1]=cset[2]=cset[3]=0;
25652569
else for(int q = 0; q < 4; ++q)
25662570
cset[q] <<= CSET_SHFT;
2567-
unpack_tile(newtilebuf, tile, flip&5, false);
2568-
byte *si = unpackbuf;
2571+
2572+
const byte* si = get_tile_bytes(tile, flip&5);
25692573

25702574
bool vflip = (flip&2);
25712575
for(int dx = 0; dx < 16; ++dx)

0 commit comments

Comments
 (0)