-
Notifications
You must be signed in to change notification settings - Fork 0
/
xml2msg.py
435 lines (393 loc) · 23.9 KB
/
xml2msg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
# Honoruty, a message converter for SMG1's BMG message files, (c) 2020 EPICHICKENSOUP
# This software is under the GNU GPL 3.0 license. Find out more at https://www.gnu.org/licenses/.
import struct
import csv
import os
import sys
from pathlib import Path
def error(string, errtype=0):
    """Print a formatted error message, wait for the user, and exit.

    Args:
        string: Human-readable description of the problem.
        errtype: 1 prefixes the message with 'XML error!'; any other value
            uses the generic 'Error!' prefix.

    Raises:
        SystemExit: always, with exit code -1.
    """
    if errtype == 1:
        pre = 'XML error!'
    else:
        pre = 'Error!'
    print('\n'+pre, string, '\n')
    try:
        input('Press enter to exit.')
    except EOFError:
        # stdin is closed or exhausted (e.g. the script is run from a pipe);
        # there is nobody to wait for, so just exit.
        pass
    # sys.exit instead of quit(): quit() is injected by the `site` module for
    # interactive use and is not guaranteed to exist in every interpreter.
    sys.exit(-1)
debug = False  # debug mode. Unlike msg2xml, doesn't control verbosity

# The XML file to convert is the first command-line argument.
try:
    xmlfilepath = sys.argv[1]
except IndexError:
    error('Please input a file.\nExample: python xml2msg.py message.xml')

# Split the path into the containing folder and the extension-less file name.
# os.path understands every separator valid on the current platform (so "/"
# also works on Windows) and leaves a name without an extension intact.
_pathhead, _pathtail = os.path.split(xmlfilepath)
# `folder` keeps its trailing separator because later code builds paths with
# plain string concatenation (folder + filename + '.bmg').
folder = os.path.join(_pathhead, '') if _pathhead else ''
filename = os.path.splitext(_pathtail)[0]

endian = '>'  # struct byte-order character, set to whichever endian the file uses (like msg2xml.py)
escape = b'\x00\x1A'  # byte pair that introduces an escape sequence inside message data

if debug:
    print('Folder: ' + folder)

# Make sure the path is readable and the file starts with a known root tag.
try:
    with open(xmlfilepath, 'r') as f:
        magic = f.read(10)
except (OSError, UnicodeDecodeError) as exc:
    error(f'Unable to read "{xmlfilepath}": {exc}')

if magic == '<GSEM1gmb>':  # It is in 3DAS mode!
    print('Converting to little endian file for 3DAS!')
    endian = '<'
    escape = b'\x1A\x00'
elif magic != '<MESGbmg1>':
    error('Please input a valid MESGbmg1 XML file.')

ids = bytearray()
names = bytearray()
names.append(0)
num = 1
def getinvdict(f):
    """Return the inverse of the dictionary stored in a two-column csv file.

    Args:
        f: Path of the csv file; every row must have exactly two fields.

    Returns:
        A dict mapping each row's second field to its first field. When a
        second-column value occurs more than once, the last row wins.
    """
    # newline='' is what the csv module asks for, and the with-block makes
    # sure the handle is closed instead of leaking until garbage collection.
    with open(f, newline='') as csvfile:
        redict = dict(csv.reader(csvfile))
    return {v: k for k, v in redict.items()}
def getcsvmore(f):
    """Get the "more" value (usually the last one) from one of the csvs.

    Args:
        f: Path of a two-column csv file.

    Returns:
        The second field of the row whose first field is 'more', or None
        when the file has no such row.
    """
    # Context manager so the file handle is closed as soon as it is parsed.
    with open(f, newline='') as csvfile:
        redict = dict(csv.reader(csvfile))
    return redict.get('more')
# The lookup tables live in a "csv" folder next to this script.
scriptpath = Path(os.path.realpath(sys.argv[0])).parent
if debug:
    print('Script at path ' + str(scriptpath))
csv_folder = scriptpath / 'csv'

# Import all the dictionaries from the attached CSVs.
print('Fetching CSV files...')
# The emoji table is the only one that differs between smg ('>') and 3das.
_emoji_csv = csv_folder / ('emoji_hex.csv' if endian == '>' else 'emoji_3das.csv')
demoji = getinvdict(_emoji_csv)
emoji_unknown_text = getcsvmore(_emoji_csv)
print(f'emoji_unknown_text is "{emoji_unknown_text}"')

dcolor = getinvdict(csv_folder / 'color_hex.csv')
dpause = getinvdict(csv_folder / 'pause_hex.csv')
doddpause = getinvdict(csv_folder / 'oddpause_hex.csv')
dnumber = getinvdict(csv_folder / 'number_hex.csv')
dnames = getinvdict(csv_folder / 'names_hex.csv')  # Names of tags
dsizes = getinvdict(csv_folder / 'sizes_hex.csv')
dplumber = getinvdict(csv_folder / 'plumber_hex.csv')
print('All CSV files loaded.\n')
def packtuple(t):
    """Pack a string-formatted tuple into a bytearray.

    The argument is converted with str(), every parenthesis is dropped, and
    each comma-separated piece is parsed as an integer that becomes one byte
    of the result.
    """
    without_parens = str(t).translate(str.maketrans('', '', '()'))
    return bytearray(int(piece) for piece in without_parens.split(','))
def formmessageinf(tup):
    """Turn a string-formatted tuple into the info bytes of one message.

    The first entry is packed as an unsigned short in the byte order given by
    the module-level `endian`; every following entry is appended as a single
    byte. The result is returned as a bytearray.
    """
    fields = str(tup).replace('(', '').replace(')', '').split(',')
    print(f' actualtuple {fields}')
    packed = bytearray(struct.pack(f'{endian}H', int(fields[0])))
    packed.extend(int(field) for field in fields[1:])
    return packed
# Output buffers, written to the file once parsing is finished.
# inf1 holds one entry per message: a 4-byte pointer followed by the info bytes.
inf1 = bytearray()
# dat1 collects every message string. It starts with 2 null bytes so that
# blank messages (pointer 0) have something to point to.
dat1 = bytearray(2)
# Fun fact, I'm reading the file wrong... the very first message is named "" and has no content. It does have properties though.
# Bonus: these bytearrays could be replaced with a list of "message" structs
# (name, inf, data) that are all written at the end, which would allow
# automatic message sorting.

v = True  # Verbosity setting
total = 0  # number of <message> tags seen so far
if debug:
    totaltags = 0
message_name = ''
text = bytearray()
begintail = ''

# A list of messages to debug.
ldebug = [
    'AstroGalaxy_ButlerMap006',
    'AstroGalaxy_Tico027',
    'AstroGalaxy_Rosetta057',
    'CONT_10_2',
    'ForestHomeZone_HoneyBee005',
    'HeavensDoorInsideZone_Tico004',
    'Layout_StoryDemoKoopaTalk006',
    'PictureBookChapter4_Page4_001',
    'ScenarioName_CosmosGardenGalaxy4',
    'Select_ReturnToAstro_No',
    'SurfingLv2Galaxy_Penguin015',
]

print('Beginning XML parsing...')
# Named XML entities understood inside message text, mapped to the character they stand for.
_XML_ENTITIES = {'amp': '&', 'lt': '<', 'gt': '>', 'quot': '"', 'apos': "'"}
# Message text is encoded as UTF-16 in the byte order selected by `endian`.
_text_codec = 'utf-16-be' if endian == '>' else 'utf-16-le'


def _read_xml_char(stream):
    """Return the next character of *stream*; abort with an XML error at end of file.

    Every inner scanning loop below waits for a specific delimiter, so without
    this check a truncated file would make it spin forever on ''.
    """
    nextchar = stream.read(1)
    if nextchar == '':
        error(f'Unexpected end of file (last message: "{message_name}")', 1)
    return nextchar


# Info bytes of the message currently being read. Starts empty so that a first
# message without an info property is reported instead of raising NameError.
inf = bytearray()

# Character-by-character XML reader. It fills `dat1` with the encoded message
# strings and `inf1` with one (pointer, info) entry per message.
with open(xmlfilepath, 'r') as xmlf:
    inmessage = False  # We are not currently in a message. Keep track
    message_text = bytearray()  # the current message; a bytearray because of binary escapes
    while True:
        char = xmlf.read(1)
        if char == '':
            # The loop normally ends at the closing root tag; running out of
            # input first means the file is truncated or malformed.
            error('Reached the end of the file before the closing root tag.', 1)
        if char == '<':  # look at you finding a tag
            if v:
                print('"<" at ' + str(xmlf.tell()))
            xml_tag = ''
            while True:  # Get the tag name
                char = _read_xml_char(xmlf)
                # A delimiter only ends the name once the name is non-empty,
                # which is what lets the leading '/' of an end tag through.
                if char in (' ', '\n', '>', '/') and not xml_tag == '':
                    break
                xml_tag += char
            # Remember what ended the name: if it was '>', the tag is already
            # fully consumed and nothing more may be skipped or read for it.
            tag_end = char

            if xml_tag.startswith('/'):  # End tag; ignored apart from the two below
                if xml_tag == '/message':  # Message ended!
                    inmessage = False
                    message_text.extend(b'\x00\x00')  # null character that terminates the message
                    if v:
                        print('Message ended as "' + str(message_text) + '"!')
                    dat1 += message_text  # Add the message to the dat
                    message_text = bytearray()  # clear the message, we are done reading it
                if xml_tag == '/MESGbmg1' or xml_tag == '/GSEM1gmb':
                    break  # closing root tag: end of the file, leave the big loop

            elif xml_tag == 'message':  # Start of a message.
                total += 1  # message count, also used in error messages
                inmessage = True
                dat1start = len(dat1)  # where this message will start in dat1 (unless it is blank)
                if v:
                    print('Message starting at ' + hex(dat1start))
                namefound = False
                message_name = ''
                if tag_end in ('/', '>'):
                    # "<message>" or "<message/>": there are no properties to read.
                    error('name property of message ' + str(total) + ' not found!', 1)
                while True:  # Loop through all properties of the current message
                    prop = ''
                    inquote = False
                    while True:  # read one whitespace-delimited property
                        char = _read_xml_char(xmlf)
                        if char == '"':  # We have entered or exited a quote
                            inquote = not inquote
                        if char in (' ', '/', '>', '\n') and not inquote:
                            break
                        prop += char
                    if prop.startswith('name="'):  # A messageid.tbl writer would in theory hook in here.
                        message_name = prop[6:-1]
                        if v:
                            print(f'Name property found, very good. It\'s "{message_name}"')
                        namefound = True
                    elif prop.startswith('info="'):
                        inf = formmessageinf(prop[6:-1])
                        if v:
                            print('Info property found, very good. It\'s "' + prop[6:-1] + '" aka "' + str(inf) + '"')
                    if char == '/' or char == '>':  # end of the opening tag
                        if not namefound:
                            error('name property of message ' + str(total) + ' not found!', 1)
                        if len(inf) <= 1:  # info is written to inf1 below, so it must exist
                            error(f'info property of message {message_name} not found!', 1)
                        if char == '/':
                            if v:
                                print('Oh no, message is empty!')
                            inmessage = False
                            # No need to advance to the '>': the outer loop skips until the next '<'.
                            inf1.extend(b'\x00\x00\x00\x00')  # blank message: pointer to the start of dat1
                        else:
                            # NOTE(review): the pointer is packed big-endian even in 3DAS
                            # (little-endian) mode, unlike the header fields - confirm.
                            inf1.extend(struct.pack('>I', dat1start))
                        inf1 += inf
                        inf = bytearray()  # reset so a missing info is detected on the next message
                        break  # End of tag, found all properties.

            elif xml_tag == 'note':  # keeping support for this bc it's easier to type
                if endian == '>':  # if smg, big endian
                    message_text.extend(b'\x26\x6A')  # ascii '&j'
                else:  # if 3das
                    message_text.extend(b'\x6A\x26')
                if tag_end != '>':
                    # Skip the rest of the tag. When the name itself ended on
                    # '>', skipping would swallow the text that follows.
                    while _read_xml_char(xmlf) != '>':
                        pass

            elif xml_tag in dnames:  # If the tag shows up in our names dictionary
                tagid = int(dnames.get(xml_tag))
                if v:
                    print('Tag "' + xml_tag + '" has id ' + str(tagid))
                props = ''  # parsed strictly, so everything goes in one string
                inquote = False  # start clean; do not inherit quote state from an earlier tag
                if tag_end != '>':
                    while True:  # collect the raw property text up to the closing '>'
                        char = _read_xml_char(xmlf)
                        if char == '"':
                            inquote = not inquote
                        elif (char == '/' or char == ' ') and not inquote:
                            char = _read_xml_char(xmlf)  # done reading tag properties, advance to end of tag
                        if char == '>':
                            break
                        props += char
                if props == '' and not tagid == 9:  # racetime tag (id 9) requires no properties
                    error(f'Message "{message_name}": Element "{xml_tag}" has no properties', 1)
                else:
                    if v:
                        print(' Properties: "' + props + '"')
                # NOTE(review): the numeric payloads below are always packed big-endian,
                # also in 3DAS mode - confirm whether they should follow `endian`.
                if tagid == 1:  # Pauses
                    if props[0:8] == 'length="':
                        lengthstr = props[8:-1]
                        if lengthstr in doddpause:
                            lengthid = int(doddpause.get(lengthstr))
                            message_text.extend(escape)
                            message_text.extend(b'\x06\x01')  # length of 6, id 1
                            message_text.extend(struct.pack('>H', lengthid))
                        elif lengthstr in dpause:
                            lengthid = int(dpause.get(lengthstr))
                            message_text.extend(escape)
                            message_text.extend(b'\x08\x01')  # length of 8, id 1
                            message_text.extend(struct.pack('>I', lengthid))
                        else:
                            error(f'Message "{message_name}": {xml_tag} tag has invalid length property "{lengthstr}"', 1)
                    else:
                        error(f'Message "{message_name}": {xml_tag} tag missing "length" property', 1)
                elif tagid == 2:  # Sound tag
                    if props[0:6] == 'name="':
                        namestr = bytearray(props[6:-1], "utf-16-be")  # Encode the name string
                        print(' !! Sound found, sound name is "' + props[6:-1] + '" !!')
                        message_text.extend(escape)
                        # Length byte: +6 for the escape sequence itself and the first null bytes.
                        message_text.extend(struct.pack('>B', (6 + len(namestr))))
                        message_text.extend(b'\x02\x00\x00')  # escape identifier and 2 null bytes
                        message_text.extend(namestr)
                    else:
                        error(f'Message "{message_name}": {xml_tag} tag missing "name" property', 1)
                elif tagid == 3:  # Emoji tag
                    if props[0:6] == 'name="':
                        emojiname = props[6:-1]
                        if emojiname in demoji:
                            nameid = int(demoji.get(emojiname))
                        elif emoji_unknown_text and emojiname.startswith(emoji_unknown_text):
                            # Names made of the csv's 'more' text plus a number carry the raw id.
                            rawid = emojiname[len(emoji_unknown_text):]
                            try:
                                nameid = int(rawid)
                            except ValueError:
                                error(f'Message "{message_name}": Special "{emoji_unknown_text}" {xml_tag} tag: "{rawid}" is not a valid integer.')
                        else:
                            error(f'Message "{message_name}": {xml_tag} tag has invalid name property "{emojiname}"')
                    else:
                        error(f'Message "{message_name}": {xml_tag} tag missing "name" property', 1)
                    message_text.extend(escape)
                    message_text.extend(b'\x06\x03')  # length and identifier
                    message_text.extend(struct.pack('>H', nameid))
                elif tagid == 4:  # Text size
                    if props[0:6] == 'name="':
                        if props[6:-1] in dsizes:
                            nameid = int(dsizes.get(props[6:-1]))
                        else:
                            error(f'Message "{message_name}": {xml_tag} tag has invalid name property "{props[6:-1]}"', 1)
                        message_text.extend(escape)
                        message_text.extend(b'\x06\x04')
                        message_text.extend(struct.pack('>H', nameid))
                    else:
                        error(f'Message "{message_name}": {xml_tag} tag missing "name" property')
                elif tagid == 5:  # Player's name
                    if props[0:7] == 'style="':
                        if props[7:-1] in dplumber:
                            nameid = int(dplumber.get(props[7:-1]))
                        else:
                            error(f'Message "{message_name}": {xml_tag} tag has invalid style property "{props[7:-1]}"', 1)
                    else:
                        error(f'Message "{message_name}": {xml_tag} tag missing "style" property')
                    message_text.extend(escape)
                    message_text.extend(b'\x08\x05\x00')  # length, escape ID, and a null byte to align the number
                    message_text.extend(struct.pack('>H', nameid))
                    message_text.extend(b'\x00')  # trailing null for alignment
                elif tagid == 6 or tagid == 7:  # Number or system text. Pretty weird.
                    if props[0:4] == 'id="':
                        nameid = packtuple(props[4:-1])  # here "nameid" holds the packed id pair
                        message_text.extend(escape)
                        if tagid == 6:  # length of the escape, then 6 or 7 for the escape ID (LEN 4)
                            message_text.extend(b'\x0E\x06')
                        else:
                            message_text.extend(b'\x0E\x07')
                        message_text.extend(b'\x00')  # null byte to align the first number (LEN 5)
                        message_text.extend(struct.pack('>B', nameid[0]))  # first number (LEN 6)
                        message_text.extend(b'\x00\x00\x00\x00\x00\x00\x00')  # 7 null bytes (LEN 13)
                        message_text.extend(struct.pack('>B', nameid[1]))  # second number (LEN 14 = 0xE)
                    else:
                        error(f'Message "{message_name}": {xml_tag} tag has invalid id property "{props [4:-1]}"', 1)
                elif tagid == 9:  # race time; no part of the escape changes
                    message_text.extend(escape)
                    message_text.extend(b'\x06\x09\x00\x05')
                elif tagid == 255:  # color
                    if props[0:6] == 'name="':
                        if props[6:-1] in dcolor:
                            nameid = int(dcolor.get(props[6:-1]))
                        else:
                            error(f'Message "{message_name}": {xml_tag} tag has invalid name property "{props[6:-1]}"', 1)
                    else:
                        error(f'Message "{message_name}": {xml_tag} tag missing "name" property', 1)
                    message_text.extend(escape)
                    message_text.extend(b'\x08\xFF\x00')  # length, escape ID, and an aligning null byte
                    message_text.extend(struct.pack('>H', nameid))
                    message_text.extend(b'\x00')  # null that brings the escape to a length of 8

            elif xml_tag != 'MESGbmg1' and xml_tag != 'GSEM1gmb':  # the root tags are fine, anything else is not
                error(f'Message "{message_name}": tag "{xml_tag}" not recognized', 1)

        elif char == '&':
            # XML entity: collect the name up to ';' and emit the character it stands for.
            xmlescname = ''
            while True:
                char = _read_xml_char(xmlf)
                if char == ';':
                    if xmlescname not in _XML_ENTITIES:
                        error(f'Message "{message_name}": Invalid XML escape "&{xmlescname};"', 1)
                    if inmessage:  # like plain characters, text outside a message is ignored
                        message_text.extend(_XML_ENTITIES[xmlescname].encode(_text_codec))
                    break
                xmlescname += char
                if len(xmlescname) > 4:
                    # Don't let a missed ';' destroy everything
                    error(f'Message "{message_name}": Invalid XML escape "&{xmlescname}"', 1)

        elif inmessage:
            # Plain text inside a message: append the character in the file's encoding.
            message_text.extend(char.encode(_text_codec))
# Report whether the number of messages found matches the expected 2464.
if total != 2464:
    print(f'\nWARNING! There are supposed to be 2464 messages, but instead {total} were found. This will probably cause glitches in the game.\n')
else:
    print('Processed 2464 messages.')

# Zero-pad inf1 so its length is a multiple of 32.
inf1.extend(bytes(-len(inf1) % 32))

# Zero-pad dat1 until (length - 8) is a multiple of 32.
dat1.extend(bytes((8 - len(dat1)) % 32))
print("Copying flow section bytes from original BMG...")
flbytes = bytearray()
# The flow sections at the end are copied verbatim from an existing BMG,
# because their meaning is unknown. Two places are checked for that file.
fget = None
for _bmgpath in (folder + filename + '.bmg', folder + 'message.bmg'):
    try:
        fget = open(_bmgpath, 'rb')
    except OSError:
        continue
    break
if fget is None:
    error('Please put the XML file in the same folder as the original BMG. (Unable to copy FLW and FLI bytes!)')

with fget:  # closes the handle even if reading fails
    # The offset field at 0x08 is stored in the byte order of the BMG itself.
    # A little-endian file is recognised by the byte-swapped magic this tool
    # writes for 3DAS; anything else is read big-endian as before.
    _bmgendian = '<' if fget.read(8) == b'GSEM1gmb' else '>'
    _rawoffset = fget.read(4)
    if len(_rawoffset) != 4:
        error('The original BMG is too short to contain a header. (Unable to copy FLW and FLI bytes!)')
    flwoffset = struct.unpack(f'{_bmgendian}I', _rawoffset)[0]  # offset of the fl sections
    fget.seek(flwoffset)
    # Keep the bytes in memory: the same file may be overwritten afterwards.
    flbytes = fget.read()
# Section magics are byte-reversed in the little-endian (3das) variant.
if endian == '>':  # if smg
    _file_magic, _inf_magic, _dat_magic = b'MESGbmg1', b'INF1', b'DAT1'
else:  # if 3das
    _file_magic, _inf_magic, _dat_magic = b'GSEM1gmb', b'1FNI', b'1TAD'
_u32 = f'{endian}I'
_u16 = f'{endian}H'

with open(folder + filename + '.bmg', 'wb') as mfile:
    print('Writing BMG file...')

    # File header
    mfile.write(_file_magic)
    # Value at 0x08: file header (32) + inf1 section (16 + data) + dat1 section (8 + data)
    mfile.write(struct.pack(_u32, len(inf1) + len(dat1) + 16 + 8 + 32))
    mfile.write(struct.pack(_u32, 4))  # number of sections in the file (inf1, dat1, flw1, and fli1)
    # 2 is the encoding (utf-16), and the zeros are unknown
    mfile.write(b'\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')

    # inf1 section
    mfile.write(_inf_magic)
    mfile.write(struct.pack(_u32, len(inf1) + 16))  # +16 for the inf1 section header
    mfile.write(struct.pack(_u16, total))  # number of messages
    mfile.write(struct.pack(_u16, 12))  # 0x0C is the length of each inf1 entry
    mfile.write(b'\x00\x00\x00\x00')  # padding
    mfile.write(inf1)
    inf1 = bytearray()  # release the buffer

    # dat1 section
    mfile.write(_dat_magic)
    mfile.write(struct.pack(_u32, len(dat1) + 8))  # +8 for the dat1 section header
    mfile.write(dat1)
    dat1 = bytearray()  # release the buffer

    # flow sections copied from the original BMG
    mfile.write(flbytes)
    flbytes = bytearray()  # release the buffer

print('Finished writing BMG file. Have a nice day!')
# print('\nWARNING! At the moment, there is a bug where random parts of messages get removed. This issue is known and is being worked on.')
# # #