Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve Windows path problems #8860

Merged
merged 8 commits into from
Feb 22, 2023
93 changes: 82 additions & 11 deletions tools/mkbuildoptglobals.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,14 @@
import glob
import os
import platform
import traceback
import sys
import textwrap
import time

import locale
import codecs
mhightower83 marked this conversation as resolved.
Show resolved Hide resolved

# Need to work on signature line used for match to avoid conflicts with
# existing embedded documentation methods.
build_opt_signature = "/*@create-file:build.opt@"
Expand All @@ -201,6 +205,7 @@
err_print_flag = False
msg_print_buf = ""
debug_enabled = False
default_encoding = None

# Issues trying to address through buffered printing
# 1. Arduino IDE 2.0 RC5 does not show stderr text in color. Text printed does
Expand Down Expand Up @@ -295,16 +300,16 @@ def copy_create_build_file(source_fqfn, build_target_fqfn):
pass
return True # file changed


def add_include_line(build_opt_fqfn, include_fqfn):
global default_encoding
if not os.path.exists(include_fqfn):
# If file is missing, we need an place holder
with open(include_fqfn, 'w', encoding="utf-8"):
with open(include_fqfn, 'w', encoding=default_encoding):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missed one odd encoding=

Suggested change
with open(include_fqfn, 'w', encoding=default_encoding):
with open(include_fqfn, 'w'):

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What you are suggesting is I didn't need any of this get_encoding() logic - tangent I went on. In the absence of the -X utf8, I just needed to selectively remove all the , encoding="utf-8" options where I needed the system encoding to dominate.

Since merged - do I push an update or need a new PR?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just needed to selectively remove all the , encoding="utf-8" options where I needed the system encoding to dominate.

nothing is written to file, so no need for encoding= to possibly confuse future reader(s)
python won't re-encode stuff, this is just the thing for read / write through text io wrapper

in absence of -Xutf-8, text io selects something default aka depends on version
https://github.com/python/cpython/blob/cbd192b6ff137aec25913cc80b2bf7bf1ef3755b/Modules/_io/textio.c#L1102-L1141 (our bundled version)
https://github.com/python/cpython/blob/ddb65c47b1066fec8ce7a674ebb88b167f417d84/Modules/_io/textio.c#L1126-L1132 (3.11 on any recent installation)

utf-8 output is ok, reading cli / writing stuff for external tools should have system encoding

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nothing is written to file, so no need for encoding= to possibly confuse future reader(s)

Just for clarification when creating build.opt, we are reading utf-8 from dot-h files and writing with system encoding to build.opt otherwise I can get a file not found for a -include ... if the file path contains diacritics.

Copy link
Collaborator

@mcspr mcspr Feb 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also nit:

diacritics

any time we have characters from codepage different from ansii e.g. C:\Users\Максім in cp1251
just my guess that GCC probably does not use win32 _wfopen and widechar, so we have to use exact codepage bytes for filepath representation so system exec (that also does not encode / decode anything in args) for GCC gets proper paths

path and text have different code sections dealing with them, just to denote the difference

(I am gonna go read Python PEP 529, https://vstinner.github.io/python36-utf8-windows.html series and cpython unicodeobject.c)

pass
print("add_include_line: Created " + include_fqfn)
with open(build_opt_fqfn, 'a', encoding="utf-8") as build_opt:
build_opt.write('-include "' + include_fqfn.replace('\\', '\\\\') + '"\n')
print_msg("add_include_line: Created " + include_fqfn)

with open(build_opt_fqfn, 'a', encoding=default_encoding) as build_opt:
build_opt.write('-include "' + include_fqfn.replace('\\', '\\\\') + '"\n')

def extract_create_build_opt_file(globals_h_fqfn, file_name, build_opt_fqfn):
"""
Expand All @@ -313,8 +318,9 @@ def extract_create_build_opt_file(globals_h_fqfn, file_name, build_opt_fqfn):
copy of Sketch.ino.globals.h.
"""
global build_opt_signature
global default_encoding

build_opt = open(build_opt_fqfn, 'w', encoding="utf-8")
build_opt = open(build_opt_fqfn, 'w', encoding=default_encoding)
if not os.path.exists(globals_h_fqfn) or (0 == os.path.getsize(globals_h_fqfn)):
build_opt.close()
return False
Expand Down Expand Up @@ -605,12 +611,64 @@ def parse_args():
# ref nargs='*'', https://stackoverflow.com/a/4480202
# ref no '--n' parameter, https://stackoverflow.com/a/21998252


def get_encoding():
try:
from locale import getencoding
except ImportError:
# https://stackoverflow.com/a/23743499
return locale.getpreferredencoding(False) or locale.getdefaultlocale()[1]

return locale.getencoding()
mhightower83 marked this conversation as resolved.
Show resolved Hide resolved


def show_value(desc, value):
print_dbg(f'{desc:<40} {value}')
return

def locale_dbg():
show_value("get_encoding()", get_encoding())
show_value("locale.getdefaultlocale()", locale.getdefaultlocale())
show_value('sys.getfilesystemencoding()', sys.getfilesystemencoding())
show_value("sys.getdefaultencoding()", sys.getdefaultencoding())
show_value("locale.getpreferredencoding(False)", locale.getpreferredencoding(False))
try:
show_value("locale.getpreferredencoding()", locale.getpreferredencoding())
except:
pass
show_value("sys.stdout.encoding", sys.stdout.encoding)

# use current setting
show_value("locale.setlocale(locale.LC_ALL, None)", locale.setlocale(locale.LC_ALL, None))
try:
show_value("locale.getencoding()", locale.getencoding())
except:
pass
show_value("locale.getlocale()", locale.getlocale())

# use user setting
show_value("locale.setlocale(locale.LC_ALL, '')", locale.setlocale(locale.LC_ALL, ''))
# show_value("locale.getencoding()", locale.getencoding())
show_value("locale.getlocale()", locale.getlocale())
return


def main():
global build_opt_signature
global docs_url
global debug_enabled
global default_encoding
num_include_lines = 1

# Given that GCC will handle lines from an @file as if they were on
# the command line. I assume that the contents of @file need to be encoded
# to match that of the shell running GCC runs. I am not 100% sure this API
# gives me that, but it appears to work.
#
# However, elsewhere when dealing with source code we continue to use 'utf-8',
# ref. https://gcc.gnu.org/onlinedocs/cpp/Character-sets.html
default_encoding = get_encoding()

args = parse_args()
debug_enabled = args.debug
runtime_ide_path = os.path.normpath(args.runtime_ide_path)
Expand All @@ -623,6 +681,13 @@ def main():
build_path_core, build_opt_name = os.path.split(build_opt_fqfn)
globals_h_fqfn = os.path.join(build_path_core, globals_name)

if debug_enabled:
locale_dbg()

default_locale = locale.getdefaultlocale()
print_msg(f'default locale: {default_locale}')
print_msg(f'default_encoding: {default_encoding}')

print_dbg(f"runtime_ide_path: {runtime_ide_path}")
print_dbg(f"runtime_ide_version: {args.runtime_ide_version}")
print_dbg(f"build_path: {build_path}")
Expand Down Expand Up @@ -655,6 +720,10 @@ def main():
print_dbg(f"first_time: {first_time}")
print_dbg(f"use_aggressive_caching_workaround: {use_aggressive_caching_workaround}")

if not os.path.exists(build_path_core):
os.makedirs(build_path_core)
print_msg("Clean build, created dir " + build_path_core)

if first_time or \
not use_aggressive_caching_workaround or \
not os.path.exists(commonhfile_fqfn):
Expand All @@ -667,10 +736,6 @@ def main():
touch(commonhfile_fqfn)
print_err(f"Neutralized future timestamp on build file: {commonhfile_fqfn}")

if not os.path.exists(build_path_core):
os.makedirs(build_path_core)
print_msg("Clean build, created dir " + build_path_core)

if os.path.exists(source_globals_h_fqfn):
print_msg("Using global include from " + source_globals_h_fqfn)

Expand Down Expand Up @@ -750,4 +815,10 @@ def main():
handle_error(0) # commit print buffer

if __name__ == '__main__':
sys.exit(main())
rc = 1
try:
rc = main()
except:
print_err(traceback.format_exc())
handle_error(0)
sys.exit(rc)
4 changes: 3 additions & 1 deletion tools/sizes.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,10 @@ def get_segment_sizes(elf, path, mmu):
(".bss", "BSS"),
)

import locale
shell_encoding = locale.getdefaultlocale()[1]
mhightower83 marked this conversation as resolved.
Show resolved Hide resolved
cmd = [os.path.join(path, "xtensa-lx106-elf-size"), "-A", elf]
with subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True) as proc:
with subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True, encoding=shell_encoding) as proc:
lines = proc.stdout.readlines()
for line in lines:
words = line.split()
Expand Down