Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unicode issues #252

Merged
merged 15 commits into from
Mar 24, 2011
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion IPython/config/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,9 @@ def get_config():
return self.config

namespace = dict(load_subconfig=load_subconfig, get_config=get_config)
execfile(self.full_filename, namespace)
fs_encoding = sys.getfilesystemencoding() or 'ascii'
conf_filename = self.full_filename.encode(fs_encoding)
execfile(conf_filename, namespace)

def _convert_to_config(self):
if self.data is None:
Expand Down
10 changes: 8 additions & 2 deletions IPython/core/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,18 +350,22 @@ def find_config_file_paths(self):
# our shipped copies of builtin profiles even if they don't have them
# in their local ipython directory.
prof_dir = os.path.join(get_ipython_package_dir(), 'config', 'profile')
self.config_file_paths = (os.getcwd(), self.ipython_dir, prof_dir)
self.config_file_paths = (os.getcwdu(), self.ipython_dir, prof_dir)

def pre_load_file_config(self):
"""Do actions before the config file is loaded."""
pass

def load_file_config(self):
def load_file_config(self, suppress_errors=True):
"""Load the config file.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Explain suppress errors in docstring


This tries to load the config file from disk. If successful, the
``CONFIG_FILE`` config variable is set to the resolved config file
location. If not successful, an empty config is used.

By default, errors in loading config are handled, and a warning
printed on screen. For testing, the suppress_errors option is set
to False, so errors will make tests fail.
"""
self.log.debug("Attempting to load config file: %s" %
self.config_file_name)
Expand All @@ -377,6 +381,8 @@ def load_file_config(self):
self.config_file_name, exc_info=True)
self.file_config = Config()
except:
if not suppress_errors: # For testing purposes
raise
self.log.warn("Error loading config file: %s" %
self.config_file_name, exc_info=True)
self.file_config = Config()
Expand Down
4 changes: 3 additions & 1 deletion IPython/core/compilerop.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,10 @@

def code_name(code, number=0):
""" Compute a (probably) unique name for code for caching.

This now expects code to be unicode.
"""
hash_digest = hashlib.md5(code).hexdigest()
hash_digest = hashlib.md5(code.encode("utf-8")).hexdigest()
# Include the number and 12 characters of the hash in the name. It's
# pretty much impossible that in a single session we'll have collisions
# even with truncated hashes, and the full one makes tracebacks too long
Expand Down
28 changes: 6 additions & 22 deletions IPython/core/inputsplitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
# Imports
#-----------------------------------------------------------------------------
# stdlib
import ast
import codeop
import re
import sys
Expand Down Expand Up @@ -185,9 +186,6 @@ def split_blocks(python):
commands : list of str
Separate commands that can be exec'ed independently.
"""

import compiler

# compiler.parse treats trailing spaces after a newline as a
# SyntaxError. This is different than codeop.CommandCompiler, which
# will compile the trailing spaces just fine. We simply strip any
Expand All @@ -197,22 +195,15 @@ def split_blocks(python):
python_ori = python # save original in case we bail on error
python = python.strip()

# The compiler module does not like unicode. We need to convert
# it encode it:
if isinstance(python, unicode):
# Use the utf-8-sig BOM so the compiler detects this a UTF-8
# encode string.
python = '\xef\xbb\xbf' + python.encode('utf-8')

# The compiler module will parse the code into an abstract syntax tree.
# This has a bug with str("a\nb"), but not str("""a\nb""")!!!
try:
ast = compiler.parse(python)
code_ast = ast.parse(python)
except:
return [python_ori]

# Uncomment to help debug the ast tree
# for n in ast.node:
# for n in code_ast.body:
# print n.lineno,'->',n

# Each separate command is available by iterating over ast.node. The
Expand All @@ -223,14 +214,7 @@ def split_blocks(python):
# other situations that cause Discard nodes that shouldn't be discarded.
# We might eventually discover other cases where lineno is None and have
# to put in a more sophisticated test.
linenos = [x.lineno-1 for x in ast.node if x.lineno is not None]

# When we have a bare string as the first statement, it does not end up as
# a Discard Node in the AST as we might expect. Instead, it gets interpreted
# as the docstring of the module. Check for this case and prepend 0 (the
# first line number) to the list of linenos to account for it.
if ast.doc is not None:
linenos.insert(0, 0)
linenos = [x.lineno-1 for x in code_ast.body if x.lineno is not None]

# When we finally get the slices, we will need to slice all the way to
# the end even though we don't have a line number for it. Fortunately,
Expand Down Expand Up @@ -603,7 +587,7 @@ def _store(self, lines, buffer=None, store='source'):

If input lines are not newline-terminated, a newline is automatically
appended."""

if buffer is None:
buffer = self._buffer

Expand All @@ -614,7 +598,7 @@ def _store(self, lines, buffer=None, store='source'):
setattr(self, store, self._set_source(buffer))

def _set_source(self, buffer):
return ''.join(buffer).encode(self.encoding)
return u''.join(buffer)


#-----------------------------------------------------------------------------
Expand Down
7 changes: 4 additions & 3 deletions IPython/core/interactiveshell.py
Original file line number Diff line number Diff line change
Expand Up @@ -1550,12 +1550,14 @@ def init_readline(self):
# otherwise we end up with a monster history after a while:
readline.set_history_length(self.history_length)

stdin_encoding = sys.stdin.encoding or "utf-8"

# Load the last 1000 lines from history
for _, _, cell in self.history_manager.get_tail(1000,
include_latest=True):
if cell.strip(): # Ignore blank lines
for line in cell.splitlines():
readline.add_history(line)
readline.add_history(line.encode(stdin_encoding))

# Configure auto-indent for all platforms
self.set_autoindent(self.autoindent)
Expand Down Expand Up @@ -2105,7 +2107,6 @@ def run_cell(self, cell, store_history=True):
if len(cell.splitlines()) <= 1:
cell = self.prefilter_manager.prefilter_line(blocks[0])
blocks = self.input_splitter.split_blocks(cell)


# Store the 'ipython' version of the cell as well, since that's what
# needs to go into the translated history and get executed (the
Expand Down Expand Up @@ -2246,7 +2247,7 @@ def run_source(self, source, filename=None,
else:
usource = source

if 0: # dbg
if False: # dbg
print 'Source:', repr(source) # dbg
print 'USource:', repr(usource) # dbg
print 'type:', type(source) # dbg
Expand Down
3 changes: 2 additions & 1 deletion IPython/core/magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2063,7 +2063,8 @@ def magic_save(self,parameter_s = ''):
return
cmds = self.extract_input_lines(ranges, 'r' in opts)
with open(fname,'w') as f:
f.write(cmds)
f.write("# coding: utf-8\n")
f.write(cmds.encode("utf-8"))
print 'The following commands were written to file `%s`:' % fname
print cmds

Expand Down
68 changes: 68 additions & 0 deletions IPython/core/tests/test_application.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# coding: utf-8
"""Tests for IPython.core.application"""

import os
import tempfile

from IPython.core.application import Application

def test_unicode_cwd():
    """Check that IPython starts with non-ascii characters in the path.

    A temporary directory whose name ends in a non-ASCII character is made
    the current working directory, then the config-loading steps are run with
    ``suppress_errors=False`` so that a failure while loading the config is
    raised instead of being handled.

    The working directory is restored and the temporary directory removed
    afterwards, whether or not the test passes.
    """
    import shutil

    wd = tempfile.mkdtemp(suffix=u"€")
    old_wd = os.getcwdu()
    try:
        os.chdir(wd)

        app = Application()
        # The lines below are copied from Application.initialize()
        app.create_default_config()
        app.log_default_config()
        app.set_default_config_log_level()

        # Find resources needed for filesystem access, using information from
        # the above two
        app.find_ipython_dir()
        app.find_resources()
        app.find_config_file_name()
        app.find_config_file_paths()

        # File-based config
        app.pre_load_file_config()
        app.load_file_config(suppress_errors=False)
    finally:
        # Step out of the temporary directory before deleting it; a directory
        # that is still the cwd cannot be removed on every platform.
        os.chdir(old_wd)
        shutil.rmtree(wd, ignore_errors=True)

def test_unicode_ipdir():
    """Check that IPython starts with non-ascii characters in the IP dir.

    A temporary directory whose name ends in a non-ASCII character is created,
    an empty ``ipython_config.py`` is placed in it, and ``IPYTHONDIR`` is
    pointed at it. The config-loading steps are then run with
    ``suppress_errors=False`` so that a failure while loading the config is
    raised instead of being handled.

    Both ``IPYTHONDIR`` and ``IPYTHON_DIR`` are put back to their previous
    state afterwards, and the temporary directory is removed.
    """
    import shutil

    ipdir = tempfile.mkdtemp(suffix=u"€")

    # Create the config file, so it tries to load it.
    with open(os.path.join(ipdir, 'ipython_config.py'), "w"):
        pass

    # Remember both spellings of the variable (None means "was not set") and
    # clear them so that only the value set below is in the environment.
    old_ipdir1 = os.environ.pop("IPYTHONDIR", None)
    old_ipdir2 = os.environ.pop("IPYTHON_DIR", None)
    os.environ["IPYTHONDIR"] = ipdir.encode("utf-8")
    try:
        app = Application()
        # The lines below are copied from Application.initialize()
        app.create_default_config()
        app.log_default_config()
        app.set_default_config_log_level()

        # Find resources needed for filesystem access, using information from
        # the above two
        app.find_ipython_dir()
        app.find_resources()
        app.find_config_file_name()
        app.find_config_file_paths()

        # File-based config
        app.pre_load_file_config()
        app.load_file_config(suppress_errors=False)
    finally:
        # Restore each variable under its own name. If IPYTHONDIR was not set
        # before the test, drop the temporary value rather than leaving it
        # behind for later tests.
        if old_ipdir1 is None:
            os.environ.pop("IPYTHONDIR", None)
        else:
            os.environ["IPYTHONDIR"] = old_ipdir1
        if old_ipdir2 is not None:
            os.environ["IPYTHON_DIR"] = old_ipdir2
        shutil.rmtree(ipdir, ignore_errors=True)
12 changes: 12 additions & 0 deletions IPython/core/tests/test_compilerop.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# coding: utf-8
"""Tests for the compilerop module.
"""
#-----------------------------------------------------------------------------
Expand All @@ -15,6 +16,7 @@

# Stdlib imports
import linecache
import sys

# Third-party imports
import nose.tools as nt
Expand Down Expand Up @@ -46,6 +48,16 @@ def test_compiler():
cp('x=1', 'single')
nt.assert_true(len(linecache.cache) > ncache)

def setUp():
    """Check we're in a proper Python 2 environment.

    Some imports, such as GTK, can change the default encoding, which can
    hide bugs, so the default encoding is required to be ASCII.
    """
    default_encoding = sys.getdefaultencoding()
    nt.assert_equal(default_encoding, "ascii")

def test_compiler_unicode():
    """Compile unicode source with non-ASCII characters; linecache must grow."""
    compiler = compilerop.CachingCompiler()
    entries_before = len(linecache.cache)
    compiler(u"t = 'žćčšđ'", "single")
    entries_after = len(linecache.cache)
    nt.assert_true(entries_after > entries_before)

def test_compiler_check_cache():
"""Test the compiler properly manages the cache.
Expand Down
11 changes: 8 additions & 3 deletions IPython/core/tests/test_history.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# coding: utf-8
"""Tests for the IPython tab-completion machinery.
"""
#-----------------------------------------------------------------------------
Expand All @@ -16,8 +17,10 @@
from IPython.utils.tempdir import TemporaryDirectory
from IPython.core.history import HistoryManager, extract_hist_ranges

def test_history():
def setUp():
nt.assert_equal(sys.getdefaultencoding(), "ascii")

def test_history():
ip = get_ipython()
with TemporaryDirectory() as tmpdir:
#tmpdir = '/software/temp'
Expand All @@ -32,7 +35,7 @@ def test_history():
ip.history_manager.init_db() # Has to be called after changing file
ip.history_manager.reset()
print 'test',histfile
hist = ['a=1', 'def f():\n test = 1\n return test', 'b=2']
hist = ['a=1', 'def f():\n test = 1\n return test', u"b='€Æ¾÷ß'"]
for i, h in enumerate(hist, start=1):
ip.history_manager.store_inputs(i, h)

Expand Down Expand Up @@ -82,7 +85,8 @@ def test_history():
testfilename = os.path.realpath(os.path.join(tmpdir, "test.py"))
ip.magic_save(testfilename + " ~1/1-3")
testfile = open(testfilename, "r")
nt.assert_equal(testfile.read(), "\n".join(hist))
nt.assert_equal(testfile.read().decode("utf-8"),
"# coding: utf-8\n" + "\n".join(hist))

# Duplicate line numbers - check that it doesn't crash, and
# gets a new session
Expand All @@ -92,6 +96,7 @@ def test_history():
# Restore history manager
ip.history_manager = hist_manager_ori


def test_extract_hist_ranges():
instr = "1 2/3 ~4/5-6 ~4/7-~4/9 ~9/2-~7/5"
expected = [(0, 1, 2), # 0 == current session
Expand Down
2 changes: 1 addition & 1 deletion IPython/core/tests/test_inputsplitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ def test_split_syntax_errors(self):
def test_unicode(self):
self.isp.push(u"Pérez")
self.isp.push(u'\xc3\xa9')
self.isp.push("u'\xc3\xa9'")
self.isp.push(u"u'\xc3\xa9'")

class InteractiveLoopTestCase(unittest.TestCase):
"""Tests for an interactive loop like a python shell.
Expand Down
4 changes: 2 additions & 2 deletions IPython/core/tests/test_magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,9 +293,9 @@ def test_parse_options():

def test_dirops():
"""Test various directory handling operations."""
curpath = lambda :os.path.splitdrive(os.getcwd())[1].replace('\\','/')
curpath = lambda :os.path.splitdrive(os.getcwdu())[1].replace('\\','/')

startdir = os.getcwd()
startdir = os.getcwdu()
ipdir = _ip.ipython_dir
try:
_ip.magic('cd "%s"' % ipdir)
Expand Down
9 changes: 4 additions & 5 deletions IPython/testing/iptest.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,6 @@ def test_for(mod):
have['twisted'] = test_for('twisted')
have['foolscap'] = test_for('foolscap')
have['pexpect'] = test_for('pexpect')
have['gtk'] = test_for('gtk')
have['gobject'] = test_for('gobject')

#-----------------------------------------------------------------------------
# Functions and classes
Expand Down Expand Up @@ -170,9 +168,10 @@ def make_exclude():

if not have['wx']:
exclusions.append(ipjoin('lib', 'inputhookwx'))

if not have['gtk'] or not have['gobject']:
exclusions.append(ipjoin('lib', 'inputhookgtk'))

# We do this unconditionally, so that the test suite doesn't import
# gtk, changing the default encoding and masking some unicode bugs.
exclusions.append(ipjoin('lib', 'inputhookgtk'))

# These have to be skipped on win32 because they use echo, rm, cd, etc.
# See ticket https://bugs.launchpad.net/bugs/366982
Expand Down