ipython · takluyver · Mar 24, 2011 · Jan 22, 2011 · Jan 23, 2011 · Jan 23, 2011
diff --git a/IPython/config/loader.py b/IPython/config/loader.py
@@ -285,7 +285,9 @@ def get_config():
             return self.config
 
         namespace = dict(load_subconfig=load_subconfig, get_config=get_config)
-        execfile(self.full_filename, namespace)
+        fs_encoding = sys.getfilesystemencoding() or 'ascii'
+        conf_filename = self.full_filename.encode(fs_encoding)
+        execfile(conf_filename, namespace)
 
     def _convert_to_config(self):
         if self.data is None:

diff --git a/IPython/core/application.py b/IPython/core/application.py
@@ -350,18 +350,22 @@ def find_config_file_paths(self):
         # our shipped copies of builtin profiles even if they don't have them
         # in their local ipython directory.
         prof_dir = os.path.join(get_ipython_package_dir(), 'config', 'profile')
-        self.config_file_paths = (os.getcwd(), self.ipython_dir, prof_dir)
+        self.config_file_paths = (os.getcwdu(), self.ipython_dir, prof_dir)
 
     def pre_load_file_config(self):
         """Do actions before the config file is loaded."""
         pass
 
-    def load_file_config(self):
+    def load_file_config(self, suppress_errors=True):
         """Load the config file.
 
         This tries to load the config file from disk.  If successful, the
         ``CONFIG_FILE`` config variable is set to the resolved config file
         location.  If not successful, an empty config is used.
+
+        By default, errors in loading config are handled, and a warning
+        printed on screen. For testing, the suppress_errors option is set
+        to False, so errors will make tests fail.
         """
         self.log.debug("Attempting to load config file: %s" %
                        self.config_file_name)
@@ -377,6 +381,8 @@ def load_file_config(self):
                                self.config_file_name, exc_info=True)
             self.file_config = Config()
         except:
+            if not suppress_errors:     # For testing purposes
+                raise
             self.log.warn("Error loading config file: %s" %
                           self.config_file_name, exc_info=True)
             self.file_config = Config()

diff --git a/IPython/core/compilerop.py b/IPython/core/compilerop.py
@@ -38,8 +38,10 @@
 
 def code_name(code, number=0):
     """ Compute a (probably) unique name for code for caching.
+
+    This now expects code to be unicode.
     """
-    hash_digest = hashlib.md5(code).hexdigest()
+    hash_digest = hashlib.md5(code.encode("utf-8")).hexdigest()
     # Include the number and 12 characters of the hash in the name.  It's
     # pretty much impossible that in a single session we'll have collisions
     # even with truncated hashes, and the full one makes tracebacks too long

diff --git a/IPython/core/inputsplitter.py b/IPython/core/inputsplitter.py
@@ -66,6 +66,7 @@
 # Imports
 #-----------------------------------------------------------------------------
 # stdlib
+import ast
 import codeop
 import re
 import sys
@@ -185,9 +186,6 @@ def split_blocks(python):
     commands : list of str
         Separate commands that can be exec'ed independently.
     """
-
-    import compiler
-
     # compiler.parse treats trailing spaces after a newline as a
     # SyntaxError.  This is different than codeop.CommandCompiler, which
     # will compile the trailng spaces just fine.  We simply strip any
@@ -197,22 +195,15 @@ def split_blocks(python):
     python_ori = python # save original in case we bail on error
     python = python.strip()
 
-    # The compiler module does not like unicode. We need to convert
-    # it encode it:
-    if isinstance(python, unicode):
-        # Use the utf-8-sig BOM so the compiler detects this a UTF-8
-        # encode string.
-        python = '\xef\xbb\xbf' + python.encode('utf-8')
-
     # The compiler module will parse the code into an abstract syntax tree.
     # This has a bug with str("a\nb"), but not str("""a\nb""")!!!
     try:
-        ast = compiler.parse(python)
+        code_ast = ast.parse(python)
     except:
         return [python_ori]
 
     # Uncomment to help debug the ast tree
-    # for n in ast.node:
+    # for n in code_ast.body:
     #     print n.lineno,'->',n
 
     # Each separate command is available by iterating over ast.node. The
@@ -223,14 +214,7 @@ def split_blocks(python):
     # other situations that cause Discard nodes that shouldn't be discarded.
     # We might eventually discover other cases where lineno is None and have
     # to put in a more sophisticated test.
-    linenos = [x.lineno-1 for x in ast.node if x.lineno is not None]
-
-    # When we have a bare string as the first statement, it does not end up as
-    # a Discard Node in the AST as we might expect. Instead, it gets interpreted
-    # as the docstring of the module. Check for this case and prepend 0 (the
-    # first line number) to the list of linenos to account for it.
-    if ast.doc is not None:
-        linenos.insert(0, 0)
+    linenos = [x.lineno-1 for x in code_ast.body if x.lineno is not None]
 
     # When we finally get the slices, we will need to slice all the way to
     # the end even though we don't have a line number for it. Fortunately,
@@ -603,7 +587,7 @@ def _store(self, lines, buffer=None, store='source'):
 
         If input lines are not newline-terminated, a newline is automatically
         appended."""
-
+        
         if buffer is None:
             buffer = self._buffer
 
@@ -614,7 +598,7 @@ def _store(self, lines, buffer=None, store='source'):
         setattr(self, store, self._set_source(buffer))
 
     def _set_source(self, buffer):
-        return ''.join(buffer).encode(self.encoding)
+        return u''.join(buffer)
 
 
 #-----------------------------------------------------------------------------

diff --git a/IPython/core/interactiveshell.py b/IPython/core/interactiveshell.py
@@ -1550,12 +1550,14 @@ def init_readline(self):
             # otherwise we end up with a monster history after a while:
             readline.set_history_length(self.history_length)
 
+            stdin_encoding = sys.stdin.encoding or "utf-8"
+
             # Load the last 1000 lines from history
             for _, _, cell in self.history_manager.get_tail(1000,
                                                 include_latest=True):
                 if cell.strip(): # Ignore blank lines
                     for line in cell.splitlines():
-                        readline.add_history(line)
+                        readline.add_history(line.encode(stdin_encoding))
 
         # Configure auto-indent for all platforms
         self.set_autoindent(self.autoindent)
@@ -2105,7 +2107,6 @@ def run_cell(self, cell, store_history=True):
         if len(cell.splitlines()) <= 1:
             cell = self.prefilter_manager.prefilter_line(blocks[0])
             blocks = self.input_splitter.split_blocks(cell)
-
 
         # Store the 'ipython' version of the cell as well, since that's what
         # needs to go into the translated history and get executed (the
@@ -2246,7 +2247,7 @@ def run_source(self, source, filename=None,
         else:
             usource = source
 
-        if 0:  # dbg
+        if False:  # dbg
             print 'Source:', repr(source)  # dbg
             print 'USource:', repr(usource)  # dbg
             print 'type:', type(source) # dbg

diff --git a/IPython/core/magic.py b/IPython/core/magic.py
@@ -2063,7 +2063,8 @@ def magic_save(self,parameter_s = ''):
                 return
         cmds = self.extract_input_lines(ranges, 'r' in opts)
         with open(fname,'w') as f:
-            f.write(cmds)
+            f.write("# coding: utf-8\n")
+            f.write(cmds.encode("utf-8"))
         print 'The following commands were written to file `%s`:' % fname
         print cmds
 

diff --git a/IPython/core/tests/test_application.py b/IPython/core/tests/test_application.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+"""Tests for IPython.core.application"""
+
+import os
+import shutil
+import tempfile
+
+from IPython.core.application import Application
+
+
+def _load_file_config_strict():
+    """Run the config-loading steps of start-up without hiding errors.
+
+    The calls are copied from Application.initialize().  The config file is
+    loaded with suppress_errors=False, so any error while loading it
+    propagates and fails the calling test.
+    """
+    app = Application()
+    app.create_default_config()
+    app.log_default_config()
+    app.set_default_config_log_level()
+
+    # Find resources needed for filesystem access, using information from
+    # the above two
+    app.find_ipython_dir()
+    app.find_resources()
+    app.find_config_file_name()
+    app.find_config_file_paths()
+
+    # File-based config
+    app.pre_load_file_config()
+    app.load_file_config(suppress_errors=False)
+
+
+def test_unicode_cwd():
+    """Check that IPython starts with non-ascii characters in the path."""
+    wd = tempfile.mkdtemp(suffix=u"€")
+    old_wd = os.getcwdu()
+    try:
+        os.chdir(wd)
+        _load_file_config_strict()
+    finally:
+        # Leave the temporary directory before removing it.
+        os.chdir(old_wd)
+        shutil.rmtree(wd, ignore_errors=True)
+
+
+def test_unicode_ipdir():
+    """Check that IPython starts with non-ascii characters in the IP dir."""
+    ipdir = tempfile.mkdtemp(suffix=u"€")
+    # Take both spellings of the variable out of the environment, keeping
+    # their values so they can be put back afterwards.
+    old_ipdir1 = os.environ.pop("IPYTHONDIR", None)
+    old_ipdir2 = os.environ.pop("IPYTHON_DIR", None)
+    try:
+        # Create the config file, so it tries to load it.
+        with open(os.path.join(ipdir, 'ipython_config.py'), "w"):
+            pass
+
+        os.environ["IPYTHONDIR"] = ipdir.encode("utf-8")
+        _load_file_config_strict()
+    finally:
+        # Drop the override first: if IPYTHONDIR was not set before, it must
+        # not stay pointing at the directory removed below.
+        os.environ.pop("IPYTHONDIR", None)
+        if old_ipdir1 is not None:
+            os.environ["IPYTHONDIR"] = old_ipdir1
+        if old_ipdir2 is not None:
+            # Restore under its own name, not over IPYTHONDIR.
+            os.environ["IPYTHON_DIR"] = old_ipdir2
+        shutil.rmtree(ipdir, ignore_errors=True)
diff --git a/IPython/core/tests/test_compilerop.py b/IPython/core/tests/test_compilerop.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 """Tests for the compilerop module.
 """
 #-----------------------------------------------------------------------------
@@ -15,6 +16,7 @@
 
 # Stdlib imports
 import linecache
+import sys
 
 # Third-party imports
 import nose.tools as nt
@@ -46,6 +48,16 @@ def test_compiler():
     cp('x=1', 'single')
     nt.assert_true(len(linecache.cache) > ncache)
 
+def setUp():
+    # Fail early unless the default encoding is Python 2's stock "ascii":
+    # some imports (such as GTK) change it, which can hide unicode bugs.
+    nt.assert_equal(sys.getdefaultencoding(), "ascii")
+
+def test_compiler_unicode():
+    cp = compilerop.CachingCompiler()
+    ncache = len(linecache.cache)
+    cp(u"t = 'žćčšđ'", "single")  # unicode source, non-ascii characters
+    nt.assert_true(len(linecache.cache) > ncache)  # linecache grew
 
 def test_compiler_check_cache():
     """Test the compiler properly manages the cache.

diff --git a/IPython/core/tests/test_history.py b/IPython/core/tests/test_history.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 """Tests for the IPython tab-completion machinery.
 """
 #-----------------------------------------------------------------------------
@@ -16,8 +17,10 @@
 from IPython.utils.tempdir import TemporaryDirectory
 from IPython.core.history import HistoryManager, extract_hist_ranges
 
-def test_history():
+def setUp():
+    nt.assert_equal(sys.getdefaultencoding(), "ascii")
 
+def test_history():
     ip = get_ipython()
     with TemporaryDirectory() as tmpdir:
         #tmpdir = '/software/temp'
@@ -32,7 +35,7 @@ def test_history():
             ip.history_manager.init_db()  # Has to be called after changing file
             ip.history_manager.reset()
             print 'test',histfile
-            hist = ['a=1', 'def f():\n    test = 1\n    return test', 'b=2']
+            hist = ['a=1', 'def f():\n    test = 1\n    return test', u"b='€Æ¾÷ß'"]
             for i, h in enumerate(hist, start=1):
                 ip.history_manager.store_inputs(i, h)
 
@@ -82,7 +85,8 @@ def test_history():
             testfilename = os.path.realpath(os.path.join(tmpdir, "test.py"))
             ip.magic_save(testfilename + " ~1/1-3")
             testfile = open(testfilename, "r")
-            nt.assert_equal(testfile.read(), "\n".join(hist))
+            nt.assert_equal(testfile.read().decode("utf-8"),
+                    "# coding: utf-8\n" + "\n".join(hist))
 
             # Duplicate line numbers - check that it doesn't crash, and
             # gets a new session
@@ -92,6 +96,7 @@ def test_history():
             # Restore history manager
             ip.history_manager = hist_manager_ori
 
+
 def test_extract_hist_ranges():
     instr = "1 2/3 ~4/5-6 ~4/7-~4/9 ~9/2-~7/5"
     expected = [(0, 1, 2),  # 0 == current session

diff --git a/IPython/core/tests/test_inputsplitter.py b/IPython/core/tests/test_inputsplitter.py
@@ -364,7 +364,7 @@ def test_split_syntax_errors(self):
     def test_unicode(self):
         self.isp.push(u"Pérez")
         self.isp.push(u'\xc3\xa9')
-        self.isp.push("u'\xc3\xa9'")
+        self.isp.push(u"u'\xc3\xa9'")
 
 class InteractiveLoopTestCase(unittest.TestCase):
     """Tests for an interactive loop like a python shell.

diff --git a/IPython/core/tests/test_magic.py b/IPython/core/tests/test_magic.py
@@ -293,9 +293,9 @@ def test_parse_options():
 
 def test_dirops():
     """Test various directory handling operations."""
-    curpath = lambda :os.path.splitdrive(os.getcwd())[1].replace('\\','/')
+    curpath = lambda :os.path.splitdrive(os.getcwdu())[1].replace('\\','/')
 
-    startdir = os.getcwd()
+    startdir = os.getcwdu()
     ipdir = _ip.ipython_dir
     try:
         _ip.magic('cd "%s"' % ipdir)

diff --git a/IPython/testing/iptest.py b/IPython/testing/iptest.py
@@ -105,8 +105,6 @@ def test_for(mod):
 have['twisted'] = test_for('twisted')
 have['foolscap'] = test_for('foolscap')
 have['pexpect'] = test_for('pexpect')
-have['gtk'] = test_for('gtk')
-have['gobject'] = test_for('gobject')
 
 #-----------------------------------------------------------------------------
 # Functions and classes
@@ -170,9 +168,10 @@ def make_exclude():
 
     if not have['wx']:
         exclusions.append(ipjoin('lib', 'inputhookwx'))
-
-    if not have['gtk'] or not have['gobject']:
-        exclusions.append(ipjoin('lib', 'inputhookgtk'))
+
+    # We do this unconditionally, so that the test suite doesn't import
+    # gtk, changing the default encoding and masking some unicode bugs.
+    exclusions.append(ipjoin('lib', 'inputhookgtk'))
 
     # These have to be skipped on win32 because the use echo, rm, cd, etc.
     # See ticket https://bugs.launchpad.net/bugs/366982