# Built-in Data Structures, Functions, and Files
Modified from SOURCE: McKinney, W. (2018). Python for data analysis. Chapter 3. O'Reilly Media Inc.

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Data Structures and Sequences

SEE:
https://docs.python.org/3/py-modindex.html#cap-f

or

https://docs.python.org/3/library/stdtypes.html?highlight=sequence

or

https://docs.python.org/3/tutorial/datastructures.html?highlight=data%20structures


### Tuple

A tuple is an immutable sequence of Python objects.

In [2]:
tup = 4, 5, 6 #a fixed-length, immutable sequence of objects
tup

(4, 5, 6)

In [3]:
nested_tup = (4, 5, 6), (7, 8) #a tuple of tuples
nested_tup

((4, 5, 6), (7, 8))

In [4]:
tup= tuple([4, 0, 2])  #You can convert any sequence or iterator to a tuple by invoking 'tuple'
tup = tuple('string')
tup

('s', 't', 'r', 'i', 'n', 'g')

In [5]:
tup[0] #Indexes start at 0

's'

In [6]:
tup = tuple(['foo', [1, 2], True]) #Tuples in Python are immutable, meaning their elements cannot be changed once
                                #they are assigned.
#tup[2] = False  Causes an error.

In [7]:
tup[1].append(3) #The list inside a tuple can be modified because lists are mutable. Notice the index [1].
tup

('foo', [1, 2, 3], True)

In [8]:
(4, None, 'foo') + (6, 0) + ('bar',) #You can concatenate tuples by using the '+' operator

(4, None, 'foo', 6, 0, 'bar')

In [9]:
('foo', 'bar') * 4 #Appending the tuple 4 times.

('foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'bar')

#### Unpacking tuples

In [10]:
tup = (4, 5, 6)
a, b, c = tup #Assigning the values of a tuple in sequence.
b

5

In [11]:
tup = 4, 5, (6, 7)
a, b, (c, d) = tup
d

7

tmp = a  #Reassigning values can take several steps in some languages.
a = b
b = tmp

In [12]:
a, b = 1, 2 #In Python swapping or reassigning values is much easier.
a
b
b, a = a, b
a
b

1

In [13]:
#A common task is to iterate over sequences of tuples or lists.

seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
for a, b, c in seq:
    print('a={0}, b={1}, c={2}'.format(a, b, c))

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9


In [14]:
values = 1, 2, 3, 4, 5
a, b, *rest = values #Assign the first two values to a and b; *rest collects the remaining values in a list.
a, b
rest

[3, 4, 5]

In [15]:
a, b, *_ = values  #Some programmers prefer to use '_' for unwanted values.

#### Tuple methods

In [16]:
#The size and contents of a tuple cannot be modified so there are very few methods assigned to the tuple objects.

#However, you can count the elements.

a = (1, 2, 2, 2, 3, 4, 2) #There are four 2's in the tuple
a.count(2)

4

In [17]:
#Lists the available magic commands.
%lsmagic

Available line magics:
%alias  %alias_magic  %autoawait  %autocall  %automagic  %autosave  %bookmark  %cat  %cd  %clear  %colors  %conda  %config  %connect_info  %cp  %debug  %dhist  %dirs  %doctest_mode  %ed  %edit  %env  %gui  %hist  %history  %killbgscripts  %ldir  %less  %lf  %lk  %ll  %load  %load_ext  %loadpy  %logoff  %logon  %logstart  %logstate  %logstop  %ls  %lsmagic  %lx  %macro  %magic  %man  %matplotlib  %mkdir  %more  %mv  %notebook  %page  %pastebin  %pdb  %pdef  %pdoc  %pfile  %pinfo  %pinfo2  %pip  %popd  %pprint  %precision  %prun  %psearch  %psource  %pushd  %pwd  %pycat  %pylab  %qtconsole  %quickref  %recall  %rehashx  %reload_ext  %rep  %rerun  %reset  %reset_selective  %rm  %rmdir  %run  %save  %sc  %set_env  %shell  %store  %sx  %system  %tb  %tensorflow_version  %time  %timeit  %unalias  %unload_ext  %who  %who_ls  %whos  %xdel  %xmode

Available cell magics:
%%!  %%HTML  %%SVG  %%bash  %%bigquery  %%capture  %%debug  %%file  %%html  %%javascript  %%js  %%late

In [18]:
help(a[4])  #The built-in help() function, or IPython's '?' suffix (e.g. tuple?), shows information about an object.

tuple?

Help on int object:

class int(object)
 |  int([x]) -> integer
 |  int(x, base=10) -> integer
 |  
 |  Convert a number or string to an integer, or return 0 if no arguments
 |  are given.  If x is a number, return x.__int__().  For floating point
 |  numbers, this truncates towards zero.
 |  
 |  If x is not a number or if base is given, then x must be a string,
 |  bytes, or bytearray instance representing an integer literal in the
 |  given base.  The literal can be preceded by '+' or '-' and be surrounded
 |  by whitespace.  The base defaults to 10.  Valid bases are 0 and 2-36.
 |  Base 0 means to interpret the base from the string as an integer literal.
 |  >>> int('0b100', base=0)
 |  4
 |  
 |  Built-in subclasses:
 |      bool
 |  
 |  Methods defined here:
 |  
 |  __abs__(self, /)
 |      abs(self)
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __and__(self, value, /)
 |      Return self&value.
 |  
 |  __bool__(self, /)
 |      True if self else False
 |

### List

In [19]:
#Lists are variable-length objects whose contents can change.
a_list = [2, 3, 7, None]
tup = ('foo', 'bar', 'baz')  #Defines a tuple
tup
b_list = list(tup) #Converts a tuple to a list (displayed with square brackets).
b_list
b_list[1] = 'peekaboo' #Changes the element at index [1] to 'peekaboo'
b_list

['foo', 'peekaboo', 'baz']

In [20]:
gen = range(10)
gen
list(gen)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

#### Adding and removing elements

In [21]:


b_list.append('dwarf')  #adds an element to the end of the list
b_list

['foo', 'peekaboo', 'baz', 'dwarf']

In [22]:
#.insert inserts an object at the specified location.  Again, note the index [1].
#The index must be between 0 and the length of the list, inclusive.

b_list.insert(1, 'red')
b_list

['foo', 'red', 'peekaboo', 'baz', 'dwarf']

In [23]:
b_list.pop(2)  #.pop removes an element at a location and returns the element.
b_list

['foo', 'red', 'baz', 'dwarf']

In [24]:
b_list.append('foo')
b_list
b_list.remove('foo')  #.remove locates the first instance of the value and removes it from the list.
b_list

['red', 'baz', 'dwarf', 'foo']

In [25]:
'dwarf' in b_list

True

In [26]:
'dwarf' not in b_list

False

#### Concatenating and combining lists

In [27]:
#Computationally, the '+' is an expensive operation.

[4, None, 'foo'] + [7, 8, (2, 3)]

[4, None, 'foo', 7, 8, (2, 3)]

In [28]:
x = [4, None, 'foo']
x.extend([7, 8, (2, 3)])
x

[4, None, 'foo', 7, 8, (2, 3)]

everything = []
for chunk in list_of_lists:
    everything.extend(chunk)

everything = []
for chunk in list_of_lists:
    everything = everything + chunk
    
The extend() method is a built-in method for lists in Python that allows you to add multiple elements to an existing list. It takes an iterable (such as another list) as an argument and appends each element from the iterable to the end of the original list.

EXAMPLE:

everything = []
for chunk in list_of_lists:
    everything.extend(chunk)

is faster than the following, because `+` creates a new list and copies every element on each iteration:

everything = []
for chunk in list_of_lists:
    everything = everything + chunk

#### Sorting

In [29]:
a = [7, 2, 5, 1, 3]
a.sort() #Sorts a lists in place
a

[1, 2, 3, 5, 7]

In [30]:
b = ['saw', 'small', 'He', 'foxes', 'six']
b.sort(key=len) #The 'key=len' option sorts the strings by their length.
b

['He', 'saw', 'six', 'small', 'foxes']

#### Binary search and maintaining a sorted list

In [31]:
#imports bisect module which completes a binary search and finds the location to insert an object while
#keeping it sorted.

import bisect
c = [1, 2, 2, 2, 3, 4, 7]
bisect.bisect(c, 2)
bisect.bisect(c, 5)
bisect.insort(c, 6)
c

[1, 2, 2, 2, 3, 4, 6, 7]

#### Slicing

In [32]:
#You can select sections of most sequence types.
#In Python, objects that cannot be sliced are generally those that do not support
#positional indexing, such as sets and dicts.

#Use 'start:stop' inside the indexing operator [].
#Notice indexing starts with 0, and the element at 'stop' is not included.
seq = [7, 2, 3, 7, 5, 6, 0, 1]
seq[1:5]

[2, 3, 7, 5]

In [33]:
seq[3:4] = [6, 3]
seq

[7, 2, 3, 6, 3, 5, 6, 0, 1]

In [34]:
#Either start or stop can be omitted, in which case they default to the start and the end of the sequence.

seq[:5]

[7, 2, 3, 6, 3]

In [35]:
seq[3:]

[6, 3, 5, 6, 0, 1]

In [36]:
#Negative indices slice the sequence relative to the end.
seq[-4:]


[5, 6, 0, 1]

In [37]:
seq[-6:-2]

[6, 3, 5, 6]

In [38]:
#A step can be given after a second colon.  A step of 2 takes every other element.
seq[::2]

[7, 3, 3, 6, 1]

In [39]:
#A step of -1 reverses the list or tuple.
seq[::-1]

[1, 0, 6, 5, 3, 6, 3, 2, 7]

### Built-in Sequence Functions

#### enumerate

i = 0
for value in collection:
   # do something with value
   i += 1

for i, value in enumerate(collection):
   # do something with value

In [40]:
some_list = ['foo', 'bar', 'baz']
mapping = {}
for i, v in enumerate(some_list):
    mapping[v] = i
mapping

{'foo': 0, 'bar': 1, 'baz': 2}

#### sorted

In [41]:
sorted([7, 1, 2, 6, 0, 3, 2])
sorted('horse race')

[' ', 'a', 'c', 'e', 'e', 'h', 'o', 'r', 'r', 's']

#### zip

In [42]:
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zipped = zip(seq1, seq2)
list(zipped)

[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]

In [43]:
seq3 = [False, True]
list(zip(seq1, seq2, seq3))

[('foo', 'one', False), ('bar', 'two', True)]

In [44]:
for i, (a, b) in enumerate(zip(seq1, seq2)):
    print('{0}: {1}, {2}'.format(i, a, b))

0: foo, one
1: bar, two
2: baz, three


In [45]:
pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'),
            ('Curt', 'Schilling')]
first_names, last_names = zip(*pitchers)
first_names
last_names

('Ryan', 'Clemens', 'Schilling')

#### reversed

In [46]:
list(reversed(range(10)))

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

### dict

In [47]:
empty_dict = {}
d1 = {'a' : 'some value', 'b' : [1, 2, 3, 4]}
d1

{'a': 'some value', 'b': [1, 2, 3, 4]}

In [48]:
d1[7] = 'an integer'
d1
d1['b']

[1, 2, 3, 4]

In [49]:
'b' in d1

True

In [50]:
d1[5] = 'some value'
d1
d1['dummy'] = 'another value'
d1
del d1[5]
d1
ret = d1.pop('dummy')
ret
d1

{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer'}

In [51]:
list(d1.keys())
list(d1.values())

['some value', [1, 2, 3, 4], 'an integer']

In [52]:
d1.update({'b' : 'foo', 'c' : 12})
d1

{'a': 'some value', 'b': 'foo', 7: 'an integer', 'c': 12}

#### Creating dicts from sequences

mapping = {}
for key, value in zip(key_list, value_list):
    mapping[key] = value

In [53]:
mapping = dict(zip(range(5), reversed(range(5))))
mapping

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

#### Default values

if key in some_dict:
    value = some_dict[key]
else:
    value = default_value

value = some_dict.get(key, default_value)

In [54]:
words = ['apple', 'bat', 'bar', 'atom', 'book']
by_letter = {}
for word in words:
    letter = word[0]
    if letter not in by_letter:
        by_letter[letter] = [word]
    else:
        by_letter[letter].append(word)
by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

for word in words:
    letter = word[0]
    by_letter.setdefault(letter, []).append(word)

from collections import defaultdict
by_letter = defaultdict(list)
for word in words:
    by_letter[word[0]].append(word)

#### Valid dict key types

In [55]:
hash('string')
hash((1, 2, (2, 3)))
#hash((1, 2, [2, 3])) # fails because lists are mutable

-9209053662355515447

In [56]:
d = {}
d[tuple([1, 2, 3])] = 5
d

{(1, 2, 3): 5}

### set

In [57]:
set([2, 2, 2, 1, 3, 3])
{2, 2, 2, 1, 3, 3}

{1, 2, 3}

In [58]:
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}

In [59]:
a.union(b)
a | b

{1, 2, 3, 4, 5, 6, 7, 8}

In [60]:
a.intersection(b)
a & b

{3, 4, 5}

In [61]:
c = a.copy()
c |= b
c
d = a.copy()
d &= b
d

{3, 4, 5}

In [62]:
my_data = [1, 2, 3, 4]
my_set = {tuple(my_data)}
my_set

{(1, 2, 3, 4)}

In [63]:
a_set = {1, 2, 3, 4, 5}
{1, 2, 3}.issubset(a_set)
a_set.issuperset({1, 2, 3})

True

In [64]:
{1, 2, 3} == {3, 2, 1}

True

### List, Set, and Dict Comprehensions

[expr for val in collection if condition]

result = []
for val in collection:
    if condition:
        result.append(expr)

In [65]:
strings = ['a', 'as', 'bat', 'car', 'dove', 'python']
[x.upper() for x in strings if len(x) > 2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

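dict_comp = {key_expr: value_expr for value in collection if condition}

set_comp = {expr for value in collection if condition}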

In [66]:
unique_lengths = {len(x) for x in strings}
unique_lengths

{1, 2, 3, 4, 6}

In [67]:
set(map(len, strings))

{1, 2, 3, 4, 6}

In [68]:
loc_mapping = {val : index for index, val in enumerate(strings)}
loc_mapping

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}

#### Nested list comprehensions

In [69]:
all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'],
            ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]

names_of_interest = []
for names in all_data:
    enough_es = [name for name in names if name.count('e') >= 2]
    names_of_interest.extend(enough_es)

In [70]:
result = [name for names in all_data for name in names
          if name.count('e') >= 2]
result

['Steven']

In [71]:
some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
flattened = [x for tup in some_tuples for x in tup]
flattened

[1, 2, 3, 4, 5, 6, 7, 8, 9]

flattened = []

for tup in some_tuples:
    for x in tup:
        flattened.append(x)

In [72]:
[[x for x in tup] for tup in some_tuples]

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

## Functions

def my_function(x, y, z=1.5):
    """Combine the sum of ``x`` and ``y`` with the factor ``z``.

    Returns ``z * (x + y)`` when ``z`` is greater than 1, otherwise
    ``z / (x + y)``.
    """
    total = x + y
    return z * total if z > 1 else z / total

my_function(5, 6, z=0.7)
my_function(3.14, 7, 3.5)
my_function(10, 20)

### Namespaces, Scope, and Local Functions

def func():
    """Build a list of 0..4 in the local name ``a``; nothing is returned."""
    # 'a' exists only in this function's local namespace.
    a = [i for i in range(5)]

# 'a' lives in the enclosing (module) scope, outside the function.
a = []
def func():
    """Append 0..4 to the list ``a`` defined in the enclosing scope."""
    # Mutating the existing list does not require the 'global' keyword.
    a.extend(range(5))

In [73]:
a = None
def bind_a_variable():
    """Rebind the module-level name ``a`` to a new empty list."""
    # 'global' is required because the assignment below would otherwise
    # create a new local variable instead of rebinding the outer 'a'.
    global a
    a = []
bind_a_variable()
print(a)

[]


### Returning Multiple Values

def f():
    """Return the three values 5, 6 and 7 packed into a single tuple."""
    a, b, c = 5, 6, 7
    return a, b, c

a, b, c = f()

return_value = f()

def f():
    """Return the values 5, 6 and 7 in a dict keyed by 'a', 'b' and 'c'."""
    a, b, c = 5, 6, 7
    return {'a' : a, 'b' : b, 'c' : c}

### Functions Are Objects

In [74]:
states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda',
          'south   carolina##', 'West virginia?']

In [75]:
import re

def clean_strings(strings):
    """Return a new list with each string in ``strings`` cleaned up.

    Each string has surrounding whitespace stripped, the characters
    '!', '#' and '?' removed, and is then converted to title case.
    """
    return [
        re.sub('[!#?]', '', text.strip()).title()
        for text in strings
    ]

In [76]:
clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [77]:
def remove_punctuation(value):
    """Return ``value`` with every '!', '#' and '?' character removed."""
    cleaned = re.sub('[!#?]', '', value)
    return cleaned

# Cleaning steps, applied to each string in this order.
clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings(strings, ops):
    """Apply every function in ``ops``, in order, to each string.

    Returns a new list; ``strings`` itself is not modified.
    """
    def _apply_all(text):
        # Feed the result of each operation into the next one.
        for op in ops:
            text = op(text)
        return text

    return [_apply_all(item) for item in strings]

In [78]:
clean_strings(states, clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [79]:
for x in map(remove_punctuation, states):
    print(x)

   Alabama 
Georgia
Georgia
georgia
FlOrIda
south   carolina
West virginia


### Anonymous (Lambda) Functions

def short_function(x):
    """Return ``x * 2``."""
    return x * 2

equiv_anon = lambda x: x * 2

def apply_to_list(some_list, f):
    """Return a new list holding ``f(x)`` for every ``x`` in ``some_list``."""
    return list(map(f, some_list))

ints = [4, 0, 1, 5, 6]
apply_to_list(ints, lambda x: x * 2)

In [80]:
strings = ['foo', 'card', 'bar', 'aaaa', 'abab']

In [81]:
strings.sort(key=lambda x: len(set(x)))
strings

['aaaa', 'foo', 'abab', 'bar', 'card']

### Currying: Partial Argument Application

def add_numbers(x, y):
    """Return the sum ``x + y``."""
    return x + y

add_five = lambda y: add_numbers(5, y)

from functools import partial
add_five = partial(add_numbers, 5)

### Generators

In [82]:
some_dict = {'a': 1, 'b': 2, 'c': 3}
for key in some_dict:
    print(key)

a
b
c


In [83]:
dict_iterator = iter(some_dict)
dict_iterator

<dict_keyiterator at 0x7fc518091c10>

In [84]:
list(dict_iterator)

['a', 'b', 'c']

In [85]:
def squares(n=10):
    """Generator yielding the squares 1, 4, 9, ... up to ``n ** 2``.

    The announcement is printed only once iteration starts, because a
    generator's body does not run until the first value is requested.
    """
    upper = n ** 2
    print('Generating squares from 1 to {0}'.format(upper))
    yield from (i ** 2 for i in range(1, n + 1))

In [86]:
gen = squares()
gen

<generator object squares at 0x7fc5180a3ae0>

In [87]:
for x in gen:
    print(x, end=' ')

Generating squares from 1 to 100
1 4 9 16 25 36 49 64 81 100 

#### Generator expressions

In [88]:
gen = (x ** 2 for x in range(100))
gen

<generator object <genexpr> at 0x7fc51813da10>

def _make_gen():
    """Generator yielding the square of every integer from 0 to 99."""
    yield from map(lambda x: x ** 2, range(100))
gen = _make_gen()

In [89]:
sum(x ** 2 for x in range(100))
dict((i, i **2) for i in range(5))

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

#### itertools module

In [90]:
import itertools
first_letter = lambda x: x[0]
names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']
#groupby only groups *consecutive* items that share a key, which is why 'A' is printed twice.
#The loop variable is called 'group' so that it does not overwrite the 'names' list.
for letter, group in itertools.groupby(names, first_letter):
    print(letter, list(group)) # group is an iterator over the matching names

A ['Alan', 'Adam']
W ['Wes', 'Will']
A ['Albert']
S ['Steven']


### Errors and Exception Handling

In [91]:
float('1.2345')
#float('something') #Fails because could not convert string to float: 'something'

1.2345

In [92]:
def attempt_float(x):
    """Return ``float(x)``, or ``x`` unchanged if the conversion fails.

    Any ordinary error raised by ``float`` is suppressed.
    """
    try:
        return float(x)
    # 'except Exception' rather than a bare 'except:', so that
    # KeyboardInterrupt and SystemExit are not silently swallowed.
    except Exception:
        return x

In [93]:
attempt_float('1.2345')
attempt_float('something')

'something'

In [94]:
#float((1, 2)) #Fails because float() argument must be a string or a number, not 'tuple'

In [95]:
def attempt_float(x):
    """Return ``float(x)``, or ``x`` unchanged when ``float`` raises ValueError.

    Other exceptions (for example TypeError) are not caught and propagate.
    """
    try:
        converted = float(x)
    except ValueError:
        return x
    else:
        return converted

In [96]:
#attempt_float((1, 2)) #Fails float() argument must be a string or a number, not 'tuple'

In [97]:
def attempt_float(x):
    """Return ``float(x)``, or ``x`` unchanged on TypeError or ValueError."""
    try:
        converted = float(x)
    except (TypeError, ValueError):
        # Conversion is not possible; hand the input back untouched.
        converted = x
    return converted

In [98]:
attempt_float((1, 2))

(1, 2)

f = open(path, 'w')

try:
    write_to_file(f)
finally:
    f.close()

f = open(path, 'w')

try:
    write_to_file(f)
except:
    print('Failed')
else:
    print('Succeeded')
finally:
    f.close()

#### Exceptions in IPython

In [10]: %run examples/ipython_bug.py
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
/home/wesm/code/pydata-book/examples/ipython_bug.py in <module>()
     13     throws_an_exception()
     14
---> 15 calling_things()

/home/wesm/code/pydata-book/examples/ipython_bug.py in calling_things()
     11 def calling_things():
     12     works_fine()
---> 13     throws_an_exception()
     14
     15 calling_things()

/home/wesm/code/pydata-book/examples/ipython_bug.py in throws_an_exception()
      7     a = 5
      8     b = 6
----> 9     assert(a + b == 10)
     10
     11 def calling_things():

AssertionError:

## Files and the Operating System

In [99]:
#%pushd is an IPython magic command that pushes the current directory onto the directory stack
#and changes the working directory to the given location.
#In Python SEE https://docs.python.org/3/library/functions.html#open, or
#https://docs.python.org/3/library/csv.html?highlight=read
#In Pandas we will use the pandas.read_csv, pandas.read_excel, etc. methods to read files.

%pushd "/content/drive/MyDrive/MSSP607/Modules/Week3"

/content/drive/MyDrive/MSSP607/Modules/Week3


['/content']

In [100]:
#By default files are opened in read-only mode ('r').  Can also use 'w', 'x', etc.
#SEE https://docs.python.org/3/library/functions.html#open
path = 'examples/segismundo.txt'
f = open(path)

We can treat f like a list and iterate over its lines.
for line in f:
    pass

In [101]:
#Alternatively, a list comprehension can iterate over the file, line by line, until the end of the file (EOF).
#A list comprehension builds a new list by applying an expression (here x.rstrip()) to each item
#of an iterable; it is not a lambda function.
#NOTE: the file object created by the inline open(path) is never explicitly closed here.
lines = [x.rstrip() for x in open(path)]
lines

['Sueña el rico en su riqueza,',
 'que más cuidados le ofrece;',
 '',
 'sueña el pobre que padece',
 'su miseria y su pobreza;',
 '',
 'sueña el que a medrar empieza,',
 'sueña el que afana y pretende,',
 'sueña el que agravia y ofende,',
 '',
 'y en el mundo, en conclusión,',
 'todos sueñan lo que son,',
 'aunque ninguno lo entiende.',
 '']

In [102]:
#When you use open() to create file objects, make sure you close the file when done to release resources back to the system.
f.close()

In [103]:
#rstrip() is a method that removes trailing characters (whitespace by default) from a string.
#The 'with' statement will automatically close the file when exiting the block.
with open(path) as f:
    lines = [x.rstrip() for x in f]

In [104]:
#The .read method advances the file handle's position by the number of bytes read.
# There are other methods such as .seek, .tell, etc.
f = open(path)
f.read(10)
f2 = open(path, 'rb')  # Binary mode
f2.read(10)

b'Sue\xc3\xb1a el '

In [105]:
#The .tell() method is a file method in Python that is used to determine the current position
#(in bytes) within a file. It returns an integer representing the current file position indicator.
f.tell()
f2.tell()

10

In [106]:
#The sys module provides access to system-specific parameters and functions, allowing interaction with the Python
#interpreter and operating system.
import sys
sys.getdefaultencoding()

'utf-8'

In [107]:
#The .seek() method is a file method in Python that is used to change the current position (in bytes) within a file.
#It allows you to move the file position indicator to a specific location within the file. This can be useful
#for reading or writing data at a specific position within the file.
f.seek(3)
f.read(1)

'ñ'

In [108]:
f.close()
f2.close()

In [109]:
#Opens a file for writing and copies over only the non-blank lines of the source file.
#Both files are managed by 'with' so that each handle is closed, even if an error occurs.
with open(path) as source, open('tmp.txt', 'w') as handle:
    handle.writelines(x for x in source if len(x) > 1)
with open('tmp.txt') as f:
    lines = f.readlines()
lines

['Sueña el rico en su riqueza,\n',
 'que más cuidados le ofrece;\n',
 'sueña el pobre que padece\n',
 'su miseria y su pobreza;\n',
 'sueña el que a medrar empieza,\n',
 'sueña el que afana y pretende,\n',
 'sueña el que agravia y ofende,\n',
 'y en el mundo, en conclusión,\n',
 'todos sueñan lo que son,\n',
 'aunque ninguno lo entiende.\n']

In [110]:
import os
os.remove('tmp.txt')

### Bytes and Unicode with Files

In [111]:
with open(path) as f:
    chars = f.read(10)
chars

'Sueña el r'

In [112]:
#Open the path with mode 'rb': read-only, binary.
with open(path, 'rb') as f:
    data = f.read(10)
data

b'Sue\xc3\xb1a el '

In [113]:
data.decode('utf8')
#data[:4].decode('utf8') #'utf-8' codec can't decode byte 0xc3 in position 3: unexpected end of data

'Sueña el '

In [114]:
sink_path = 'sink.txt'
with open(path) as source:
    with open(sink_path, 'xt', encoding='iso-8859-1') as sink:
        sink.write(source.read())
with open(sink_path, encoding='iso-8859-1') as f:
    print(f.read(10))

Sueña el r


In [115]:
os.remove(sink_path)

In [116]:
f = open(path)
f.read(5)
f.seek(4)
#f.read(1)
f.close()

In [117]:
#%popd is a magic command that is used to remove a directory from the directory stack and
#change the current working directory (cwd) to the previously active directory.
%popd

/content
popd -> /content


## Conclusion