In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last
# one; later cells rely on this to show several bare expressions at once.
InteractiveShell.ast_node_interactivity = "all"

# File IO and Exceptions

### Reading text files

In [None]:
help(open)

In [None]:
file_object = open('caterpillar_advice.txt', 'r') # 'r' is default

What type of object is __file_object__?

In [None]:
# Show the file object itself, then its type on a second line.
print(file_object)
print("file_object is of type %s" % (type(file_object),))

In [None]:
# file_object.<TAB>
dir(file_object) # attributes and methods of file objects

In [None]:
# Examples: three attributes carried by the file object
print(file_object.name)     # the name the file was opened with
print(file_object.mode)     # the access mode passed to open()
print(file_object.closed)   # whether close() has been called yet

A file_object can be treated as a <font color='blue'>sequence</font> of strings. So, we can use a <font color='blue'>for</font> loop to read each line.

In [None]:
# Iterating over the file object yields one line per step.
for line in file_object:
    print(line)
# Every line has been consumed; release the handle.
file_object.close()

#### Note: Each line includes a non-printing character called the newline character "\n"

Exercise:

Read the text file caterpillar_advice.txt and count the number of lines *excluding* empty lines.

In [None]:
# Count the lines that contain something other than whitespace.
file_object = open('caterpillar_advice.txt', 'r')
count = 0
for line in file_object:
    # strip() removes the trailing newline and any surrounding blanks, so
    # whitespace-only lines (e.g. "   \n" or "\r\n") count as empty too.
    if line.strip():
        count += 1
file_object.close()
count

In [None]:
my_file = open ('caterpillar_advice.txt','r')

# read() reads the _entire_ file, returns a string object
a_string = my_file.read()           
print "Contents of file are of type",type(a_string)

# close file handle
my_file.close()

# Now data is in memory
print '<', a_string, '>'

#### Read file chunks

In [None]:
my_file = open('caterpillar_advice.txt', 'r')
chunk = my_file.read(32)
print type(chunk)
print chunk, ' * '
while chunk != "":
   chunk = my_file.read(32)   # read in 32-byte chunk sizes   
   print chunk, ' * '
    
my_file.close()

#### Read one line at a time

In [None]:
my_file = open ('caterpillar_advice.txt')   # no mode given: 'r' is the default
line = my_file.readline()                   # read one line from the file
# The two bare expressions below are displayed as cell output.
type(line)
line
my_file.close()

#### Read all the lines in the text file

In [None]:
my_file = open ('caterpillar_advice.txt')   # no mode given: 'r' is the default
lines = my_file.readlines()                 # read all the lines of the file in one call
# The two bare expressions below are displayed as cell output.
type(lines)
lines                
my_file.close()

### Automatically closing files

In [None]:
# To open a file, process its contents, and make sure it gets closed, use a with block:

with open ('caterpillar_advice.txt', 'r') as f:
    data = f.read()
    # f.closed is False here: the file is still open inside the block
    print('Is file closed? %s' % f.closed)
    # file will be closed after exiting this block of code

# Leaving the with block closed the file, so f.closed is now True
print('Is file closed? %s' % f.closed)
print(f.mode)

### Writing text files

In [None]:
# 'w+' opens the file for both writing and reading; only writing is done here.
with open ('write_test.txt', 'w+') as f:
    f.write('Test')

In [None]:
!cat write_test.txt

In [None]:
# Copy the text file: writelines() pulls the lines from the reader one by one
# and writes each of them to the destination.
with open ('caterpillar_advice.txt', 'r') as rf:
    with open ('caterpillar_advice.txt_copy.txt', 'w') as wf:
        wf.writelines(rf)


In [None]:
!cat caterpillar_advice.txt_copy.txt

### Summary of open() file access modes

<table style="width:100%">
  <tr>
    <th>Mode</th>
    <th>Description</th> 
  </tr>
  <tr>
    <td>r</td>
    <td>Opens a file for reading only. Default mode.</td> 
  </tr>
  <tr>
    <td>rb</td>
    <td>Opens a file for reading only in binary format.</td> 
  </tr>
  <tr>
    <td>r+</td>
    <td>Opens a file for both reading and writing.</td> 
  </tr>
  <tr>
    <td>rb+</td>
    <td>Opens a file for both reading and writing in binary format.</td> 
  </tr>
  <tr>
    <td>w</td>
    <td>Opens a file for writing only. Overwrites file if it exists. Creates a new file if it does not exist.</td> 
  </tr>
  <tr>
    <td>wb</td>
    <td>Opens a file for writing only in binary format.</td> 
  </tr>
  <tr>
    <td>w+</td>
    <td>Opens a file for both writing and reading.</td> 
  </tr>
  <tr>
    <td>wb+</td>
    <td>Opens a file for both writing and reading in binary format.</td> 
  </tr>
  <tr>
    <td>a</td>
    <td>Opens a file for appending. The file pointer is at the end of the file if it exists. </td> 
  </tr>
  <tr>
    <td>ab</td>
    <td>Opens a file for appending in binary format.</td> 
  </tr>
  <tr>
    <td>a+</td>
    <td>Opens a file for both appending and reading.</td> 
  </tr>
  <tr>
    <td>ab+</td>
    <td>Opens a file for both appending and reading in binary format.</td> 
  </tr>
</table>

## Summary of basic file IO functions and methods

<table style="width:100%">
  <tr>
    <th>Methods and functions</th>
    <th>Description</th> 
  </tr>
  <tr>
    <td>open()</td>
    <td>Returns a file object and is most commonly used with two arguments: open(filename, mode)</td> 
  </tr>
  <tr>
    <td>file.close()</td>
    <td>Close the file.</td> 
  </tr>
  <tr>
    <td>file.read([size])</td>
    <td>Read the entire file. If size is specified then read at most size bytes.</td> 
  </tr>
  <tr>
    <td>file.readline([size])</td>
    <td>Read one line from the file. If size is specified then read at most size bytes.</td> 
  </tr>
  <tr>
    <td>file.readlines([size])</td>
    <td>Read all the lines from the file. If size is specified then read at most size bytes.</td> 
  </tr>
  <tr>
    <td>file.tell()</td>
    <td>Returns file object's current position in the file.</td> 
  </tr>
  <tr>
    <td>file.seek(int)</td>
    <td>Changes the file object's current position to the given int.</td> 
  </tr>
  <tr>
    <td>file.write(string)</td>
    <td>Writes the contents of string to the file.</td> 
  </tr>
</table>

### Handling delimited files

In [None]:
!cat grades.csv

One could use the csv package: 
```python
import csv
```

In [None]:
with open('grades.csv', 'r') as f:
    for line in f:
        # drop surrounding whitespace (incl. the newline), then cut at each comma
        print(line.strip().split(','))

Each row of the input data is parsed and converted to a list of strings.

### Binary data IO

The differences between binary and ascii encoding won't be obvious for simple alphanumeric strings, but will become important if you're processing text that includes characters not in the ascii character set.

In [None]:
# Binary files must be opened in binary mode ('rb'/'wb'); text mode may
# translate line endings on some platforms and corrupt the data.
with open ('cat.jpg', 'rb') as rf:
    with open ('copy_cat.jpg', 'wb') as wf:
        # Copy in fixed-size chunks: a binary file has no meaningful "lines".
        chunk = rf.read(4096)
        while chunk:
            wf.write(chunk)
            chunk = rf.read(4096)

In [None]:
with open('cat.jpg', 'rb') as f:
    # readline() on a binary file: presumably everything up to and including
    # the first newline byte, which need not be a meaningful unit of a JPEG.
    data = f.readline()
print (data)

In [None]:
# Two lowercase hex digits per byte of `data`, joined with ':'
':'.join('%02x' % ord(x) for x in data)

Hex dump is useful for debugging. In a hex dump, each byte (8-bits) is represented as a two-digit hexadecimal number.

In [None]:
with open('cat.jpg', 'rb') as f:
    data = f.read()

    # Files whose first two bytes are FF D8 are reported as JPEG.
    if not data.startswith(b'\xff\xd8'):
        info = 'This is a random file (%d bytes long)'
    else:
        info = 'This is a jpeg file (%d bytes long)'

    print(info % len(data))

In [None]:
from IPython.display import Image
# Wrap the image file in a display object; the bare name on the last line
# makes the notebook render it.
kitty = Image(filename = 'cat.jpg')
kitty

### OS dependent functions

In [None]:
import os

Python os module provides methods that help you perform all kinds of file-processing operations, such as renaming and deleting files (as well as file IO).

In [None]:
help(os.read)
# Only reading is needed, so open read-only: O_RDWR would also demand write
# permission on the file.
fd = os.open('caterpillar_advice.txt', os.O_RDONLY)
try:
    ret = os.read(fd, 15)   # read at most 15 bytes from the descriptor
    print('Result from os.read:'+'\n'+20*'-'+'\n'+ret)
finally:
    # close the descriptor even if the read or the print fails
    os.close(fd)

#### You need to test whether or not a file or directory exists.

In [None]:
# exists() reports True or False for each of the two paths
print(os.path.exists('/etc/passwd'))
print(os.path.exists('/etc/spam'))

In [None]:
filename = '/etc/spam'
# Handle the missing-file case first, then the normal path.
if not os.path.exists(filename):
    print(filename + ' does not exist')
else:
    with open(filename) as f:
        data = f.readline()
    print(data)

#### List files in current directory

In [None]:
# List the entries of the current working directory, one per line.
# (Neutral names: `file` would shadow the Python 2 builtin type, and
# `listdir` is easily confused with os.listdir itself.)
entries = os.listdir(".")
for entry in entries:
    print(entry)

#### Dealing with directories

In [None]:
# Create a sub-directory, step into it, and show where we are now.
os.mkdir("newdir")
os.chdir("newdir")
print(os.getcwd())

In [None]:
# Step back up to the parent directory and confirm the new location.
os.chdir("..")
print(os.getcwd())

In [None]:
# Clean up the directory created two cells earlier.
os.rmdir("newdir")

#### Accessing environment

In [None]:
print(os.environ['HOME'])        # indexing raises KeyError if HOME is not set
# membership test; dict.has_key() is deprecated and was removed in Python 3
print('HOME' in os.environ)

# using get will return `None` if a key is not present rather than raise a `KeyError`
print(os.environ.get('KEY_THAT_MIGHT_EXIST'))

# os.getenv performs the same lookup; here a default is supplied instead of `None`
print(os.getenv('KEY_THAT_MIGHT_EXIST', 'SOME_KEY'))

# Setting variable
os.environ['PythonTraining'] = 'is fun'
print(os.environ.get('PythonTraining'))

#### Other utilities

In [None]:
# Query the kind of a path, resolve it, and report a file size.
print(os.path.isfile('/etc/passwd'))
print(os.path.isdir('/etc/passwd'))
print(os.path.islink('python'))
print(os.path.realpath('python'))
print(os.path.getsize('/etc/passwd'))

In [None]:
import shutil

The shutil module offers a number of high-level operations on <b>files and collections of files</b>. In particular, functions are provided which support file copying and removal. 
For operations on individual files, see also the os module.

In [None]:
help (shutil.copy)
help (shutil.move)

# Exceptions and exception handling

### Syntax vs. run-time errors
A <b>Python</b> program with a syntax error will execute no steps at all, but a program with a run-time error will execute the steps that happened before the error occurred.

In [None]:
print "programming is fun, debugging programs is not"
# Deliberate syntax error (no comma before 1/0): nothing in this cell runs,
# not even the print on the line above.
print 'This does not make sense: ' 1/0

In [None]:
print "programming is fun, debugging programs is not"
# Syntactically valid: the line above prints first, then 1/0 raises
# ZeroDivisionError at run time.
print 'This does not make sense: ',1/0

#### In Python an <font color='blue'>exception</font> is an object that is <font color='blue'>raised</font> when an "exceptional" situation occurs. 

## Built-in exceptions

<table style="width:100%">
  <tr>
    <th>Exception name</th>
    <th>Description</th> 
  </tr>
  <tr>
    <td>IOError</td>
    <td>Raised when an IO operation fails.</td> 
  </tr>
  <tr>
    <td>KeyError</td>
    <td>Raised when a mapping (dictionary) key is not found in the set of existing keys.</td> 
  </tr>
  <tr>
    <td>NameError</td>
    <td>Raised when an identifier is not found in the local or global namespace.</td> 
  </tr>
  <tr>
    <td>OSError</td>
    <td>Raised when a function returns a system-related error.</td> 
  </tr>
  <tr>
    <td>ValueError</td>
    <td>Raised when a built-in operation or function receives an argument that has the right type but an inappropriate value.</td> 
  </tr>
  <tr>
    <td>ZeroDivisionError</td>
    <td>Raised when the second argument of a division or modulo operation is zero.</td> 
  </tr>
</table>

For a full list see https://docs.python.org/2/library/exceptions.html

### raise statement

One can generate exceptions using the <font color='blue'>raise</font> statement, which takes an argument that must be an instance of the class <font color='blue'>BaseException</font> or a class derived from it.

A typical use of exceptions is to abort functions when some error condition occurs, for example:

In [None]:
def compute(param):
    """Reject an out-of-range parameter; stands in for a real computation.

    Args:
        param: number expected to be at least 1.

    Returns:
        None.

    Raises:
        ValueError: if ``param`` is less than 1.
    """
    if param < 1.:
        # ValueError (right type, inappropriate value) is more specific than a
        # plain Exception and is still caught by ``except Exception``.
        raise ValueError("Invalid parameter")
compute(.1)   # .1 is below 1, so this call raises on purpose to show an unhandled exception

__When an exception is raised, and is not handled by the code, it propagates back to the calling code (and its calling code) until it's handled.__

### How do we "handle" exceptions

#### We use the <font color='blue'>try...except</font> block. 


In [None]:
try:                                                      # required keyword
    print('< Try: Block of code that harbors the risk of an exception >') # exception generated in this block
# Naming Exception keeps KeyboardInterrupt and SystemExit from being swallowed,
# which a bare "except:" would do.
except Exception:                                         # required keyword
    print('*** There was a runtime error ***')            # exception is caught here

print('< We are outside the try statement >')

In [None]:
x, y = 5, 0
try:
    z = x/y
except ZeroDivisionError:
    # dividing by y == 0 ends up here
    print("divide by zero")

#### General <font color='blue'>try...except</font> block

In [None]:
try:                                                      # required keyword
    print('< Try: Block of code that harbors the risk of an exception >') # exception generated in this block
# Naming Exception keeps KeyboardInterrupt and SystemExit from being swallowed,
# which a bare "except:" would do.
except Exception:                                         # required keyword
    print('*** There was a runtime error ***')            # exception is caught here
else:                                                     # optional keyword
    print('< Else: There was no exception >')
finally:                                                  # optional keyword - like a clean-up step
    print('< Finally: Exception handling is finished - always executed >')

print('< We are outside the try statement >')

### Exception Handling and File Processing

In [None]:
# The file does not exist, so open() itself raises IOError before the body runs.
with open ('nonExistentFile.txt', 'r') as f:
    # read from the handle bound by this with statement (not a stale `my_file`)
    data = f.readlines()

In [None]:
try:
    with open ('nonExistentFile.txt', 'r') as f:
        data = f.readline()
except IOError as err:
    # IOError covers more than a missing file (e.g. permission denied),
    # so report what actually went wrong instead of guessing.
    print('Could not read file: %s' % err)
else:
    # runs only when the try block raised nothing
    print(data)

#### <font color='red'>Summary</font>: 

Exception handling is a useful feature of Python - more art than science. 

Raise exceptions whenever something unexpected occurs, and catch them only where you can do something about them.

Exercise:

Add a try-except block to the following code, i.e. instead of performing checks (using 'if') before we do something, we just do it – and if an error does occur we handle it.

In [None]:
n = None
# Keep asking until the input passes the digit check.
while n is None:
    s = raw_input("Please enter an integer: ")
    if not s.isdigit():
        print("%s is not an integer." % s)
        continue
    n = int(s)