# Functions

In [7]:
# Define a simple function
def my_func(a, b=2, c=None):
    """
    This is a docstring in which you describe what the
    function does. You can print this docstring by
    typing 'help(my_func)'.
    
    """
    # a: required argument; its string form is printed.
    # b: optional argument (default 2); returned only when c is given.
    # c: optional argument (default None); any non-None value triggers the return.
    #
    # A parenthesised single-argument print behaves the same on Python 2 and 3.
    print("1st argument = " + str(a))

    if c is not None:
        return b
    # When c is None the function falls off the end and implicitly returns None.

In [9]:
help(my_func)

Help on function my_func in module __main__:

my_func(a, b=2, c=None)
    This is a docstring in which you describe what the
    function does. You can print this docstring by
    typing 'help(my_func)'.



In [10]:
my_func(34)

1st argument = 34


In [12]:
my_func(23, c='anything')

1st argument = 23


2

In [13]:
d = my_func(23, c='anything')
print d

1st argument = 23
2


In [14]:
d = my_func(23, c='anything', b=6)
print d

1st argument = 23
6


In [15]:
# In a function call, keyword arguments must come after positional arguments;
# the call below breaks that rule and raises a SyntaxError
d = my_func(b=6, 23)

SyntaxError: non-keyword arg after keyword arg (<ipython-input-15-0f142c872c36>, line 2)

# Modules
Modules are Python scripts that contain defined objects (variables, functions, etc).

In [17]:
# Import the Python standard library module 'os' for performing operating system commands
import os
dir(os)

['EX_CANTCREAT',
 'EX_CONFIG',
 'EX_DATAERR',
 'EX_IOERR',
 'EX_NOHOST',
 'EX_NOINPUT',
 'EX_NOPERM',
 'EX_NOUSER',
 'EX_OK',
 'EX_OSERR',
 'EX_OSFILE',
 'EX_PROTOCOL',
 'EX_SOFTWARE',
 'EX_TEMPFAIL',
 'EX_UNAVAILABLE',
 'EX_USAGE',
 'F_OK',
 'NGROUPS_MAX',
 'O_APPEND',
 'O_ASYNC',
 'O_CREAT',
 'O_DIRECT',
 'O_DIRECTORY',
 'O_DSYNC',
 'O_EXCL',
 'O_LARGEFILE',
 'O_NDELAY',
 'O_NOATIME',
 'O_NOCTTY',
 'O_NOFOLLOW',
 'O_NONBLOCK',
 'O_RDONLY',
 'O_RDWR',
 'O_RSYNC',
 'O_SYNC',
 'O_TRUNC',
 'O_WRONLY',
 'P_NOWAIT',
 'P_NOWAITO',
 'P_WAIT',
 'R_OK',
 'SEEK_CUR',
 'SEEK_END',
 'SEEK_SET',
 'TMP_MAX',
 'UserDict',
 'WCONTINUED',
 'WCOREDUMP',
 'WEXITSTATUS',
 'WIFCONTINUED',
 'WIFEXITED',
 'WIFSIGNALED',
 'WIFSTOPPED',
 'WNOHANG',
 'WSTOPSIG',
 'WTERMSIG',
 'WUNTRACED',
 'W_OK',
 'X_OK',
 '_Environ',
 '__all__',
 '__builtins__',
 '__doc__',
 '__file__',
 '__name__',
 '__package__',
 '_copy_reg',
 '_execvpe',
 '_exists',
 '_exit',
 '_get_exports_list',
 '_make_stat_result',
 '_make_statvfs_re

In [21]:
os.listdir('./')

['testfile.txt',
 'SA_Python_Learning_02_Basics.ipynb',
 '.ipynb_checkpoints',
 'SA_Python_Learning_01_Basics.ipynb']

In [24]:
# The os.path submodule provides functions for querying files and directories
for name in os.listdir('./'):
    msg = "is a file" if os.path.isfile(name) else "is not a file"  # Short-hand conditional expression
    print "{} {}".format(name, msg)

testfile.txt is a file
SA_Python_Learning_02_Basics.ipynb is a file
.ipynb_checkpoints is not a file
SA_Python_Learning_01_Basics.ipynb is a file


In [26]:
# Submodules (or any object) can be imported on their own using the 'from x import y' syntax
from os import path
path.abspath('./')

'/home/batsc/python/notebooks'

In [27]:
# Or if you wish to temporarily rename it for whatever reason, use 'as'
import os.path as p
p.abspath('./')

'/home/batsc/python/notebooks'

# NumPy
* The fundamental package for scientific computing with Python
* Uses pre-compiled C libraries for fast calculations
* Supports a much greater variety of numerical types than Python does:
 * int16, int32, int64, uint8, float16, complex64, etc.
* Introduces a powerful N-dimensional array object

In [3]:
# Standard method to import numpy
import numpy as np

In [4]:
# You can create an N-dimensional array by converting a Python array-like object (e.g. a list)
numbers = [3, 67, 20]
numbers_array = np.array(numbers)
print numbers_array, type(numbers_array)

[ 3 67 20] <type 'numpy.ndarray'>


In [5]:
# Numpy arrays have an associated type, given by 'dtype'
print numbers_array.dtype

int64


In [6]:
# Conversion is straightforward
my_32bit_floats = numbers_array.astype(np.float32)
print my_32bit_floats, my_32bit_floats.dtype

[  3.  67.  20.] float32


In [7]:
# A 2D array creation using list of lists
a = [4, 6, 1]
b = [1, 8, 2]
c = [a, b]
array2D = np.array(c)
print array2D, array2D.shape

[[4 6 1]
 [1 8 2]] (2, 3)


In [8]:
# Built-in methods for array creation
a = 67.3 * np.random.random((43, 23))
print a.min(), a.max(), a.mean()

0.0325914872731 67.2486698524 33.1674304411


In [9]:
a = np.ones(10).cumsum()
print a

[  1.   2.   3.   4.   5.   6.   7.   8.   9.  10.]


In [10]:
# Reshaping is straightforward
a.shape = (5, 2)
print a

[[  1.   2.]
 [  3.   4.]
 [  5.   6.]
 [  7.   8.]
 [  9.  10.]]


In [11]:
# Indexing is similar to list indexing: a[:] is the whole array, so a[:][3] selects row 3
print a[:][3]

[ 7.  8.]


In [12]:
# Multi-dimensional indexing uses commas, one entry per axis: a[row, column]
# a[3, :] is row 3 with every column, i.e. the same result as a[:][3] above
print a[3, :]

[ 7.  8.]


## Numpy ufuncs
* Universal functions
* Operate element-wise on arrays

In [1]:
# Example of adding a number to values in a list in standard Python
a = [3, 2, 5, 4]
b = [val + 5 for val in a]  # Using list comprehension
print b

[8, 7, 10, 9]


In [4]:
# Numpy ufunc
import numpy as np
b = np.array(a) + 5
print b, list(b)

[ 8  7 10  9] [8, 7, 10, 9]


**ufuncs cover arithmetic, trigonometric functions, bitwise operations, comparisons, exponential functions, etc.**

For reference, a list of all available ufuncs: https://numpy.org/doc/stable/reference/ufuncs.html

In [9]:
# For example:
#   compare 2 random arrays element wise
#   sum the number of elements in array 1 that are larger than in array 2
array1 = np.sin(np.random.random(10000))
array2 = np.cos(np.random.random(10000))
print (array1 > array2).sum()

897


In [49]:
# Many operations can be called in 2 ways: as an array method (a.std())
# or as a numpy function taking the array as an argument (np.std(a))
a = np.random.rayleigh(size=100)
print a.std(), np.std(a)


 0.701578261339 0.701578261339


### Numpy provides massive speed up

In [27]:
from random import random
c = [random() for i in range(100000)]
%timeit -qon 100 min(c)

<TimeitResult : 100 loops, best of 3: 1.96 ms per loop>

**For loops are SLOW in Python because types of objects are checked in each loop**

# AVOID LOOPS FOR CALCULATIONS!

In [28]:
# NOTE(review): this array has 10,000 elements, whereas the Python list timed
# in the previous cell has 100,000, so the two timings are not a like-for-like
# comparison. Consider using the same size in both cells and re-running.
c = np.random.random(10000)
%timeit -qon 100 c.min()

<TimeitResult : 100 loops, best of 3: 8.4 µs per loop>

#### Aggregations on multiple dimension arrays

In [37]:
m = np.random.randint(0, 10, (4, 6))
print m

[[0 1 4 1 2 8]
 [4 5 5 1 5 2]
 [1 8 4 9 9 0]
 [6 4 2 5 4 1]]


In [38]:
m.sum(axis=0)

array([11, 18, 15, 16, 20, 11])

In [39]:
m.sum(axis=1)

array([16, 22, 31, 22])

**Similar aggregations:** np.percentile(), np.nanmean(), np.std(), np.argmax(), etc. (see link above)

## Broadcasting
* Used for calculations involving different shaped arrays
* Broadcasting provides a means of vectorizing array operations so that **looping occurs in C instead of Python**
* The smaller array is “broadcast” across the larger array so that they have compatible shapes

In [50]:
a = np.ones((3, 3))
b = np.arange(3)
print a, b

[[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]] [0 1 2]


In [51]:
print a + b

[[ 1.  2.  3.]
 [ 1.  2.  3.]
 [ 1.  2.  3.]]


## Masked arrays

In [52]:
a = np.random.randint(0, 10, (5,5))
print a

[[5 9 7 4 1]
 [2 6 1 4 2]
 [6 9 0 4 2]
 [6 6 9 9 3]
 [6 0 8 8 7]]


In [53]:
# Use np.ma submodule for masking
b = np.ma.array(a, mask=a>5)
print b

[[5 -- -- 4 1]
 [2 -- 1 4 2]
 [-- -- 0 4 2]
 [-- -- -- -- 3]
 [-- 0 -- -- --]]


In [54]:
# For masked arrays, ufuncs only apply to unmasked values
print a.sum(), b.sum()

124 28


In [59]:
# We can perform a similar operation on 'a' without creating 'b'
print a[~(a>5)].sum()  # '~' unary operator = invert a boolean (array)

28


In [None]:
# TODO: add an example of a nearest-neighbour lookup