# PNI Biomath Bootcamp 2016 -- Programming module -- Day 3


### Items to be covered

3. Example from yesterday's exercises: Make bar plot of letter counts with the corresponding letter on top of each bar (plotting, loops, text in plots)
5. Matrix notation re-visited; Ida's shelves slides presentation
    * matrix times a scalar; matrix addition; matrix multiplication (dot product)
    * `np.where()`, and using the resulting indices into matrices 
    * In-class example: Sieve of Eratosthenes
1. Saving and loading data: np.savez (we'll do hdf5 tomorrow)
    * Dictionaries; iterating over dictionaries
5. Plotting and printing figures out to files
6. Animation example





In [None]:
%matplotlib notebook
# use %matplotlib inline    for regular plotting
# use %matplotlib notebook  if you want to be able to do animations
# remember: at the start of the session or after restarting kernel

import numpy as np
import matplotlib.pyplot as plt


def printvar(var, str):
    """Print a label and a value, each on its own line, padded by blank lines.

    PARAMETERS:
        var      the value (any expression) to display
        str      the label printed before it, typically the variable's name

    RETURNS:  none
    """
    # print() separates the pieces with single spaces, so the label line reads
    # " <label>  = " and the value line starts with one space.
    pieces = ("\n", str, " = \n", var, "\n")
    print(*pieces)

    
# Quick check of printvar: pass a value together with the label to print next to it
x = [1, 2, 3]
mystr = 'this'
printvar(x, mystr)

# Example from yesterday's exercises: bar plot of letter counts with letter on top

In [None]:
from scipy.io import loadmat
# loadmat returns a dictionary of the variables stored in the .mat file; we pick the one
# named 'str' and take its first element (presumably the text itself -- verify against the file)
mystring = loadmat('joyce.mat')['str'][0]

# print(mystring)

lmystring = list(mystring)          # one list element per element of mystring (presumably single characters)
all_letters = np.unique(lmystring)  # sorted numpy array holding each distinct letter in mystring once

# for x in all_letters:
  
def count_items_in_list(desired_item, mylist):
    """
    Tally the elements of mylist that are equal to desired_item.

    PARAMS:
        desired_item    the value to look for

        mylist          the sequence to scan (a list or an np.array)

    RETURNS:
        how many elements matched (a scalar)
    """
    # Keep only the matching elements; the length of what is left is the count.
    matches = [element for element in mylist if element == desired_item]
    return len(matches)


def count_items_in_list(desired_item, mylist):
    """
    Compact version: how many elements of mylist equal desired_item.

    PARAMS:
        desired_item    the value to look for

        mylist          the sequence to scan (a list or an np.array)

    RETURNS:
        how many elements matched (a scalar)
    """
    # Add up a 1 for every matching element, without building a temporary list.
    return sum(1 for element in mylist if element == desired_item)
    
mylist = [1, 2, 3, 2, 4, 5, 2]
print(count_items_in_list(2, mylist))


# lmystring = ['a', 'b', 'a', 'c']
# all_letters = np.unique(lmystring)

# Count how often each distinct letter occurs, in the same order as all_letters
letter_count = [count_items_in_list(letter, lmystring) for letter in all_letters]

# printvar(letter_count, "letter_count")
# printvar(all_letters, "all_letters")
# print(mystring)

num_letts = len(all_letters)
# align='center' pins the centre of bar i to x = i regardless of the matplotlib
# version's default alignment, so each label can be placed at exactly x = i.
plt.bar(range(num_letts), letter_count, align='center')
for i, (letter, count) in enumerate(zip(all_letters, letter_count)):
    # verticalalignment='bottom' rests the letter on top of its bar;
    # horizontalalignment='center' centres it over the bar.
    t = plt.text(i, count, letter,
                 horizontalalignment='center', verticalalignment='bottom')
    t.set_color([0, 1, 1])   # RGB triple: cyan


# Numpy and matrix notation revisited: Ida's shelves

slideshow presentation: [Ida's shelves](https://docs.google.com/presentation/d/151qAkqWrQZB8n2TwQkULv3-4bDYKl01zK-V3pTvrOV4/edit?usp=sharing)

In [None]:
# A 4x4 example matrix in which the value 42 occurs twice
new_mat = np.array([
    [ 6, 23,  5, 42],
    [13,  4,  1, 56],
    [ 2,  5, 42,  6],
    [ 6, 23,  5, 13],
])
printvar(new_mat, "new_mat")

# np.where on a 2D array gives a tuple: (row indices, column indices) of the matches
np.where(new_mat == 42)

In [None]:
# Example 4x3 matrix in which the value 2 occurs three times
z = np.array([[1, 2, 3], [3, 4, 9], [2, 5, 0], [0, 0, 2]])
printvar(z, "z")

# For a 2D array np.where returns one index array per dimension, unpacked here
# into the row indices and the column indices of the entries equal to 2
Rows,Cols  = np.where(z==2)
printvar(Rows,"Rows")
printvar(Cols, "Cols")

In [None]:
# Rows and Cols are the index arrays returned by np.where(z==2) in the previous cell.
# Indexing with both of them at once selects every matching entry, so a single
# assignment overwrites all of them. (The name `I` was never defined, so `z[I]`
# raised a NameError.)
z[Rows, Cols] = 400
printvar(z, "z")

In [None]:
# Visit every (row, column) position we found, one pair at a time:

for row, col in zip(Rows, Cols):
    printvar((row, col), "pos")


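### Be careful: unlike Matlab, assigning a Numpy array to a new variable (`y = z`) does not copy the data -- both names refer to the same array. Use `np.copy()` to get an independent copy: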

In [None]:
z = np.array([[1, 2, 3], [3, 4, 9], [2, 5, 0], [0, 0, 2]])
printvar(z, "z")
y = z            # no data is copied: y is a second name for the same array as z
x = np.copy(z)   # x gets its own, independent copy of the data
x[0,2] = 1000    # changes x only
y[0,1] = 1000    # changes the array that y and z share
printvar(x, "x") # x is a real copy
printvar(y, "y") # y just copied a pointer to the shelves
printvar(z, "z") # z shows the 1000 written through y, but not the one written to x

### Mixing data types (e.g., strings and numbers) in numpy arrays doesn't work!


In [None]:

# An array built only from strings gets a string data type shared by all its elements
z = np.array(['r', 'o', 'b'])

z[1] = 'x'
# The number is not stored as a number: it is converted to text to fit the array's
# string type (and presumably cut down to the one-character width -- verify the printout)
z[1] = 30.0
# NOTE: np.append returns a NEW array and leaves z untouched; the result is
# discarded here, so the printout below does not contain 'q'
np.append(z, ['q'])
printvar(z, "z")



**PITFALL:** Numpy has 1 dimensional arrays, distinct from single rows or columns which are 2D arrays
+ 1D array has shape `(N,)`   and is specified with single brackets [] 
+ 2D arrays are specified with double brackets [[]]
    + A 2D row has shape `(1,N)`
    + A 2D column has shape `(N,1)`

The 1D arrays allow more flexibility in many instances, but some functions require a 2D array as input, or behave differently when given a 1D array instead of a 2D array. Not recognizing these instances and getting errors or incorrect results is one of the primary mistakes of new Numpy users. 


In [None]:
# 1D array (single brackets): shape (4,)
a = np.array([1, 2, 3, 4])
printvar(a, "a")
printvar(a.T, "a.T")   # transposing a 1D array changes nothing

# 2D row (double brackets): shape (1, 4)
b = np.array([[1, 2, 3, 4]])
printvar(b, "b")
printvar(b.T, "b.T")   # transposing the 2D row gives a (4, 1) column


numrows, numcols = np.shape(b)   # returns number of rows and cols
thesize = np.size(b)             # returns the total number of elements

printvar(numrows, "numrows")
printvar(numcols, "numcols")
printvar(thesize, "thesize")

For turning a 2D array into a 1D array, the function `np.squeeze()` (which drops dimensions of length 1) and the array method `.flatten()` (or the function `np.ravel()`) are useful

For turning a 1D array into a 2D array, the `np.reshape()` function is useful

In [None]:
# Turn the 1D array into a 2D row of shape (1, 4), then transpose it into a (4, 1) column
a = np.reshape(np.array([1, 2, 3, 4]), (1, 4))
b = np.transpose(a)
print("a = \n", a, "\n")
print("b = \n", b, "\n")

# A list of four 3-element lists becomes a 4x3 matrix; its transpose is 3x4
l = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
c = np.array(l)
printvar(c, "c")
printvar(c.T, "c.T")


In [None]:
a = np.zeros((4, 1))       # 2D column of zeros, shape (4, 1)
print(a, a.shape, "\n")

a = a.squeeze()            # drop the length-1 dimension: shape (4,)
print(a, a.shape, "\n")

a = a.reshape([4,1])       # back to a 2D column, shape (4, 1)
print(a, a.shape, "\n")

a = a.reshape([1,4])       # same four elements as a 2D row, shape (1, 4)
print(a, a.shape, "\n")

a = a.reshape([2,2])       # any shape works as long as the total number of elements stays 4
print(a, a.shape)

---

# Example: sieve of eratosthenes

First we test computing the row and column, which Carlos wasn't doing too well at live in class. Now we get it right.
See the second version of the function for a one-line, correct way to do it.

Having written and tested the two functions below, it turns out that in the actual sieve in the next cell we won't
use them -- the second function, the one-liner, is too short to be worth its own function. (Either way would have been fine.)


In [None]:


# Computing the row and column-- the slow but safe way we wrote it in class:
def linear_index_to_rowcol(i, Nrows, Ncols):
    """
    Convert a linear index that starts at 1 into a (row, column) pair that starts at 0.

    The matrix is numbered 1, 2, 3, ... along the columns of the first row,
    then along the columns of the second row, and so on.

    PARAMS:
        i      linear index
        Nrows  number of rows of the matrix (not used in the calculation)
        Ncols  number of columns of the matrix

    RETURNS:
        i_row, i_col    The corresponding row and column

    EXAMPLE:
    Nrows = 3, Ncols = 4, so that
    M  [[1,  2,  3, 4],
        [5,  6,  7, 8],
        [9, 10, 11, 12]]

    Then
        linear_index_to_rowcol(10, Nrows, Ncols)  returns 2,1
        linear_index_to_rowcol(8, Nrows, Ncols)   returns 1,3

    """
    rows_before = int(np.floor(i/Ncols))

    # An exact multiple of Ncols is the LAST entry of a row, so it belongs to
    # the row before the one the plain division points to.
    if i % Ncols == 0:
        return rows_before - 1, Ncols - 1

    # Otherwise the leftover after the complete rows, minus one, is the column.
    return rows_before, i - rows_before*Ncols - 1
    

# Computing the row and column-- getting it correct in one line:
def linear_index_to_rowcol(i, Nrows, Ncols):
    """
    Convert a linear index that starts at 1 into a (row, column) pair that starts at 0.

    The matrix is numbered 1, 2, 3, ... along the columns of the first row,
    then along the columns of the second row, and so on.

    PARAMS:
        i      linear index
        Nrows  number of rows of the matrix (not used in the calculation)
        Ncols  number of columns of the matrix

    RETURNS:
        i_row, i_col    The corresponding row and column

    EXAMPLE:
    Nrows = 3, Ncols = 4, so that
    M  [[1,  2,  3, 4],
        [5,  6,  7, 8],
        [9, 10, 11, 12]]

    Then
        linear_index_to_rowcol(10, Nrows, Ncols)  returns 2,1
        linear_index_to_rowcol(8, Nrows, Ncols)   returns 1,3

    """
    # Shifting to a count that starts at 0 removes the special case for the last
    # column: the row is then the whole-number quotient, the column the remainder.
    zero_based = i - 1
    row = int(np.floor(zero_based/Ncols))
    col = zero_based % Ncols
    return row, col
    
    
# --------- Now for testing:

Nrows = 3
Ncols = 4
i = 12
# Pass the variables rather than the literals 3 and 4, so that changing Nrows or
# Ncols above changes the test as well
myrow, mycol = linear_index_to_rowcol(i, Nrows, Ncols)
printvar(i, "i")
printvar((myrow, mycol), "myrow, mycol")
# The matrix holding 1..Nrows*Ncols row by row, displayed so the answer can be checked by eye
M = np.reshape(np.array(range(1, Nrows*Ncols+1)), [Nrows, Ncols])
M



Now to the actual sieve

In [4]:
import numpy as np

Ncols = 10
Nrows = 5  # the matrix holds 1..50, so we check for primes up to 50

# Fill the matrix row by row with 1, 2, ..., Nrows*Ncols
M = np.reshape(np.array(range(1, Ncols*Nrows+1)), [Nrows, Ncols])


# Lesson from the bug we had in class: the position of the number currently being
# considered as a prime and the position of the multiple being crossed off need
# separate variable names (myrow/mycolumn versus mults_row/mults_col). Using
# "myrow" for both confused the two things completely.

verbose = False
largest = np.size(M)   # M holds 1..largest, so any bigger multiple is outside the matrix
for linear_pos in range(Nrows*Ncols):
    # Walk the matrix in reading order: all the columns of the first row,
    # then all the columns of the second row, etc.
    myrow, mycolumn = divmod(linear_pos, Ncols)

    mynumber = M[myrow, mycolumn]
    if verbose:
        print("row = ", myrow, " col = ", mycolumn, " value = ", mynumber)
    if mynumber == 1 or mynumber <= 0:
        # Skip 1 (everything is a multiple of it) and skip entries that are not
        # positive, because those have already been crossed off
        continue
    if verbose:
        print("mynumber=", mynumber)

    # Cross off 2*mynumber, 3*mynumber, ... for as long as they fall within the matrix
    for mult in range(2*mynumber, largest + 1, mynumber):
        if verbose:
            print("   mynumber=", mynumber, "  crossing off mult=", mult)
        # The value mult sits at the position whose count from 0 is mult-1
        mults_row, mults_col = divmod(mult - 1, Ncols)
        if M[mults_row, mults_col] > 0:   # still positive: not crossed off yet
            M[mults_row, mults_col] = -M[mults_row, mults_col]  # negative means "crossed off"

print(M)
        
        
#  BUGGY:        
# for i in range(1, Nrows*Ncols+1):
#     mycolumn = i%Ncols
#     if mycolumn==0:
#         mycolumn = Ncols
#     mycolumn = mycolumn-1
        
#     myrow = int(np.floor(i/Ncols))  # np.floor() rounds down to nearest int;   np.ceil() rounds up

    
    
    
# fig = plt.figure(figsize=[8,8])


row =  0  col =  0  value =  1
row =  0  col =  1  value =  2
mynumber= 2
   mynumber= 2   crossing off mult= 4
   mynumber= 2   crossing off mult= 6
   mynumber= 2   crossing off mult= 8
   mynumber= 2   crossing off mult= 10
   mynumber= 2   crossing off mult= 12
   mynumber= 2   crossing off mult= 14
   mynumber= 2   crossing off mult= 16
   mynumber= 2   crossing off mult= 18
   mynumber= 2   crossing off mult= 20
   mynumber= 2   crossing off mult= 22
   mynumber= 2   crossing off mult= 24
   mynumber= 2   crossing off mult= 26
   mynumber= 2   crossing off mult= 28
   mynumber= 2   crossing off mult= 30
   mynumber= 2   crossing off mult= 32
   mynumber= 2   crossing off mult= 34
   mynumber= 2   crossing off mult= 36
   mynumber= 2   crossing off mult= 38
   mynumber= 2   crossing off mult= 40
   mynumber= 2   crossing off mult= 42
   mynumber= 2   crossing off mult= 44
   mynumber= 2   crossing off mult= 46
   mynumber= 2   crossing off mult= 48
   mynumber= 2   crossing off mu

# Saving and loading data

What on earth was that line in the text problem above?

    mystring = loadmat('joyce.mat')['str'][0]
    
We'll look into how to save and load data, and, as in the line above, that'll require us to learn about dictionaries.

## --Dictionaries

We've gotten used to storing things in lists (or arrays), and accessing them with the square bracket notation, by asking for the item in a particular position.

Dictionaries allow us to give the different "shelves" names rather than positions: the position no longer matters, and what you do is access a particular shelf through its name (which is a string).

The names of the shelves are called `keys` and what is stored on them are called `values`; a (key, value) pair together is called an `item`.

In [None]:
# Let's create a dictionary in variable b, which will have a shelf called "key1" with
# the string "value1" stored in it, and a shelf called "key2" with the number 300 stored in it.

# Note the curly braces, the colon linking each key to its value, and the comma
# separating one key-value pair from the next.
b = {'key1' : "value1", "key2" : 300}

# You access a shelf with the usual square bracket notation, but now you put the string for a key in it.
print(b['key2'])

# If you ask for a key that doesn't exist you get an error.
# For example:
# >> b['gg']
#   KeyError: 'gg'

The values can be anything -- even other dictionaries

In [None]:
# Values of three different types: a string, a number and a list
d = {'my' : "this", 'and' : 2, 'furthermore' : [1, 2, 3]}
print(d['my'])

# Assigning to an existing key replaces its value -- here with the whole dictionary b
d['my']= b
print(d['my'])



In [None]:
# You can find what keys a dictionary has with the .keys() function. This returns an iterable
# that you can put in a for loop:

print(d.keys(), "d.keys()")
for i in d.keys():
    printvar(i, "key")

# Or, you can cast it into a list and see it directly:
print("\nList of keys is ", list(d.keys()), "\n")

# The .items() function returns an iterable of (key, value) pairs; unpacking each
# pair into two loop variables gives you the key and its value together:
for k,v in d.items():
    print("key is =", k, ";\t  the value = ", v)


## -- back to saving and loading: using np.savez() and np.load()

In [None]:
# Let's create two matrices (and a string) that we will save

x = np.ones([3,3])
print(x)
y = np.random.randn(2,3)
print(y)
b = 'this is a string'

# The np.savez command saves them together into a single file; the .npz extension is
# added to the name automatically. Each keyword (x=, y=, b=) is the name under which
# that variable is stored. NOTE: np.savez does not compress the data -- use
# np.savez_compressed for a compressed file.
np.savez('trash', x=x, y=y, b=b)



`np.load(filename)`  returns a dictionary-like object; its keys are the names under which the variables were saved.

In [None]:
g = np.load('trash.npz')

# The keys are the names the variables were saved under
loaded_varnames = list(g.keys())
printvar(loaded_varnames, "loaded_varnames")   # remember the printvar function was defined in the first cell, at the top of the notebook

# We can access those variables through their names:
printvar(g['y'], "g['y']")

printvar(g['b'], "g['b']")

# Nothing was saved under the name 'z' in this file, so this lookup would fail:
# print(g['z'])

`np.savez()` works great for saving matrices that contain numbers or strings. 

But it's not so great for saving regular Python lists with mixed data types -- it converts everything to strings! For saving more complex mixtures of data types, we'll turn to **hdf5**

In [None]:
# A Python list with a number, a string, and a Boolean:

z = [1, 'this', True]

# On saving, the list is turned into a single array, and an array has one data type
# for all of its elements -- with a string in the mix, everything becomes a string
np.savez('trash2', z=z)

g = np.load('trash2.npz')

print(g['z'])
# The Boolean True comes back as text rather than as a Boolean:
print("For example, the type of the last element, of g['z'][2] is now ", type(g['z'][2]), 
      "\nwhen it should have been boolean")



## -- can also use hdf5 file format: will cover this tomorrow

# Printing figures out to files -- pyplot's savefig command

In [None]:
%matplotlib notebook

import numpy as np
from matplotlib import pyplot as plt
import time

# Sample points from 0 up to (but not including) 8*pi in steps of pi/10: four periods of the sine
t = np.arange(0, 2*np.pi*4, np.pi/10)
h = plt.plot(t, np.sin(t), 'o-')   # 'o-' draws a circle at every point, joined by lines

# Write the current figure to a file; the .pdf ending of the name selects the file format
plt.savefig('myfigure.pdf')
# please see help(plt.savefig)
# please see help(plt.savefig)

---
# Animation example

In [None]:
%matplotlib notebook

import numpy as np
from matplotlib import pyplot as plt
import time

# Create the figure and its axes explicitly and keep handles to both. Looking the
# figure up by number (plt.figure(1)) only works if figure 1 happens to be the one
# that was plotted into; otherwise the text and the redraws go to a different figure.
fig, ax = plt.subplots()

t = np.arange(0, 2*np.pi*4, np.pi/10)   # four periods of the sine, sampled every pi/10
h = ax.plot(t, np.sin(t), 'o-')  # h is a list of line objects
line0 = h[0]          # we only plotted one line, so h[0] is the only one we want to worry about

# The label starts near the right-hand end of the plot
txt_phase = 2*np.pi*3.5
txt = ax.text(txt_phase, 0.9, 'hello')

phase = 0
dphase = np.pi/10
for k in range(100):
    # Shift the sine wave to the left by one step: only the y data of the line changes
    phase = phase + dphase
    line0.set_ydata(np.sin(t+phase))

    # Move the label left by the same step; once it passes the left edge (x < 0),
    # wrap it around to the right-hand end of the plotted range
    txt_phase = txt_phase - dphase
    if txt_phase < 0:
        txt_phase = txt_phase + 2*np.pi*4
    txt.set_position([txt_phase, 0.9])

    fig.canvas.draw()    # Force a drawing now of whatever graphics commands you've sent so far
    # time.sleep(0.01)     # this just pauses for 10 ms-- you can make it longer to move slower

