In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

![NASA](http://www.nasa.gov/sites/all/themes/custom/nasatwo/images/nasa-logo.svg)

<center><h1><font size="+3">Spring 2017 Python Training</font></h1></center>

---

<center><h4>Langley Research Center - April 5, 2017</h4></center>

Definition:
<font color='blue'>Iterable</font>: An object capable of returning its members one at a time. Examples of iterables include all <font color='blue'>sequence</font> types (such as list, str, and tuple) and some non-sequence types such as dict, all of which are discussed next.

# Basic data structures

## String*

A string is an <font color='blue'>immutable</font> sequence of characters.

In [None]:
empty_str = ''                  # a string of length zero
single_char = 'x'               # a single character is still a string (of length 1)
single_quot_str = 'The circumference of the circle is 2 pi R'     # delimited by single quotes
double_quot_str = "The circumference of the circle is 2 pi R"    # delimited by double quotes
circum_def = '''The circumference of the circle is 2 pi R '''    # triple quotes: the literal can be split among several lines
special_chars = """ a string with special character " and ' inside """  # triple quotes allow unescaped " and ' inside
control_chars = " a string with escaped special character \", \' inside " # Note the use of the backslash \ to escape the quote that follows it

What does it mean to be immutable?

In [None]:
circum_def
id(circum_def)   # identity of the object the name circum_def currently refers to

In [None]:
# Assignment binds the name to another string object; the earlier string itself is not modified.
circum_def = 'The circumference of the circle is 2 pi r'
id(circum_def)   # compare with the id displayed before the reassignment

#### Operating on strings
* The + and * operators are overloaded and thus can be used to create new strings
* One can also use the comparison operators >, >=, <, <=, ==, != to compare strings
* ... and membership operators in, not in

In [None]:
# Concatenation: + builds a new string (no separator is inserted)
x = "hello"
y = "world"
x + y

# Repetition: * repeats the string the given number of times
'#'*10

# Comparison
'a' < 'b'

# Membership: tests whether 'pi' occurs as a substring
'pi' in circum_def

#### Strings are sequences. 
For all sequences you can
* get any single element (for a string, a single character) using an index specified in square brackets
* loop over them
* look at and extract contiguous sections of a sequence using the colon operator - slicing

#### Indexing

In [None]:
circum_def

# Python uses zero-based indexing. Square brackets are used for indexing:
circum_def[0]          # the first character

# random access
circum_def[4]          # the fifth character

#### Looping

In [None]:
# Iterating over a string yields its characters one at a time
for c in 'hello':
    print c,   # the trailing comma keeps the output on one line, separated by spaces

#### Slicing
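Given a string (or sequence) s, s[start:end:step] is the substring that starts at index 'start', stops before index 'end' (so the last index that can be included is 'end-1'), and takes every 'step'-th character. Negative indices count from the end of the string.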

In [None]:
circum_def[25:30]     # characters at indices 25 through 29
circum_def[1:20:2]    # every second character from index 1 up to, but not including, index 20
circum_def[-7:-1]     # negative indices count from the end; the last character is not included

#### file IO
When you read text from a file (or STDIN) you get - text. This is important to remember when you are reading numerical data and you intend to use it as such.

In [None]:
x = raw_input('Enter a number: ')   # what the user types is returned as text
type(x)
x+str(1)   # both operands are strings, so + concatenates instead of adding numbers

#### More on strings

In [None]:
# In a live session, type `circum_def.` and press <TAB> to browse the string methods.
# (A bare `circum_def.` is a SyntaxError if the cell is actually executed.)
# One of those methods, as an example:
circum_def.upper()

In [None]:
dir(circum_def)   # list the names of the attributes and methods available on the string

## Lists

* A list is an ordered sequence of elements. 
* Grow and shrink as needed.
* Can contain a mix of types
* <font color='blue'>mutable</font>.

Initialization

In [None]:
empty_list = []   # square brackets with nothing inside: a list with no elements

In [None]:
some_primes = [2, 3, 5, 7, 11, 13, 17]       # a list of integers
shoplist = ['apple', 'mango', 'orange']      # a list of strings

In [None]:
# Access elements
some_primes[0::2]   # every second element, starting at index 0
shoplist[1]         # the second element
shoplist[2:4]       # the slice end lies past the last element of this 3-item list; slicing tolerates that

In [None]:
# This is how one iterates over a list
for item in shoplist:
    print item,   # the trailing comma keeps all items on one output line

In [None]:
for item in (range(len(shoplist)-1,-1,-1)):        
    print shoplist[item],

In [None]:
# reversed() iterates from the last element to the first
for item in reversed(shoplist):
    print item,

Sometimes we need to loop over a list and retrieve each element together with its corresponding index

In [None]:
i = 0
for item in shoplist:
    print i,'--->',shoplist[i]
    i += 1

In [None]:
# Loop over the valid indices 0 .. len(shoplist)-1 and look each element up by index
for i in range(len(shoplist)):
    print i, '--->', shoplist[i]

The <font color='blue'>enumerate</font> function  gives us an iterable where each element is an object (called a tuple) that contains the index of the item and the original item value.

In [None]:
# enumerate yields (index, item) pairs; start=1 makes the numbering begin at 1
for i, name in enumerate(shoplist, start=1): # optional start=1 argument
    print "item {}: {}".format(i, name)

Looping over two lists.

In [None]:
colors = ['red','yellow','orange']
# Index-based pairing: colors[i] requires colors to have at least as many elements as shoplist
for i in range(len(shoplist)):
    print shoplist[i], '--->', colors[i]

Use <font color='blue'>zip</font>*

In [None]:
# zip pairs up the elements of the two lists position by position
for item, color in zip(shoplist, colors):
    print item, '--->', color

Lists are mutable.

In [None]:
# Show the current length and identity of the list before it is modified
print 'I have', len(shoplist), 'items to purchase. The list ID is ',id(shoplist)

In [None]:
print 'I also have to buy a banana.'
shoplist.append('banana')   # adds the item at the end of the existing list
print 'My shopping list is now ', shoplist,'The list ID is ',id(shoplist)

print 'I will sort my list now'
shoplist.sort() # Note that the sort is done ON the list, i.e. it is mutated
print 'Sorted shopping list is', shoplist,'The list ID is still ',id(shoplist)

# Can also use sorted(x), which returns a new sorted list and does not mutate the original list

In [None]:
# What happens here? (Look at how many times 'apple' appears in the printed list.)
shoplist.append('apple')
print shoplist

In [None]:
# Extend (like +), but the elements of veggies are added to shoplist itself
veggies = ['carrot','broccoli']
shoplist.extend(veggies)
print shoplist

In [None]:
# Insert : insert(index, item) places item at position index
shoplist.insert(2, 'pineapple')   # 'pineapple' becomes the element at index 2
print shoplist

More help

In [None]:
# In a live session, type `shoplist.` and press <TAB> to browse the list methods.
# (`shoplist.<TAB>` is not valid Python, so running it verbatim raises a SyntaxError.)
# The same public names can be listed programmatically:
[name for name in dir(shoplist) if not name.startswith('_')]

In [None]:
# Mixed types
P = ['Wednesday', 'April', 5, 2017, ('a','b','c')]   # strings, integers and a tuple in one list
print P[0:4]   # the first four elements
print P[4]     # the tuple stored as the fifth element

In [None]:
# Multi-dimensional list
A = [[1, 3], [2, 4], [1, 9], [4, 16]]   # a list whose elements are two-element lists
print A[0]      # the first inner list
print A[2][0]   # the first element of the third inner list

Exercise

Transform the gettysburg_address string into a list whose elements are just the words in the text (no punctuation or special characters). Then print the list and the list length (i.e. number of words in the Gettysburg address). 

In [2]:
gettysburg_address = """
Four score and seven years ago our fathers brought forth on this continent, 
a new nation, conceived in Liberty, and dedicated to the proposition that 
all men are created equal.

Now we are engaged in a great civil war, testing whether that nation, or 
any nation so conceived and so dedicated, can long endure. We are met on
a great battle-field of that war. We have come to dedicate a portion of
that field, as a final resting place for those who here gave their lives
that that nation might live. It is altogether fitting and proper that we
should do this.

But, in a larger sense, we can not dedicate -- we can not consecrate -- we
can not hallow -- this ground. The brave men, living and dead, who struggled
here, have consecrated it, far above our poor power to add or detract.  The
world will little note, nor long remember what we say here, but it can never
forget what they did here. It is for us the living, rather, to be dedicated
here to the unfinished work which they who fought here have thus far so nobly
advanced. It is rather for us to be here dedicated to the great task remaining
before us -- that from these honored dead we take increased devotion to that
cause for which they gave the last full measure of devotion -- that we here
highly resolve that these dead shall not have died in vain -- that this
nation, under God, shall have a new birth of freedom -- and that government
of the people, by the people, for the people, shall not perish from the earth.
"""

In [3]:
help(gettysburg_address.split)   # show the built-in documentation of the string's split method

Help on built-in function split:

split(...)
    S.split([sep [,maxsplit]]) -> list of strings
    
    Return a list of the words in the string S, using sep as the
    delimiter string.  If maxsplit is given, at most maxsplit
    splits are done. If sep is not specified or is None, any
    whitespace string is a separator and empty strings are removed
    from the result.



In [5]:
# use split to create a list
ga_list = gettysburg_address.split()
new_ga_list = []
for word in ga_list:
# remove punctuation and non-letter characters
    w = word.replace(",","").replace(".","")
    if not "--" in w:
        new_ga_list.append(w)
print new_ga_list
print "Number of words: ",len(new_ga_list)

['Four', 'score', 'and', 'seven', 'years', 'ago', 'our', 'fathers', 'brought', 'forth', 'on', 'this', 'continent', 'a', 'new', 'nation', 'conceived', 'in', 'Liberty', 'and', 'dedicated', 'to', 'the', 'proposition', 'that', 'all', 'men', 'are', 'created', 'equal', 'Now', 'we', 'are', 'engaged', 'in', 'a', 'great', 'civil', 'war', 'testing', 'whether', 'that', 'nation', 'or', 'any', 'nation', 'so', 'conceived', 'and', 'so', 'dedicated', 'can', 'long', 'endure', 'We', 'are', 'met', 'on', 'a', 'great', 'battle-field', 'of', 'that', 'war', 'We', 'have', 'come', 'to', 'dedicate', 'a', 'portion', 'of', 'that', 'field', 'as', 'a', 'final', 'resting', 'place', 'for', 'those', 'who', 'here', 'gave', 'their', 'lives', 'that', 'that', 'nation', 'might', 'live', 'It', 'is', 'altogether', 'fitting', 'and', 'proper', 'that', 'we', 'should', 'do', 'this', 'But', 'in', 'a', 'larger', 'sense', 'we', 'can', 'not', 'dedicate', 'we', 'can', 'not', 'consecrate', 'we', 'can', 'not', 'hallow', 'this', 'ground

## Tuple

* A tuple is an ordered sequence of elements. 
* Used for fixed data
* <font color='blue'>immutable</font>.

Initialization

In [None]:
empty_tuple = ()   # parentheses with nothing inside: a tuple with no elements

In [None]:
some_primes = 2, 3, 5, 7, 11, 13    # Parentheses are optional: the commas make this a tuple
solar_system = ('mercury', 'venus', 'earth', 'mars', 'jupiter', 'saturn', 'uranus', 'neptune')   # a tuple of strings

In [None]:
# len() works on tuples as on any other sequence
print 'Number of planets in the solar system is', len(solar_system)

Tuples are simple objects. Two methods only:

In [None]:
print solar_system.count('earth')   # to count the number of occurrences of a value
print solar_system.index('mars')    # to find the index of the first occurrence of a value

# Very little overhead -> faster than lists

Other uses of tuples:

* protecting data, since a tuple is immutable (*)
* assigning multiple values
* unpacking data
* serving as dictionary keys

In [None]:
# (*)
solar_system_list = ['mercury', 'venus', 'earth', 'mars', 'jupiter', 'saturn', 'uranus', 'neptune']

# The tuple holds a reference to the list, and that list can still be modified in place.
a_mutable_tuple = (solar_system_list, 'pluto')
a_mutable_tuple[0][2] = 'EARTH'   # replaces element 2 of the list stored at position 0 of the tuple
a_mutable_tuple

In [None]:
# Assigning multiple values
(x, y, z) = ['a','b','c']   # the three list elements are bound to x, y and z, in order
x, y, z

In [None]:
# Unpacking data: each element of the tuple is bound to its own name, in order
data  = (1,2,3)
first, second, third = data
first, second, third

Tuples are sequences:

In [None]:
# Access elements
solar_system[4]   # the fifth element (indices start at 0)

In [None]:
solar_system[0:3]   # the elements at indices 0, 1 and 2

In [None]:
# Index-based traversal: k runs from 0 up to len(solar_system) - 1
k = 0
while k < len(solar_system):
    print solar_system[k]
    k += 1   # advance to the next index; without this the loop would never end

## Dictionary

* A dictionary is an associative data structure of variable length.
* Data is unordered, so elements are accessed by an associated key value.
* <font color='blue'>mutable</font>.

Initialization

In [None]:
empty_dict = {}   # curly braces with nothing inside: a dictionary with no entries

In [None]:
# Four equivalent ways to build the same dictionary.

# 1. Literal syntax
daily_temps = {'mon': 70.2, 'tue': 67.2, 'wed': 71.8, 'thur': 73.2, 'fri': 75.6}

# 2. From a list of (key, value) pairs
daily_temps = dict([('mon', 70.2), ('tue', 67.2), ('wed', 71.8), ('thur', 73.2), ('fri', 75.6)])

# 3. From keyword arguments
daily_temps = dict(mon=70.2, tue=67.2, wed=71.8, thur=73.2, fri=75.6)

# 4. From two parallel lists paired up with zip
days = ['mon', 'tue', 'wed', 'thur', 'fri']   # 'thur' (not 'thu') so the keys match the three forms above
temps = [70.2, 67.2, 71.8, 73.2, 75.6]
daily_temps = dict(zip(days, temps))

Note that you can use only immutable objects for the keys of a dictionary but you can use either immutable or mutable objects for the values of the dictionary. 

In [None]:
# Access elements
# Location at given key stores desired element in the dictionary. Square brackets are used for accessing elements:
daily_temps['mon']   # the value stored under the key 'mon'

In [None]:
daily_temps.keys()   # all the keys

In [None]:
daily_temps.values()   # all the values

In [None]:
daily_temps.items()   # all the (key, value) pairs

In [None]:
# Test whether a key is present. The `in` operator replaces dict.has_key(),
# which is deprecated in Python 2 and no longer exists in Python 3.
'sun' in daily_temps

The specific location that a value is stored is determined by a particular method of converting key values into index values called <font color='red'>hashing</font>. Thus, key values must be hashable. A requirement for a data type to be hashable is that the type must be immutable.

In [None]:
# Tuples used as dictionary keys
temps = {('April',3,2017): 70.2, ('April',4,2017): 67.2, ('April',5,2017): 71.8}
temps[('April',3,2017)]   # look up the value stored under a tuple key

Looping over a dictionary:

In [None]:
# To loop all the keys from a dictionary 
for k in dict:
    print k

In [None]:
# Iterating over the dictionary yields its keys
for day in daily_temps:
    print day

In [None]:
# Print the temperature of every day whose name starts with 'w'
for day in daily_temps:
    if day.startswith('w'):
        print daily_temps[day]

In [None]:
# To loop every key and value from a dictionary
for k, v in dict.items():
    print k, v

In [None]:
# Each (key, value) pair is unpacked into day and temp
for day, temp in daily_temps.items():
    print day, temp

In [None]:
# What will the following program produce:
colors = ['red','orange','yellow','green','blue','indigo','violet']
d = {}
for color in colors:
    # file each color name under the number of letters it has
    d.setdefault(len(color), []).append(color)
d

Exercise:

Given the scrabble scores dictionary, write a program to compute the value of any word. For word='Python' the score is 14.

In [51]:
# Scrabble letter values, keyed by lowercase letter (listed alphabetically)
scores = {
    "a": 1, "b": 3, "c": 3, "d": 2, "e": 1, "f": 4, "g": 2,
    "h": 4, "i": 1, "j": 8, "k": 5, "l": 1, "m": 3, "n": 1,
    "o": 1, "p": 3, "q": 10, "r": 1, "s": 1, "t": 1, "u": 1,
    "v": 4, "w": 4, "x": 8, "y": 4, "z": 10,
}
# The word to be scored
word = 'Python'

In [53]:
# Write your program here
# Add up the value of every letter; letters are lowercased because the table uses lowercase keys
score = sum(scores[letter.lower()] for letter in word)
score

14

## Set

* An unordered collection used to store non-duplicate data.
* Data is unordered, so elements cannot be accessed via indexing; use membership tests (in) or iteration instead.
* <font color='blue'>mutable</font>.

Initialization

In [None]:
empty_set = set()          # set() is the way to create an empty set: () is an empty tuple
empty_set_too = set([])    # ... and {} is an empty dictionary, not an empty set
fibo = {1,1,2,3,5}         # a set literal; the repeated 1 is stored only once
some_primes = [2,3,3,5,7]  # a list may hold duplicates (1 is not a prime, so it is not listed)
primes = set(some_primes)  # converting to a set keeps one copy of each value

In [None]:
# Display both sets: each value appears only once
fibo
primes

Mathematical set operations

In [None]:
# Two overlapping sets (3 and 4 are in both) for the operations that follow
a = {1, 2, 3, 4}
b = {3, 4, 5, 6}

In [None]:
a | b # Union (elements in a, in b, or in both) or a.union(b)

In [None]:
a & b # Intersection (elements in both a and b) or a.intersection(b)

In [None]:
a < b # Proper subset (a is a subset of b and a != b); a <= b is the operator form of a.issubset(b)

In [None]:
a - b # Difference (elements in a that are not in b) or a.difference(b)

Exercise:

Use set() to compute the number of unique words in the "Gettysburg address". 

Extra credit: How many of each are there? (Hint: you need a dictionary)

In [7]:
gettysburg_address
new_ga_list # Result from previous exercise
unique_words = set(new_ga_list)
print "Number of unique words: ",len(unique_words)
word_counts = {}
for w in unique_words:
    word_counts[w] = new_ga_list.count(w)
word_counts

Number of unique words:  142


{'But': 1,
 'Four': 1,
 'God': 1,
 'It': 3,
 'Liberty': 1,
 'Now': 1,
 'The': 2,
 'We': 2,
 'a': 7,
 'above': 1,
 'add': 1,
 'advanced': 1,
 'ago': 1,
 'all': 1,
 'altogether': 1,
 'and': 6,
 'any': 1,
 'are': 3,
 'as': 1,
 'battle-field': 1,
 'be': 2,
 'before': 1,
 'birth': 1,
 'brave': 1,
 'brought': 1,
 'but': 1,
 'by': 1,
 'can': 5,
 'cause': 1,
 'civil': 1,
 'come': 1,
 'conceived': 2,
 'consecrate': 1,
 'consecrated': 1,
 'continent': 1,
 'created': 1,
 'dead': 3,
 'dedicate': 2,
 'dedicated': 4,
 'detract': 1,
 'devotion': 2,
 'did': 1,
 'died': 1,
 'do': 1,
 'earth': 1,
 'endure': 1,
 'engaged': 1,
 'equal': 1,
 'far': 2,
 'fathers': 1,
 'field': 1,
 'final': 1,
 'fitting': 1,
 'for': 5,
 'forget': 1,
 'forth': 1,
 'fought': 1,
 'freedom': 1,
 'from': 2,
 'full': 1,
 'gave': 2,
 'government': 1,
 'great': 3,
 'ground': 1,
 'hallow': 1,
 'have': 5,
 'here': 8,
 'highly': 1,
 'honored': 1,
 'in': 4,
 'increased': 1,
 'is': 3,
 'it': 2,
 'larger': 1,
 'last': 1,
 'little': 1,
 'l