In [None]:
# Make print() a function under Python 2 as well, so the cells below use one syntax.
from __future__ import print_function
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

![NASA](http://www.nasa.gov/sites/all/themes/custom/nasatwo/images/nasa-logo.svg)

<center><h1><font size="+3">GSFC Python Bootcamp</font></h1></center>

---

<center><h1>
    <font color="red">Python Advanced Data Types</font>  
</h1></center>

<b>We will cover the following data types: </b>
- <A HREF="https://www.programiz.com/python-programming/list"> List </A>
- <A HREF="https://www.programiz.com/python-programming/dictionary"> Dictionary </A>
- <A HREF="https://www.programiz.com/python-programming/tuple"> Tuple </A>
- <A HREF="https://www.programiz.com/python-programming/set"> Set </A>
- <A HREF="https://pymotw.com/2/collections/counter.html"> Counter </A>

## <font color="red">Lists</font>

* A list is an ordered collection of elements. 
* Defined by enclosing a comma-separated sequence of objects in square brackets ([])
* Elements can be accessed by index.
* Grow and shrink as needed.
* Can contain a mix of types
* <font color='blue'>Are mutable</font>.

#### Initialization

In [None]:
empty_list = []

In [None]:
# The first eight prime numbers
some_primes = [
    2, 3, 5, 7,
    11, 13, 17, 19,
]
# A handful of fruit names used by the list examples
fruits = "apple mango orange kiwi blueberry".split()

#### Access elements

In [None]:
# Indexing and slicing use [start:stop:step]; negative indices count from the end
print(some_primes[::2])   # every other element, starting with the first
print(fruits[1])          # second element (indices start at 0)
print(fruits[2:4])        # elements at index 2 and 3 (stop is exclusive)
print(fruits[-1])         # last element

#### Iterate over a list

In [None]:
# Direct iteration: the loop variable takes each element in turn
for item in fruits:
    print(item)

In [None]:
# Walk the indices from the last one down to 0
for item in range(len(fruits) - 1, -1, -1):
    print(fruits[item])

In [None]:
# reversed() yields the elements back to front without modifying the list
for item in reversed(fruits):
    print(item)

Sometimes we need to loop over a list and retrieve each element together with its corresponding index.

In [None]:
# Manual index tracking: i is advanced by hand on every pass
# (the loop variable `item` itself is not used in the body).
i = 0
for item in fruits:
    print (i,'--->',fruits[i])
    i += 1

In [None]:
# Index-based loop: range(len(fruits)) produces every valid position
for i in range(len(fruits)):
    print (i, '--->', fruits[i])

The <font color='blue'>enumerate</font> function  gives us an iterable where each element is an object (called a tuple) that contains the index of the item and the original item value.

In [None]:
# enumerate yields (index, element) pairs; the second argument makes counting start at 1
for position, fruit in enumerate(fruits, 1):
    label = "item {}: {}".format(position, fruit)
    print(label)

Looping over two lists.

In [None]:
# Index-based pairing of two lists.
# colors[i] is read for every index of fruits, so colors must be at least as long.
colors = ['red','yellow','orange', 'green', 'blue']
for i in range(len(fruits)):
    print (fruits[i], '--->', colors[i])

Use <font color='blue'>zip</font> to loop over both lists in parallel.

In [None]:
# zip pairs the items position by position and stops at the end of the shorter list
for item, color in zip(fruits, colors):
    print (item, '--->', color)

#### Lists are mutable.

In [None]:
# id() identifies the list object itself, independent of its current contents
print('I have', len(fruits), 'fruits. The list ID is ', id(fruits))

In [None]:
print('I also have to buy a banana.')
# append() adds the item to the end of the existing list object
fruits.append('banana')
print('My list of fruits is now ', fruits, 'The list ID is ', id(fruits))

In [None]:
print('I will sort my list now')
# list.sort() reorders the elements of the existing list (the list is mutated)
fruits.sort()
print('Sorted list of fruits is', fruits, 'The list ID is still ', id(fruits))

# sorted(fruits) would instead return a new sorted list and leave fruits untouched

In [None]:
# What happens here?
fruits.append('apple')
print(fruits)

In [None]:
# Extend: add every item of another list to the end, in place (like +=)
morefruits = ['guava', 'peach']
fruits.extend(morefruits)
print (fruits)

In [None]:
# Insert : insert(index, item) places item before the element currently at index
fruits.insert(2, 'pineapple')
print(fruits)

In [None]:
# Delete the last entry; pop() also returns the removed item
fruits.pop()
print(fruits)

In [None]:
# Delete a specific entry by index; pop(i) also returns the removed item
fruits.pop(2)
print(fruits)

In [None]:
# Remove every item in place; fruits stays the same list object, now empty
fruits.clear()
print(fruits)

More help

In [None]:
# In an interactive session, type `fruits.` and press <TAB> to see the available methods.
# The expression below shows the same public names and also works with "Run All".
[name for name in dir(fruits) if not name.startswith('_')]

In [None]:
# Mixed types: strings, integers and a tuple can live in the same list
P = ['Wednesday', 'April', 5, 2017, ('a', 'b', 'c')]
print(P[:4])    # the first four entries
print(P[4])     # the nested tuple

In [None]:
# Multi-dimensional list: a list whose elements are themselves lists
A = [
    [1, 3],
    [2, 4],
    [1, 9],
    [4, 16],
]
print(A[0])       # the first inner list
print(A[2][0])    # first element of the third inner list

#### Exercise

Transform the gettysburg_address string into a list whose elements are just the words in the text (no punctuation or special characters). Then print the list and the list length (i.e. number of words in the Gettysburg address). 

In [None]:
gettysburg_address = """
Four score and seven years ago our fathers brought forth on this continent, 
a new nation, conceived in Liberty, and dedicated to the proposition that 
all men are created equal.

Now we are engaged in a great civil war, testing whether that nation, or 
any nation so conceived and so dedicated, can long endure. We are met on
a great battle-field of that war. We have come to dedicate a portion of
that field, as a final resting place for those who here gave their lives
that that nation might live. It is altogether fitting and proper that we
should do this.

But, in a larger sense, we can not dedicate -- we can not consecrate -- we
can not hallow -- this ground. The brave men, living and dead, who struggled
here, have consecrated it, far above our poor power to add or detract.  The
world will little note, nor long remember what we say here, but it can never
forget what they did here. It is for us the living, rather, to be dedicated
here to the unfinished work which they who fought here have thus far so nobly
advanced. It is rather for us to be here dedicated to the great task remaining
before us -- that from these honored dead we take increased devotion to that
cause for which they gave the last full measure of devotion -- that we here
highly resolve that these dead shall not have died in vain -- that this
nation, under God, shall have a new birth of freedom -- and that government
of the people, by the people, for the people, shall not perish from the earth.
"""

In [None]:
help(gettysburg_address.split)

In [None]:
# Split the text on whitespace, then clean up each token
ga_list = gettysburg_address.split()
new_ga_list = []
for token in ga_list:
    # strip the commas and periods attached to a word
    cleaned = token.replace(".", "").replace(",", "")
    # a token containing "--" is a dash separator, not a word
    if "--" in cleaned:
        continue
    new_ga_list.append(cleaned)
print(new_ga_list)
print("Number of words: ",len(new_ga_list))

## <font color="red">Tuple</font>

* A tuple is an ordered sequence of elements. 
* Defined by enclosing the elements in parentheses (())
* Used for fixed data
* <font color='blue'>Are immutable</font>.
* Accessing elements of a tuple is faster than that of a list.

#### Initialization

In [None]:
empty_tuple = ()

In [None]:
# Parentheses are optional: writing 2, 3, 5, 7, 11, 13 alone builds the same tuple
some_primes = (2, 3, 5, 7, 11, 13)
solar_system = (
    'mercury', 'venus', 'earth', 'mars',
    'jupiter', 'saturn', 'uranus', 'neptune',
)

In [None]:
print ('Number of planets in the solar system is', len(solar_system))

Tuples are simple objects. Two methods only:

In [None]:
print(solar_system.count('earth'))   # number of occurrences of a value
print(solar_system.index('mars'))    # index of the first occurrence of a value

# Very little overhead -> faster than lists

#### Other uses of tuples:
* protect the data, since a tuple is immutable (*)
* assigning multiple values
* unpacking data
* tuples can be used as keys in dictionaries

In [None]:
# (*) Immutability is shallow: the tuple always refers to the same objects,
# but a mutable object stored inside it can still be changed.
solar_system_list = ['mercury', 'venus', 'earth', 'mars', 'jupiter', 'saturn', 'uranus', 'neptune']

a_mutable_tuple = (solar_system_list, 'pluto')
# This modifies the list held in slot 0, not the tuple itself
a_mutable_tuple[0][2] = 'EARTH'
a_mutable_tuple

In [None]:
# Assigning multiple values: the three items are bound to x, y and z in order
x, y, z = ['a', 'b', 'c']
(x, y, z)

In [None]:
# Unpacking data: each element of the tuple is bound to its own name.
# The number of names on the left must match the number of elements.
data  = (1,2,3)
first, second, third = data
data
first, second, third

#### Access elements

In [None]:
# Tuples are indexed and sliced exactly like lists
solar_system[4]      # fifth element (indices start at 0)
solar_system[0:3]    # elements at index 0, 1 and 2

In [None]:
# Index-driven traversal of the tuple with an explicit counter
k = 0
while k < len(solar_system):
    print (solar_system[k])
    k += 1

## <font color="red">Dictionary</font>

* A dictionary is an associative data structure of variable length.
* Elements are accessed by their associated key rather than by position (since Python 3.7 a dictionary preserves insertion order).
* <font color='blue'>Are mutable</font>.

#### Initialization

In [None]:
empty_dict = {}

In [None]:
# Four equivalent ways to build the same dictionary.
# The Thursday key is spelled 'thu' everywhere so that every form yields identical keys.

# 1. Literal syntax
daily_temps = {'mon': 70.2, 'tue': 67.2, 'wed': 71.8,
               'thu': 73.2, 'fri': 75.6}

# 2. From a list of (key, value) pairs
daily_temps = dict([('mon', 70.2), ('tue', 67.2),
                    ('wed', 71.8), ('thu', 73.2), ('fri', 75.6)])

# 3. From keyword arguments (keys must be valid identifiers)
daily_temps = dict(mon=70.2, tue=67.2, wed=71.8, thu=73.2, fri=75.6)

# 4. From two parallel lists paired up with zip
days = ['mon', 'tue', 'wed','thu','fri']
temps = [70.2, 67.2, 71.8, 73.2, 75.6]
daily_temps = dict(zip(days, temps))

<b> Note that you can use only immutable objects for the keys of a dictionary but you can use either immutable or mutable objects for the values of the dictionary. </b>

#### Access elements

In [None]:
# Location at given key stores desired element in the dictionary. 
# Square brackets are used for accessing elements:
daily_temps['mon']

In [None]:
# Get the keys (a view object in Python 3; wrap in list() if a list is needed)
daily_temps.keys()

In [None]:
# Get the values (a view object in Python 3; wrap in list() if a list is needed)
daily_temps.values()

In [None]:
# Get the (key, value) pairs (a view object in Python 3; wrap in list() if a list is needed)
daily_temps.items()

In [None]:
# Determine if a particular key exists.
# The `in` operator works in both Python 2 and Python 3;
# dict.has_key() exists only in Python 2, so it is not used here.
check = 'sun' in daily_temps
print(check)

The specific location that a value is stored is determined by a particular method of converting key values into index values called <font color='red'>hashing</font>. Thus, key values must be hashable. A requirement for a data type to be hashable is that the type must be immutable.

In [None]:
# Tuples are hashable, so a (month, day, year) tuple can be used as a dictionary key
temps = {
    ('April', 3, 2017): 70.2,
    ('April', 4, 2017): 67.2,
    ('April', 5, 2017): 71.8,
}
temps['April', 3, 2017]

#### Looping over a dictionary

In [None]:
# Loop all the keys from a dictionary
for day in daily_temps:
    print(day)

In [None]:
# Print the temperature of every day whose name starts with 'w'
for day, temp in daily_temps.items():
    if day.startswith('w'):
        print(temp)

In [None]:
# Loop every key and value from a dictionary
for day, temp in daily_temps.items():
    print(day, temp)

In [None]:
# What will the following program produce:
colors = ['red','orange','yellow','green','blue','indigo','violet']
d = {}
for color in colors:
    # group the names by length; setdefault creates the empty list on first use
    d.setdefault(len(color), []).append(color)
d

#### Ordered Dictionary

In [None]:
from collections import OrderedDict

# Insert the keys 'a'..'d' in order, each mapped to its upper-case form
od = OrderedDict()
for letter in 'abcd':
    od[letter] = letter.upper()

# Iteration returns the pairs in insertion order
for key, value in od.items():
    print(key, value)

#### Exercise:

Given the scrabble scores dictionary, write a program to compute the value of any word. For word='Python' the score is 14.

In [None]:
# Scrabble letter values, keyed by lower-case letter
scores = {"a": 1, "c": 3, "b": 3, "e": 1, "d": 2, "g": 2,
         "f": 4, "i": 1, "h": 4, "k": 5, "j": 8, "m": 3,
         "l": 1, "o": 1, "n": 1, "q": 10, "p": 3, "s": 1,
         "r": 1, "u": 1, "t": 1, "w": 4, "v": 4, "y": 4,
         "x": 8, "z": 10}
# Word to score; it contains an upper-case letter while the keys above are lower-case
word = 'Python'

In [None]:
# Write your program here
# Add up the letter values; lower-casing each letter matches the lower-case keys
score = sum(scores[letter.lower()] for letter in word)
score

## <font color="red">Set</font>

* A collection used to store non-duplicate data.
* Data is unordered, so elements cannot be accessed via indexing.
* <font color='blue'>mutable</font>.

#### Initialization

In [None]:
# An empty set must be created with set():
# () is an empty tuple and {} is an empty dictionary.
empty_set = set()
empty_set_too = set([])
# Duplicates are dropped when a set is built
fibo = {1,1,2,3,5}
# A list of primes with a repeated entry; set() keeps one copy of each value
some_primes = [2,2,3,5,7]
primes = set(some_primes)

In [None]:
fibo
primes

#### Mathematical set operations

In [None]:
# Two overlapping sets used by the operations below
a = {1, 2, 3, 4}
b = {3, 4, 5, 6}

In [None]:
a | b # Union or a.union(b)

In [None]:
a & b # Intersection  or a.intersection(b)

In [None]:
a < b # Proper subset (a <= b is the operator form of a.issubset(b))

In [None]:
a - b # Difference or a.difference(b)

### Exercise:

Use set() to compute the number of unique words in the "Gettysburg address". 

Extra credit: How many of each are there? (Hint: you need a dictionary)

In [None]:
gettysburg_address
new_ga_list # Result from previous exercise
# A set keeps a single copy of each distinct word
unique_words = set(new_ga_list)
print ("Number of unique words: ",len(unique_words))
# Extra credit: map every distinct word to its number of occurrences in the list
word_counts = {w: new_ga_list.count(w) for w in unique_words}
word_counts

## <font color="red">Counter</font>

- Counter is an unordered collection where elements are stored as dictionary keys and their counts as dictionary values
- Counter element counts can be positive, zero or negative integers

#### Initialization

In [None]:
from collections import Counter

# empty Counter
empty_counter = Counter()
print(empty_counter) 

# Counter built from a list: each distinct item is counted
my_counter = Counter(['a', 'a', 'b'])
print(my_counter)  

# Counts can also be given directly as keyword arguments
my_counter = Counter(a=2, b=3, c=1)
print(my_counter)  

#### Other data types as arguments of Counter

In [None]:
# Iterable as argument for Counter: here a string, whose characters are counted
my_counter = Counter('abcaaddcba')
print(my_counter) 

In [None]:
# List as argument to Counter
words_list = ['Cat', 'Dog', 'Horse', 'Dog']
my_counter = Counter(words_list)
print(my_counter)

In [None]:
# Dictionary as argument to Counter
word_count_dict = {'Dog': 2, 'Cat': 1, 'Horse': 1}
my_counter = Counter(word_count_dict)
print(my_counter) 

In [None]:
# Counter works with non-numbers too: here a string and an integer are stored as "counts"
# NOTE(review): arithmetic between Counters presumably requires numeric counts - confirm
special_counter = Counter(name='Pankaj', age=20)
print(special_counter)  

#### Getting Count of Elements

In [None]:
my_counter = Counter({'Dog': 2, 'Cat': 1, 'Horse': 1})
# Look up a count by key, exactly as with a dictionary
countDog = my_counter['Dog']
print(countDog)

# getting the count for a non-existing key does not raise KeyError; it returns 0
print(my_counter['Unicorn'])

#### Setting Count of Elements

In [None]:
my_counter = Counter({'Dog': 2, 'Cat': 1, 'Horse': 1})
# setting count
my_counter['Horse'] = 0
print(my_counter) 

# setting count for non-existing key, adds to Counter
my_counter['Unicorn'] = 1
print("Add to a counter: ", my_counter)  

#### Deleting an element from Counter

In [None]:
del my_counter['Unicorn']
print("Element deleted: ", my_counter)

#### Arithmetic Operations

In [None]:
# Two counters that include zero and negative counts
c1 = Counter(a=2, b=0, c=-1)
c2 = Counter(a=1, b=-1, c=2)

c = c1 + c2  # counts are added; only items with a positive result are kept
print("Add two counters: ", c)  

c = c1 - c2  # counts are subtracted; only items with a positive result are kept
print("Difference of two counters: ", c)  

c = c1 & c2  # intersection min(c1[x], c2[x])
print("Intersection of two counters: ", c) 

c = c1 | c2  # union max(c1[x], c2[x])
print("Union of two counters: ", c)  

#### Other Operators

In [None]:
my_counter = Counter({'Dog': 2, 'Cat': -1, 'Horse': 0})

# elements() - Returns an iterator over the elements, each repeated as many times as its count.
#            - Only elements with positive counts are returned.
elements = my_counter.elements()
for value in elements:
    print(value)

In [None]:
# most_common(n) returns a list of the n (element, count) pairs with the highest counts
most_common_element = my_counter.most_common(1)
print("Most common element: ", most_common_element)

# most_common() without argument lists all pairs from highest to lowest count;
# the slice [:-2:-1] keeps only the last pair, as a one-item list
least_common_element = my_counter.most_common()[:-2:-1]
print("Least common element: ", least_common_element) 

In [None]:
my_counter = Counter('ababab')
print(my_counter)  
c = Counter('abc')
print(c)  

# subtract: counts of c are subtracted in place
my_counter.subtract(c)
print("Subtract the counter: ", my_counter)  

# update: counts of c are added in place
my_counter.update(c)
print("Update the counter: ", my_counter)

In [None]:
my_counter = Counter({'a': 3, 'b': 3, 'c': 0})
# miscellaneous examples
print("Sum of values: ", sum(my_counter.values()))

# Converting a Counter to a list or set uses its keys, including keys whose count is 0
print("From counter to list: ", list(my_counter))
print("From counter to set:  ", set(my_counter))
print("From counter to dict: ", dict(my_counter))
print("List of items:        ", my_counter.items())

# remove 0 or negative count elements:
# unary plus returns a new Counter that keeps only the positive counts
my_counter = Counter(a=2, b=3, c=-1, d=0)
my_counter = +my_counter
print("Non-positive counts removed: ", my_counter)

# clear all elements
my_counter.clear()
print(my_counter)

### Exercise:

Use Counter to compute the number of unique words in the "Gettysburg address" and to list the number of times each word occurs. 

## Quiz 
<A HREF="https://www.programiz.com/python-programming/quiz/native-data-types"> Click here to take a quick quiz on data types</A>