# 4.1 Back to the Basics

### Assignments

There are two types of assign in Python

    1. copy -- when the object is immutable
    
    2. assign object reference -- when the object is muutable

In [1]:
# copy

foo = 'Monty'
bar = foo
foo = 'Python'

In [2]:
bar

'Monty'

In [5]:
# object reference

foo = ['Monty', 'Python']
bar = foo
foo[1] = 'Bodkin'
bar

['Monty', 'Bodkin']

In [6]:
# we can force the object reference to be the copy

foo = ['Monty', 'Python']
bar = foo.copy()
foo[1] = 'Bodkin'
bar

['Monty', 'Python']

In [9]:
# check this interesting example about the object reference

empty = []
nested = [empty, empty, empty]
nested

[[], [], []]

In [10]:
nested[1].append('Python')
nested

[['Python'], ['Python'], ['Python']]

In [11]:
# id() function print out the numberical identifier for any object

print(id(nested[0]))
print(id(nested[1]))
print(id(nested[2]))

140668012327808
140668012327808
140668012327808


In [12]:
nested = [[]] * 3
nested[1].append('Python')
nested

[['Python'], ['Python'], ['Python']]

In [13]:
nested[1] = ['Monty'] # overwrite with [] with new reference
nested

[['Python'], ['Monty'], ['Python']]

In [19]:
# note append and assignment is different, one apply on the reference
# and the other one create a new object

nested = [[]] * 3
nested[1] = ['Python']
nested

[[], ['Python'], []]

In [17]:
nested[0].append('a')
nested

[['Python', 'a'], ['Python', 'a'], ['Python', 'a']]

### Equality

In [20]:
empty = []
nested = [empty, empty, empty]
nested

[[], [], []]

In [21]:
nested[1].append('Python')
nested

[['Python'], ['Python'], ['Python']]

In [22]:
nested[0] is nested[1] is nested[2]

True

In [23]:
import random

size = 5
python = ['Python']
snake_nest = [python] * size

position = random.choice(range(size))
snake_nest[position] = ['Python']
snake_nest

[['Python'], ['Python'], ['Python'], ['Python'], ['Python']]

In [24]:
[id(snake) for snake in snake_nest]

[140668020568448,
 140668020568448,
 140668019657472,
 140668020568448,
 140668020568448]

### Conditionals

    non-empty string or list is evaluated as True
    
    empty string or list is evaluated as False

In [53]:
mixed = ['cat', '', ['dog'], []]

for element in mixed:
    if element:
        print(f'{element}')

cat
['dog']


In [54]:
# all() and any() can be applied to a list to checj condition

sent = ['No', 'good', 'fish', 'goes', 'anywhere', 'without', 'a', 'porpoise', '.']
all(len(w) > 4 for w in sent)

False

In [55]:
any(len(w) > 4 for w in sent)

True

# 4.2 Sequences

In [56]:
# tuple -- immutable and ordered

t = 'walk', 'fem', 3 
t

('walk', 'fem', 3)

In [57]:
set(t)

{3, 'fem', 'walk'}

In [58]:
list(set(t))

['walk', 3, 'fem']

In [61]:
# comma has higher precedence than assignment

words = ['I', 'turned', 'off', 'the', 'spectroroute']
words[2], words[3], words[4] = words[3], words[4], words[2]
words

['I', 'turned', 'the', 'spectroroute', 'off']

In [63]:
# zip -- group sequence of items

words = ['I', 'turned', 'off', 'the', 'spectroroute']
tags = ['noun', 'verb', 'prep', 'det', 'noun']
list(zip(words, tags))

[('I', 'noun'),
 ('turned', 'verb'),
 ('off', 'prep'),
 ('the', 'det'),
 ('spectroroute', 'noun')]

In [64]:
list(enumerate(list(zip(words, tags))))

[(0, ('I', 'noun')),
 (1, ('turned', 'verb')),
 (2, ('off', 'prep')),
 (3, ('the', 'det')),
 (4, ('spectroroute', 'noun'))]

### Combining Different Sequence Types

In [65]:
words = 'I turned off the spectroroute'.split() 
words

['I', 'turned', 'off', 'the', 'spectroroute']

In [66]:
wordlens = [(len(word), word) for word in words] 
wordlens

[(1, 'I'), (6, 'turned'), (3, 'off'), (3, 'the'), (12, 'spectroroute')]

In [67]:
wordlens.sort()
' '.join(w for (_, w) in wordlens) 

'I off the turned spectroroute'

    * list: sequence of objects all have the same type, of arbitrary length
    
    * tuple: collection of objects of different type, of fixed length
    
    
    Tuple is immutable so we often use it to hold a record or a collection of different fields relating to some entity. Usually, in NLP, we should consider (if) to use tuple to store a lexical.
    
    A good way to decide when to use tuples versus lists is to ask whether the interpretation of an item depends on its position. e.g.('grail', 'noun') not ('noun', 'grail') as a lexical dictionary.
    
    For words, usually a list is used. The linguistic meaning of the words might be different, but the interpretation of list items as tokens is unchanged.

### Generator Expressions

    We’ve been making heavy use of list comprehensions, for compact and readable processing of texts.

In [70]:
import nltk

text = '''"When I use a word," Humpty Dumpty said in rather a scornful tone,
... "it means just what I choose it to mean - neither more nor less."'''

[w.lower() for w in nltk.word_tokenize(text)]

['``',
 'when',
 'i',
 'use',
 'a',
 'word',
 ',',
 "''",
 'humpty',
 'dumpty',
 'said',
 'in',
 'rather',
 'a',
 'scornful',
 'tone',
 ',',
 '...',
 '``',
 'it',
 'means',
 'just',
 'what',
 'i',
 'choose',
 'it',
 'to',
 'mean',
 '-',
 'neither',
 'more',
 'nor',
 'less',
 '.',
 "''"]

In [71]:
# further process with function

max([w.lower() for w in nltk.word_tokenize(text)]) 

'word'

In [73]:
# further process with generator expression

max(w.lower() for w in nltk.word_tokenize(text)) 

'word'

# 4.3 Questions of Style

### Python Coding Style

    always use 4 spaces instead of tab for indentation
    
    align for when use multiple for in list comprehension

In [76]:
a = [1, 2, 3]
b = [2, 4, 5]

[(i, j) for i in a 
        for j in b]

[(1, 2), (1, 4), (1, 5), (2, 2), (2, 4), (2, 5), (3, 2), (3, 4), (3, 5)]

### Procedural Versus Declarative Style

In [77]:
# procedural style

tokens = nltk.corpus.brown.words(categories='news')
count = 0
total = 0
for token in tokens:
    count += 1
    total += len(token)
    
print(total/count)

4.401545438271973


In [78]:
# declarative style

total = sum(len(t) for t in tokens)
print(total/len(tokens))

4.401545438271973


In [83]:
def apple(x):
    '''
    print apple
    '''
    print('apple')

apple('a')

apple


In [84]:
help(apple)

Help on function apple in module __main__:

apple(x)
    print apple



### Parameter Passing

    The same is true for functions. Python interprets function parameters as values (this is known as call-by-value).

In [85]:
def set_up(word, properties):
    word = 'lolcat'
    properties.append('noun')
    properties = 5

w = ''
p = []

set_up(w, p)

In [86]:
w

''

In [87]:
p

['noun']

### Checking Parameter Types

    assert

# 4.5 Doing More with Functions

### Named Arguments

In [89]:
# *args -- arguments, **kwargs -- keyword arguments

def generic(*args, **kwargs):
    print(args)
    print(kwargs)

generic(1, 'African American', apple='apple')

(1, 'African American')
{'apple': 'apple'}


In [95]:
# *args is not a special character

song = [['four', 'calling', 'birds'],['three', 'French', 'hens'],['two', 'turtle', 'doves']]

list(zip(song[0], song[1], song[2]))

[('four', 'three', 'two'),
 ('calling', 'French', 'turtle'),
 ('birds', 'hens', 'doves')]

In [96]:
list(zip(*song))

[('four', 'three', 'two'),
 ('calling', 'French', 'turtle'),
 ('birds', 'hens', 'doves')]