# 3. Data Structures

## 3.1 More on Lists

### list.append(x)
Add an item to the end of the list.

In [None]:
# Sample list reused by the cells below; note that 'apple' occurs twice.
fruits = ['orange', 'apple', 'pear', 'banana', 'kiwi', 'apple']
# append() modifies the list in place, adding 'banana' as the new last element
fruits.append('banana')
# evaluate the list so the notebook shows its current contents
fruits

### list.insert(i, x) 
Insert an item at a given position.

In [None]:
# Insert 'beer' before the element currently at index 1; later elements shift right
fruits.insert(1, 'beer')
fruits

### list.remove(x)
Remove the first item from the list whose value is x. Raises `ValueError` if there is no such item.

In [None]:
# Remove the first occurrence of 'beer' from the list (in place)
fruits.remove('beer')
fruits

In [None]:
# When the cells above have run in order, 'beer' is no longer in the list, so
# this second remove() finds no match. list.remove() raises ValueError in that
# case; catch it so the error is shown without aborting a sequential run of
# the notebook.
try:
    fruits.remove('beer')
except ValueError as err:
    print('ValueError:', err)

In [None]:
# Only the first 'apple' is removed; any later occurrence stays in the list
fruits.remove('apple')
fruits

### list.index(x)
Return the zero-based index in the list of the first item whose value is x.

In [None]:
# Position of the first 'banana' in the list (index counting starts at 0)
fruits.index('banana')

### list.count(x)
Return the number of times x appears in the list.

In [None]:
# Add one more 'banana' before counting
fruits.append('banana')
print(fruits)
# Number of elements equal to 'banana'
fruits.count('banana')

### list.reverse()
Reverse the elements of the list in place.

In [None]:
# reverse() reorders the existing list object and returns None,
# so the list is evaluated on its own line to display the result
fruits.reverse()
fruits

### sorted(list)
Return a new sorted list built from the items of the list; the original list is left unchanged (use `list.sort()` to sort in place).

In [None]:
# sorted() returns a new list in ascending order; fruits itself is not modified
sorted(fruits)

### The del statement
Remove an item from a list given its index instead of its value.

In [None]:
# Start again from a fresh list so the index below refers to a known element
fruits = ['orange', 'apple', 'pear', 'banana', 'kiwi', 'apple']
print(fruits)
# del removes the element at index 1 (the first 'apple') without returning it
del fruits[1]
print(fruits)

### List Comprehensions
A concise way to create lists.

In [None]:
# Build the squares of 0..9 with an explicit loop: start from an empty list
# and append one value per iteration.
squares = []
for x in range(10):
    squares.append(x**2)
# note: the loop variable x still exists (with value 9) after the loop ends
squares

In [None]:
# Same list as the explicit loop, written as a single list comprehension
squares = [x**2 for x in range(10)]
squares

### Nested list comprehension with if statement

In [None]:
# All ordered pairs (x, y) taken from 1..3 where the two values differ.
# The leftmost `for` is the outer loop; the trailing `if` filters each pair.
[(x, y) for x in [1,2,3] for y in [1,2,3] if x != y]

In [None]:
# The same pairs built with explicit nested loops: the for/for/if clauses
# appear in the same order as in the comprehension form.
comb = []
for x in [1,2,3]:
    for y in [1,2,3]:
        if x != y:
            comb.append((x, y))
comb

## 3.2 Sets
### A set is an unordered collection with no duplicate elements.

In [None]:
fruits = ['orange', 'apple', 'pear', 'banana', 'kiwi', 'apple']
print(fruits)
# Converting to a set drops the duplicate 'apple'; the order in which a set
# prints its elements is not guaranteed to match the list order
print(set(fruits))

In [None]:
# set() over a string yields the set of its distinct characters
a = set('abracadabra')
b = set('alacazam')
print(a)
print(b)

### set operations on unique letters from two words

In [None]:
print(a - b)  # difference: letters in a but not in b
print(a | b)  # union: letters in a or b or both
print(a & b)  # intersection: letters in both a and b
print(a ^ b)  # symmetric difference: letters in a or b but not both

## 3.3 Tuples
### A tuple consists of a number of values separated by commas

In [None]:
x = 1
y = 2
z = 3
# Tuple packing: the commas create the tuple; parentheses are optional here
t = x, y, z
t

### Tuples are immutable

In [None]:
# Tuples do not support item assignment: the statement below raises
# TypeError. Catch it so the message is shown and the following line
# (and the rest of the notebook) still runs.
try:
    t[0] = 10
except TypeError as err:
    print('TypeError:', err)
# t is unchanged by the failed assignment
t

### Lists are mutable

In [None]:
l = [1, 2, 3]
# Unlike a tuple, a list accepts item assignment; the first element becomes 10
l[0] = 10
l

In [None]:
# Sequence unpacking: the values on the right are assigned position by
# position to x, y and z
x, y, z = 4, 5, 6
# packs the three names back into a tuple, which the notebook displays
x, y, z

## 3.4 Dictionaries

### It is best to think of a dictionary as a set of key: value pairs, with the requirement that the keys are unique. (Since Python 3.7 a dict preserves insertion order.)

In [None]:
# A pair of empty braces creates an empty dictionary
d = {}
# Rebind d to a dictionary literal with two key: value pairs
d = {'Tom': 123, 'May': 155}
# Subscripting with a key returns the value stored under it
print(d['Tom'])

### Add a new key value pair

In [None]:
# Assigning to a key that is not yet present inserts a new entry
d['Mary'] = 556
print(d)

### Overwrite an existing key value pair

In [None]:
# Assigning to a key that already exists replaces its value
d['Tom'] = 987
print(d)

### Delete a key value pair by key name

In [None]:
# del removes the entry stored under the given key
del d['May']
print(d)

### Access keys, values, key-value pairs from dict

In [None]:
# keys(), values() and items() return view objects over the dictionary;
# items() yields (key, value) tuples
print(d.keys())
print(d.values())
print(d.items())

### dict comprehensions

In [None]:
# Map each integer from 2 through 7 to its square
{x: x**2 for x in range(2, 8)}

### Use dict for counting

In [None]:
# List with repeated entries, used as input for the counting cell
fruits = ['orange', 'apple', 'pear', 'banana', 'kiwi', 'apple', 'apple', 'banana']
# `in` on a list tests membership only; it does not say how often the item occurs
print('apple' in fruits)

In [None]:
# Tally how often each fruit occurs: every key of d is a fruit name and its
# value is the running count for that fruit.
d = {}
for fruit in fruits:
    # get() supplies 0 the first time a fruit is seen
    d[fruit] = d.get(fruit, 0) + 1
print(d)

## 3.5 Looping Techniques

### When looping through a sequence, the position index and corresponding value can be retrieved at the same time using the enumerate() function.

In [None]:
# enumerate() yields (index, value) pairs, with the index starting at 0
for i, v in enumerate(['tic', 'tac', 'toe']):
    print(i, v)

### When looping through dictionaries, the key and corresponding value can be retrieved at the same time using the items() method.

In [None]:
d = {'Mary': 556, 'Tom': 123, 'May': 155, 'Alan': 135}
# items() yields (key, value) pairs, unpacked here into k and v
for k, v in d.items():
    print(k, v)

### To loop over two or more sequences at the same time, the entries can be paired with the zip() function.

In [None]:
names = ['Mary', 'Tom', 'May', 'Alan']
nums = [556, 123, 155, 135]
# zip() pairs the elements of both lists position by position
for name, num in zip(names, nums):
    print(name, num)

### To loop over a sequence in sorted order, use the sorted() function which returns a new sorted list.

In [None]:
# sorted() accepts any iterable and returns a new list; its input is left untouched
sorted_names = sorted(names)
sorted_dictkeys = sorted(d.keys())
sorted_dictvalues = sorted(d.values())
# (key, value) tuples compare element by element, so this orders by key first
sorted_dictitems = sorted(d.items())
print(sorted_names)
print(sorted_dictkeys)
print(sorted_dictvalues)
print(sorted_dictitems)

## Exercise 3.1: Generate a list of even numbers starting from 2, first with a regular for loop and then with a list comprehension.

In [None]:
# Exercise stub: replace the ... placeholder with your own solution.
# As written, the list holds a single Ellipsis object.
even_list = [...]
print(even_list)

## Exercise 3.2 Use a dictionary to count the term frequency in a sentence.  
Hint: use split()

In [None]:
sentence = "I scream you scream we all scream for ice cream"
d = {}
# Exercise stub: replace the ... placeholder with code that fills d
...

## Exercise 3.3 Use sets to calculate Jaccard similarity   
Hints: J(A, B) = |A ∩ B| / |A ∪ B| (size of the intersection divided by size of the union)

In [None]:
# Three sample sentences for the exercise; each is written as
# space-separated characters.
sentence1 = "看 魚 仔 在 那 游 來 游 去 游 來 游 去 我 對 你 想 來 想 去 想 來 想 去"
sentence2 = "花 在 風 中 搖 來 搖 去 搖 來 搖 去 我 對 你 想 來 想 去 想 到 半 暝"
sentence3 = "哭 啊 喊 啊 叫 你 媽 媽 帶 你 去 買 玩 具 啊"
