In [1]:
from pathlib import Path

from IPython.display import HTML

## Inline the notebook's custom stylesheets into one <style> element.
## Path.read_text() opens, reads and closes each file, so no file handle is
## left open (a bare open(...).read() never closes its handle explicitly).
css = "".join(
    Path(css_file).read_text()
    for css_file in ("notebook_css/style-table.css", "notebook_css/style-notebook.css")
)
## Last expression of the cell -> rendered by the notebook as HTML.
HTML('<style>{}</style>'.format(css))

Author: @imflash217

#### Further Reading Books:
1. 

# An array of sequences

## Overview of `builtin` SEQUENCES

* **`Container Sequences`**: `list`, `tuple`, `collections.deque`  can hold items of **different types**

* **`Flat Sequences`**: `str`, `bytes`, `bytearray`, `memoryview`, `array.array` can hold items of **ONLY one type**

* **`Mutable Sequences`**: `list`, `bytearray`, `array.array`, `collections.deque`, `memoryview`

* **`IMMUTABLE Sequences`**: `tuple`, `str`, `bytes`

#### **`Container Sequences`** hold `references` to objects they contain, which may be of any type

#### **`Flat Sequences`** PHYSICALLY store the value of each item within its own memory space. 

### `list` is MUTABLE & mixed-type sequence

## `List Comprehensions` & `Generator Expressions`

#### Mastering `list-comprehensions` opens the door to `generator-expressions`, which can produce elements to fill up sequences of any type

If you want to build a `list`, then a quick & efficient way to build it is **`list comprehensions`**

If you want to build any other kinds of sequences, then the best way is **`generator expressions`**

In [6]:
## example_2_1.py
## Build a list of Unicode code points from a string with an explicit loop.

symbols = "$#🎯🙂🎲🤗ॐ"
codes = []
for symbol in symbols:
    codes.append(ord(symbol))    ## ord() returns the integer code point of one character
    print(symbol)                ## echo each character as it is visited

codes                            ## last expression -> shown as the cell output

$
#
🎯
🙂
🎲
🤗
ॐ


[36, 35, 127919, 128578, 127922, 129303, 2384]

In [10]:
## example_2_2.py
## Build a list of Unicode code points from a string using a LIST-COMPREHENSION.

symbols = "$#🎯🙂🎲🤗ॐ"
## A single expression does the work of the empty list + loop + append().
codes = [ord(symbol) for symbol in symbols]

print(symbols)
codes                            ## last expression -> shown as the cell output

$#🎯🙂🎲🤗ॐ


[36, 35, 127919, 128578, 127922, 129303, 2384]

#### Listcomps no longer leak variables

In [12]:

x = "ABC"
## The loop variable `x` inside the brackets is local to the comprehension and
## shadows the outer `x` only there; the iterable (the `x` after `in`) is the
## outer string "ABC".
dummy = [ord(x) for x in x]

print(x)                         ## <- the value of x is preserved
print(dummy)                     ## <- the list comprehension produces the expected list

ABC
[65, 66, 67]


### `list-comprehension` v/s `map()` & `filter()`

* List-comprehensions do everything the `map` and `filter` functions do.

In [15]:
## example_2_3.py
## the same list built by a list-comprehension & map/filter functions

symbols = "$#🎯🙂🎲🤗ॐ"

In [20]:
%%time

beyond_ascii_listcomp = [ord(s) for s in symbols if ord(s) > 127]

print(beyond_ascii_listcomp)

[127919, 128578, 127922, 129303, 2384]
CPU times: user 316 µs, sys: 209 µs, total: 525 µs
Wall time: 401 µs


In [21]:
%%time

beyond_ascii_map_filter = list(filter(lambda c: c>127, map(ord, symbols)))

print(beyond_ascii_map_filter)

[127919, 128578, 127922, 129303, 2384]
CPU times: user 179 µs, sys: 74 µs, total: 253 µs
Wall time: 232 µs


### Cartesian Products

List-comprehension (aka `listcomp`) can generate cartesian product of two or more iterables.

The items that make up the cartesian product are tuples made from items from every input iterable.

The resulting list has a length equal to the lengths of the input iterables multiplied.

In [22]:
## example_2_4.py
## Cartesian product using a list comprehension.

colors = ["black", "white"]
sizes = ["S", "M", "L"]

## arranged by "color" first then "size":
## the first `for` clause is the outer loop, the second one the inner loop.
tshirts = [(color, size) for color in colors for size in sizes]

tshirts                          ## 2 colors x 3 sizes -> 6 (color, size) tuples

[('black', 'S'),
 ('black', 'M'),
 ('black', 'L'),
 ('white', 'S'),
 ('white', 'M'),
 ('white', 'L')]

In [23]:
## arranged by "color" first then "size"
tshirts = [(color, size) for color in colors 
                         for size in sizes]

tshirts

[('black', 'S'),
 ('black', 'M'),
 ('black', 'L'),
 ('white', 'S'),
 ('white', 'M'),
 ('white', 'L')]

In [25]:
## arranged by "color" first then "size"
for color in colors:
    for size in sizes:
        print((color, size))

('black', 'S')
('black', 'M')
('black', 'L')
('white', 'S')
('white', 'M')
('white', 'L')


In [27]:
## NOTE: arranged by "size" first then "color"
##
tshirts = [(color, size) for size in sizes 
                         for color in colors]

tshirts

[('black', 'S'),
 ('white', 'S'),
 ('black', 'M'),
 ('white', 'M'),
 ('black', 'L'),
 ('white', 'L')]

### Generator Expressions _aka_ `genexps`

## `Tuples` are not just immutable lists

### Tuples as records

In [29]:
## example_2_7.py
## Tuples used as records: each position holds one field, so the order matters.

## NOTE(review): "corrdinates" looks like a typo for "coordinates"; the name is
## left unchanged because cells outside this view may use it -- confirm, then rename.
lax_corrdinates = (33.9425, -118.408056)
## One assignment unpacks a five-field record into five separately named variables.
city, year, population, exchange, area = ("Tokyo", 2003, 32450, 0.66, 8014)
## A list of two-field records; presumably (country code, passport number).
traveler_ids = [("USA", "31195855"), ("BRA", "CE342567"), ("ESP", "XDA205856")]

In [30]:
## sorted() returns a new list of the tuples in ascending order (first item first).
for passport in sorted(traveler_ids):
    ## The % operator takes the whole tuple and fills one %s per item.
    print("%s / %s" % passport)

BRA / CE342567
ESP / XDA205856
USA / 31195855


In [31]:
## Tuple unpacking in the `for` target; `_` is a throwaway name for the second
## item, which this loop does not use.
for country, _ in traveler_ids:
    print(country)

USA
BRA
ESP


### Tuple Unpacking

#### An elegant application of tuple unpacking is **swapping** the values of variables w/o using a temp variable

```python
b, a = a, b
```

#### Prefixing an argument with `*` when calling a function

In [32]:
divmod(20, 8)

(2, 4)

In [33]:
t = (20, 8)
divmod(*t)

(2, 4)

In [34]:
quotient, remainder = divmod(*t)
quotient, remainder

(2, 4)

In [35]:
import os
root, filename = os.path.split("/home/imflash217/.ssh/idrsa.pub")
root, filename

('/home/imflash217/.ssh', 'idrsa.pub')

#### Using `*` to grab excess items

In [36]:
a, b, *rest = range(5)
a, b, rest

(0, 1, [2, 3, 4])

In [37]:
a, b, *rest = range(3)
a, b, rest

(0, 1, [2])

In [38]:
a, b, *rest = range(2)
a, b, rest

(0, 1, [])

In [39]:
a, *body, c, d = range(5)
a, body, c, d

(0, [1, 2], 3, 4)

In [40]:
*head, b, c, d = range(5)
head, b, c, d

([0, 1], 2, 3, 4)

### Nested Tuple Unpacking

For example: `(a, b, (c, d))`

In [59]:
## example_2_8.py
## Unpack nested tuples to access the latitude and longitude of each record.

## Each record has the shape (name, cc, pop, (lat, long)) -- the same names the
## loop below unpacks it into.
metro_areas = [("Tokyo", "JP", 36.933, (35.689722, 139.679245)),
               ("Delhi NCR", "IN", 21.935, (28.613889, 77.208889)),
               ("Mexico City", "MX", 20.142, (19.433333, -99.133333)),
               ("New York-Newark", "US", 20.104, (40.808611, -74.020386)),
               ("Sao Paulo", "BR", 19.649, (-23.547776, -46.635833)),
              ]

## Header row: an empty 15-wide name column, then two titles centred in 9 columns.
print(f"{'':15} | {'lat.':^9} | {'long.':^9}")
## The nested target (lat, long) unpacks the inner coordinate pair in the same step.
for name, cc, pop, (lat, long) in metro_areas:
    if long <= 0:                ## keep only the rows whose longitude is zero or negative
        print(f"{name:15} | {lat:9.4f} | {long:9.4f}")

                |   lat.    |   long.  
Mexico City     |   19.4333 |  -99.1333
New York-Newark |   40.8086 |  -74.0204
Sao Paulo       |  -23.5478 |  -46.6358


### Named Tuples

Try to bind the **class** returned by `collections.namedtuple` to the same name as its `typename`. For example: `Card = namedtuple("Card", ...)`

The `collections.namedtuple` function is a **factory** that produces subclasses of `tuple` enhanced with field names and a class name -- which helps debugging

Instances of a class built using `collections.namedtuple` take exactly the same amount of memory as tuples because the field names are stored in the class.

But they use less memory than a regular object because they do **not** store attributes in a per-instance `__dict__`.

Two parameters are required to create a `namedtuple`: **(1).** a class name, `typename` **(2).** the field names, `field_names` (an iterable of strings or a single space-delimited string)

Apart from the methods inherited from `tuple`, `namedtuple` has some special attributes and methods too:

1. `._fields`
2. `._make(iterable)`
3. `._asdict()`

In [60]:
import collections

In [71]:
my_card = collections.namedtuple(typename="Card", field_names=["rank", "suit"])

In [72]:
my_card

__main__.Card

In [75]:
## example_2_9.py
## Define a namedtuple type and create an instance of it.

import collections

## field_names is given here as a single space-separated string of names.
my_city = collections.namedtuple(typename="City", field_names="name country population coordinates")
## Values are passed positionally, one per field, in the declared order.
tokyo = my_city("Tokyo", "JP", 36.933, (35.689722, 139.691667))


In [76]:
tokyo

City(name='Tokyo', country='JP', population=36.933, coordinates=(35.689722, 139.691667))

In [77]:
tokyo.population

36.933

In [78]:
tokyo.country

'JP'

In [79]:
tokyo.name

'Tokyo'

In [80]:
tokyo.coordinates

(35.689722, 139.691667)

#### We can access the fields by **names** or by **index**

In [84]:
tokyo

City(name='Tokyo', country='JP', population=36.933, coordinates=(35.689722, 139.691667))

In [83]:
len(tokyo)

4

In [85]:
tokyo[2]

36.933

In [87]:
tokyo[1]

'JP'

In [88]:
tokyo[-1]

(35.689722, 139.691667)

In [90]:
## example_2_10.py
## namedtuple attributes & methods (continued from example_2_9.py)

my_city._fields

('name', 'country', 'population', 'coordinates')

In [92]:
## A second namedtuple type, used below for the nested coordinates field.
LatLong = collections.namedtuple(typename="LatLong", field_names="lat long")

## A plain tuple with one value per City field; the last value is a LatLong.
delhi_data = ("Delhi NCR", "IN", 21.935, LatLong(28.613889, 77.208889))

## _make() builds an instance from an iterable, like my_city(*delhi_data).
delhi = my_city._make(delhi_data)

delhi

City(name='Delhi NCR', country='IN', population=21.935, coordinates=LatLong(lat=28.613889, long=77.208889))

In [93]:
delhi._asdict()

OrderedDict([('name', 'Delhi NCR'),
             ('country', 'IN'),
             ('population', 21.935),
             ('coordinates', LatLong(lat=28.613889, long=77.208889))])

In [94]:
for key, val in delhi._asdict().items():
    print(f"{key} : {val}")

name : Delhi NCR
country : IN
population : 21.935
coordinates : LatLong(lat=28.613889, long=77.208889)


### Tuples as immutable lists

In [132]:
import pandas as pd
pd.set_option('display.max_colwidth', None)

In [134]:
## Small two-row frame that mixes plain ASCII cells with emoji cells.
data = [
    ("a", "b", "c", "d"),
    ("AAAAAAAA", "✅", "🎯", "D"),
]

df = pd.DataFrame(data, columns=["w", "x", "y", "z"])
df

Unnamed: 0,w,x,y,z
0,a,b,c,d
1,AAAAAAAA,✅,🎯,D


In [147]:
yes = "🎯"
no = ""
data = [("seq.__add__(seq2)", yes, yes, "`seq + seq2`  --> CONCATENATION"),
        ("seq.__iadd__(seq2)", yes, no, "`seq += seq2` --> in-place CONCATENATION"),
        ("seq.append(e)", yes, no, "append one element after last"),
        ("seq.clear()", yes, no, "delete ALL items"),
        ("seq.__contains__(e)", yes, yes, "`e in seq`"),
        ("seq.copy()", yes, no, "SHALLOW copy of the sequence"),
        ("seq.count(e)", yes, yes, "count occurances of the element"),
        ("seq.__delitem__(idx)", yes, no, "DELETE item from the index position"),
        ("seq.extend(it)", yes, no, "APPEND items from the ITERABLE 'it'"),
        ("seq.__getitem__(idx)", yes, yes, "`seq[idx]`  --> get item from the index position"),
        ("seq.__getnewargs__()", no, yes, "support for optimized serialization with `pickle`"),
        ("seq.index(e)", yes, yes, "find index position of the FIRST occurance of the element `e`"),
        ("seq.insert(idx, e)", yes, no, "insert element `e` BEFORE the item at index/position `idx`"),
        ("seq.__iter__()", yes, yes, "get ITERATOR"),
        ("seq.__len__()", yes, yes, "`len(s)` --> number of items in the sequence"),
        ("seq.__mul__(n)", yes, yes, "`seq * n` --> repeated CONCATENATION"),
        ("seq.__imul__(n)", yes, no, "`seq *= n` --> IN-PLACE repeated concatenation"),
        ("seq.__rmul__(n)", yes, yes, "`n * seq` --> REVERSED repeated concatenation"),
        ("seq.pop(<< idx >>)", yes, no, "REMOVE & RETURN last element or item at the OPTIONAL index `idx`"),
        ("seq.remove(e)", yes, no, "remove FIRST OCCURANCE of element `e` by-value"),
        ("seq.reverse()", yes, no, "reverse the order of the items IN_PLACE"),
        ("seq.__reversed__()", yes, no, "get ITERATOR to scan element from last-to-first"),
        ("seq.__setitem__(idx, e)", yes, no, "`seq[p] = e` --> put element `e` at index `idx`, OVERIDING the existing item"),
        ("seq.sort(<< key >>, << reverse >>)", yes, no, "sort items IN-PLACE with optional keywords `key` & `reverse`"),
       ]

df = pd.DataFrame(data, columns="#, list, tuple,  ".split(", "))
df.style.set_properties(**{'text-align': 'left'})
df

Unnamed: 0,#,list,tuple,Unnamed: 4
0,seq.__add__(seq2),🎯,🎯,`seq + seq2` --> CONCATENATION
1,seq.__iadd__(seq2),🎯,,`seq += seq2` --> in-place CONCATENATION
2,seq.append(e),🎯,,append one element after last
3,seq.clear(),🎯,,delete ALL items
4,seq.__contains__(e),🎯,🎯,`e in seq`
5,seq.copy(),🎯,,SHALLOW copy of the sequence
6,seq.count(e),🎯,🎯,count occurances of the element
7,seq.__delitem__(idx),🎯,,DELETE item from the index position
8,seq.extend(it),🎯,,APPEND items from the ITERABLE 'it'
9,seq.__getitem__(idx),🎯,🎯,`seq[idx]` --> get item from the index position


## Slicing

### Assigning to slices

In [166]:
l = list(range(10))
l

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [167]:
len(l)

10

In [168]:
l[2:5]

[2, 3, 4]

In [169]:
l[2:5] = [2000, 3000]

In [170]:
l

[0, 1, 2000, 3000, 5, 6, 7, 8, 9]

In [171]:
len(l)

9

In [172]:
del l[5:7]

In [173]:
l

[0, 1, 2000, 3000, 5, 8, 9]

In [174]:
len(l)

7

In [175]:
l[2:5]

[2000, 3000, 5]

In [177]:
l[2:5] = [999]

In [178]:
l

[0, 1, 999, 8, 9]

In [179]:
len(l)

5

In [180]:
## The right-hand side of a slice assignment must be an iterable, even when it
## carries a single item; a bare int raises TypeError.  The error is caught and
## printed so the demonstration stays visible without stopping "Run All".
try:
    l[2:5] = 888
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: can only assign an iterable

### Using ` + ` and ` * ` with sequences

`+` CONCATENATES two sequences (usually of the same type); `*` repeats a sequence `n` times (repeated concatenation).

ALWAYS creates a NEW sequence (does not change the original input sequences)

**NOTE: Beware of expressions like `mutable_seq * n` (where `mutable_seq` is a MUTABLE SEQUENCE)**

For eg: `my_list = [[]] * 3` creates a list with three references to the same inner list.

In [181]:
l = [1, 2, 3]

l * 5

[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]

In [182]:
l

[1, 2, 3]

In [183]:
5 * "abcd"

'abcdabcdabcdabcdabcd'

## Building list of lists