In [None]:
# to visualise sequence of execution of python code: https://pythontutor.com/

In [247]:
import numpy as np
import array
from random import random
import time
from time import perf_counter as pc
from collections import deque
import bisect
import sys

In [1]:
# container sequences: hold data of different types. These hold references to the objs they contain.
# flat sequences: hold data of a single type. Tends to store value of each item in that memory space (more compact).

# can also classify sequences by (im)mutability

# listcomp = list comprehensions. Suggests (convincingly) these are faster and easier to read than map(). 
# only make lists
# These are loops in this style:
txt = 'the world'
[ord(x) for x in txt]


[116, 104, 101, 32, 119, 111, 114, 108, 100]

In [4]:
[a + b for a in range(1, 3) for b in range(0, 3)]   # nested loops as listcomp

[1, 2, 3, 2, 3, 4]

In [6]:
# genexps = generator expressions. Similar to listcomps, but can make things other than lists
tuple(ord(x) for x in txt)  # tuple() instead of [] returns a tuple instead of list


(116, 104, 101, 32, 119, 111, 114, 108, 100)

In [11]:
array.array('i',(-ord(x) for x in txt) )  # genexp makes array of signed int 
# (change i to uppercase to make unsigned int)

array('i', [-116, -104, -101, -32, -119, -111, -114, -108, -100])

In [21]:
for t in (a + b for a in 'snvkb' for b in 'hhh'):  # iterating over a generator object
    print(t)

sh
sh
sh
nh
nh
nh
vh
vh
vh
kh
kh
kh
bh
bh
bh


In [24]:
# tuples: more than just immutable lists. 
# Also useful as records without field names: order of fields is critical
people = [('adam', 17), ('jen', 20)]
[print('%s, %s' % person) for person in people]  # '%' treats each each tuple's item as a separate field


adam, 17
jen, 20


[None, None]

In [30]:
firstname, age = people[0]  # tuples can be 'unpacked' as here, and print() in the cell above, and other methods
people[0]

('adam', 17)

In [34]:
vals = (3, 8)
divmod(*vals)  # unpack tuples directly into function inputs

(0, 3)

In [39]:
a, b, *rest = range(7)  # rest mops up all but the first two values as it's own tuple
print(rest)
a, *rest, b = range(7)   # mops up all but first and last values
print(rest)

[2, 3, 4, 5, 6]
[1, 2, 3, 4, 5]


In [41]:
# extract embedded coords in embedded tuple
metro_areas = [ ('Tokyo','JP',36.933,(35.689722,139.691667)), 
               ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889))]
for name, cc, pop, (latitude, longitude) in metro_areas:   
    print(str(latitude) + ' ' + str(longitude))

35.689722 139.691667
28.613889 77.208889


In [44]:
# named tuples seem like classes: need to be defined
from collections import namedtuple
City = namedtuple('City', 'name country population coordinates') # sets names of fields, separated by spaces
tokyo = City('Tokyo', 'JP', 36.933, (35.689722, 139.691667))
tokyo.population

36.933

In [47]:
london_data = ('London', 'UK', 36.933, (35.689722, 139.691667))
london = City._make(london_data)  # alternative way of making namedtuple
london._asdict()     

{'name': 'London',
 'country': 'UK',
 'population': 36.933,
 'coordinates': (35.689722, 139.691667)}

In [61]:
a = (1, 2, 4, 5)
print(sum(a))   # tuples support same methods as lists, apart from .reverse() - use reversed(tuple) instead


a = [1, 2, 4]
a.reverse()
a

12
<reversed object at 0x1029d7c40>


[4, 2, 1]

In [63]:
a = list(range(11))
print(a[:5])    # easy to split lists in half as use same number in index, just change position of :
print(a[5:])

[0, 1, 2, 3, 4]
[5, 6, 7, 8, 9, 10]


In [86]:
# slicing backwards and with leaps 
s = 'bicycle'
print(s[::3])  # every 3rd letter
print(s[::2])
print(s[1::1])
print(s[1:4:2])
print(s[1::2])
print(s[::-1])
print(s[::-2])
print(s[::-3])

# behind the scenes python is calling seq.__getitem__(slice(start, stop, step))

bye
bcce
icycle
iy
iyl
elcycib
eccb
eyb


In [93]:
# all base python sequences are 1-dimensional, unlike some in packages (Eg numpy)

# ...  = alias of the "Ellipsis" obj
# ... can be put as part of a function input
...

Ellipsis

In [109]:
a = np.arange(72)
a = np.reshape(a, (4,3,2,3))  # these 2 lines make dummy 4d array

a[1, :, :, :,]   # slice by first dimension
print(a[1, ...])        # Ellipsis gives shorthand for same thing
print('2nd:')
print(a[1, ..., 1] )  

[[[18 19 20]
  [21 22 23]]

 [[24 25 26]
  [27 28 29]]

 [[30 31 32]
  [33 34 35]]]
2nd:
[[19 22]
 [25 28]
 [31 34]]


In [111]:
print((1, 2, 3) * 5)
print('abs' * 5)

(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3)
absabsabsabsabs


In [113]:
[ ['_'] * 3 for x in range(3)]  # making list of lists

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [117]:
# += refers to __iadd__ method (in place addition)
a = 1
a += 4
print(a)

a *= 5   # __imul__
print(a)

5
25


In [120]:
id(a)   # identity value of the object

4301827120

In [126]:
import dis
dis.dis('a += 5')  # view bytecode execution

  1           0 LOAD_NAME                0 (a)
              2 LOAD_CONST               0 (5)
              4 INPLACE_ADD
              6 STORE_NAME               0 (a)
              8 LOAD_CONST               1 (None)
             10 RETURN_VALUE


In [133]:
# see more on looking at bytecode compilation with dis module: https://docs.python.org/3/library/dis.html
dis.code_info('a += 5')


AttributeError: 'Bytecode' object has no attribute 'disassemble'

In [139]:
a = ['hi', 'osnvlkrv', 'ons']
a.sort(reverse = True, key = len)     # inplace by default; returns None so can't chain dot operations
a          # on row about 'key' sets criteria for sorting

# sequences should be sorted for fast searching

['osnvlkrv', 'ons', 'hi']

In [157]:
HAYSTACK = [1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]
needle = 5
idx = bisect.bisect_right(HAYSTACK, needle)  # left/right sets which side of identical values we return for "needle" value
bisect.bisect_left(HAYSTACK, needle)

2

In [158]:
HAYSTACK.insert(idx, needle)
HAYSTACK

[1, 4, 5, 6, 8, 3, 12, 15, 20, 21, 23, 23, 26, 29, 30]

In [160]:
# more efficienct than cell above is bisect.insort() which combined bisect() and insert()
HAYSTACK = [1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]
needle = 5
bisect.insort(HAYSTACK, needle)   # equiv of bisect_right and insert. insort_left() for bisect_left() equiv
HAYSTACK

[1, 4, 5, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]

In [186]:
# says if you're using lists of numbers, better to put them in arrays
# if adding and removing items from ends of array, better to use a deque (double ended queue)

# arrays are as lean as arrays in C. for example, making an array of 10 million 8byte floats. Will be close to 8*10^7 bytes
# in size, having little overhead 
floats = array.array('d', (random() for i in range(10**7)))   # takes 3-4 seconds to make 10m values
sys.getsizeof(floats)

81940352

In [190]:
a = time.time()
floats2 = [f * 2 for f in floats]
b = time.time()
b - a   # just over 1 second to multiply 10m values: arrays are fast!

1.3570830821990967

In [193]:
floats2[:10]

[1.1134023770386026,
 1.7309273672306238,
 1.1847057096754787,
 1.9812085789342493,
 1.7306939014870493,
 1.8411784759096046,
 1.204247133724379,
 0.6918232415303285,
 0.7868675689353266,
 1.4358686357448047]

In [169]:
# sets are optimised for fast membership checking: good if you need to know if someone is in a group
# sets arent ordered, just key values

184

In [None]:
sys.getsizeof(HAYSTACK)  # size of object in bytes

In [None]:
# pickle.dump as a good way of saving arrays and more flexible with object structures and formatting

In [195]:
floats2 = sorted(floats2)
floats2[:10]

[3.814934779189372e-07,
 5.731365886330053e-07,
 6.191980364000926e-07,
 6.871367632665937e-07,
 7.930918486387384e-07,
 1.1726723645466564e-06,
 1.9383024376651292e-06,
 2.1088299060334492e-06,
 2.136180758283146e-06,
 2.1808767001196827e-06]

In [217]:
# Memory View ('memorview' class) = sequence type that can slice arrays without copying bytes
# inspired by NumPy, but isnt' limited to that, includes sql DBs, PIL images, etc
# Much faster than methods that involve copying bytes
numbers = array.array('h', [-2, -1, 0, 1, 2])
memv = memoryview(numbers)
memv_oct = memv.cast('B')  # setting up to look at Bytes
memv_oct[5] = 4  # changing one of the bytes, which chanegs the memory view obj 
                    # and the 'numbers' obj it points at
numbers         
# this can be a fast way to do things


# SciPy is based on C and Fortran, so it's very fast

array('h', [-2, -1, 1024, 1, 2])

In [222]:
a = np.arange(20)
a.shape = 4, 5
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [229]:
floats_np = np.array(floats)
t0 = pc(); floats2 = floats * 2; pc() - t0  # 0.1 seconds: much faster than listcomp. So use numpy for array maths!

0.11609937899993383

In [234]:
# making an 8-bit array in numpy
dt = np.dtype(np.int8)  
x = np.array(range(20), dtype=dt)
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19], dtype=int8)

In [246]:
dq = deque(range(10), maxlen=20)  # a double queue
print(dq)
dq.rotate(5)   
print(dq)
dq.extend(range(99, 104))
print(dq)
dq.extendleft(range(100, 500, 4))  # overwrites as many data up to maxlen limit
print(dq)   

# queue stores data in order with first-in-first-out approach

deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=20)
deque([5, 6, 7, 8, 9, 0, 1, 2, 3, 4], maxlen=20)
deque([5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 99, 100, 101, 102, 103], maxlen=20)
deque([496, 492, 488, 484, 480, 476, 472, 468, 464, 460, 456, 452, 448, 444, 440, 436, 432, 428, 424, 420], maxlen=20)
