# Dictionaries/Sets/deques

In [1]:
# each entry is a key: value pair; keys must be hashable (typically immutable, e.g. str, int, tuple)
# purpose: efficient lookup based on the key
{'abc' : 123, 'def' : 456}

{'abc': 123, 'def': 456}

In [2]:
dict([('abc',123), ('def', 456)])

{'abc': 123, 'def': 456}

In [3]:
# a dict is a hash table: keys may be any hashable type (int, float, str, tuple),
# and values may be any object (str, tuple, list, function, ...)
{1:'abc', 1.1:(1,1),'one':['a','n'], (1,1): lambda x: x**2}

{1: 'abc',
 1.1: (1, 1),
 'one': ['a', 'n'],
 (1, 1): <function __main__.<lambda>(x)>}

In [4]:
x = {1:'abc', 'y':'hello', (1,1):3.14159}

In [7]:
x['def'] = -1

In [9]:
x['y'] = 'goodbye'

In [13]:
x.clear() # call the method with parentheses: x.clear(), not x.clear

In [14]:
x

{}

In [16]:
x = set(range(5))
x

{0, 1, 2, 3, 4}

In [24]:
x.add(9)
x

{0, 1, 2, 3, 4, 9}

In [25]:
x.remove(9)
x

{0, 1, 2, 3, 4}

In [27]:
x.discard(7) # no error even when the element is absent (unlike remove(), which raises KeyError)
x

{0, 1, 2, 3, 4}

# Set and dict comprehensions

In [28]:
{x.lower() for x in "The quick brown fox jumped a lazy dog."}

{' ',
 '.',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 't',
 'u',
 'w',
 'x',
 'y',
 'z'}

In [29]:
names = ["alice", "bob", "carol", "dave"]
grades = ["A","B","C","D"]
# dict comprehension: same braces as a set comprehension, but with key: value pairs
{name: grade for name, grade in zip(names,grades)}

{'alice': 'A', 'bob': 'B', 'carol': 'C', 'dave': 'D'}

In [30]:
import collections
collections.deque([1,2,3])

deque([1, 2, 3])

In [31]:
from collections import deque
deque(("A",2,True))

deque(['A', 2, True])

In [33]:
x = deque(range(3))

In [34]:
x.appendleft(-1)

In [35]:
x

deque([-1, 0, 1, 2])

In [36]:
x.append(3)
x

deque([-1, 0, 1, 2, 3])

In [37]:
x.popleft() # removes and returns the leftmost element; the others stay in the deque

-1

In [38]:
x

deque([0, 1, 2, 3])

In [39]:
x.pop() # removes and returns the rightmost element; the others stay in the deque

3

In [40]:
x

deque([0, 1, 2])

# Exercise 2

In [None]:
# Which is the most appropriate data structure, and why? (It seems I misunderstood this question completely.)

# 1. A fixed collection of 100 integers => My answer: tuple (immutable)
# ==> Wrong. For summary operations (mean, stdev, ...), a list is simpler and better than a deque.
# ==> For a homogeneous collection (like 100 integers), lists are preferred.

# 2. A stack (first in, last out) of customer records => My answer: set (no duplicates & immutable)
# ==> Wrong. A list is better and simpler. A deque also works but is overkill.

# 3. A queue (first in, first out) of customer records => My answer: deque (mutable collection)
# ==> Correct. A deque is a good fit.

# 4. A count of word occurrences within a document => My answer: vector/array
# ==> Wrong. A dictionary is a good fit: unique keys (words) mapped to values (counts).

# Numpy

In [41]:
import numpy as np

In [42]:
np.array([1,2,3]) #one dimension

array([1, 2, 3])

In [44]:
np.array([[1,2],[3,4]]) #two dimensions: nested lists, i.e. two levels of square brackets [[ ]]

array([[1, 2],
       [3, 4]])

In [45]:
np.array([1,1]).dtype

dtype('int64')

In [47]:
np.array([True, False]).dtype

dtype('bool')

In [49]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [51]:
np.arange(3,5,0.25) #not including last value

array([3.  , 3.25, 3.5 , 3.75, 4.  , 4.25, 4.5 , 4.75])

In [52]:
np.linspace(0,1,11) # 11 evenly spaced values from 0 to 1 inclusive; like seq(0, 1, length.out=11) in R

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [53]:
np.logspace(0,2,11)

array([  1.        ,   1.58489319,   2.51188643,   3.98107171,
         6.30957344,  10.        ,  15.84893192,  25.11886432,
        39.81071706,  63.09573445, 100.        ])

In [54]:
np.ones(4)

array([1., 1., 1., 1.])

In [55]:
np.zeros(6) # like rep(0, 6) in R (filled with zeros, not NA)

array([0., 0., 0., 0., 0., 0.])

In [57]:
np.full(3, False)

array([False, False, False])

In [58]:
np.empty(4) # uninitialized: contents are whatever was in memory (not guaranteed to be anything); assign values later

array([1., 1., 1., 1.])

In [59]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [60]:
np.identity(2)

array([[1., 0.],
       [0., 1.]])

In [61]:
np.zeros((2,2))

array([[0., 0.],
       [0., 0.]])

In [62]:
np.diag([3,2,1])

array([[3, 0, 0],
       [0, 2, 0],
       [0, 0, 1]])

In [63]:
np.tri(3) #lower triangular matrix

array([[1., 0., 0.],
       [1., 1., 0.],
       [1., 1., 1.]])

In [65]:
np.triu(np.full((3,3),3)) #upper triangular matrix

array([[3, 3, 3],
       [0, 3, 3],
       [0, 0, 3]])

In [138]:
x = np.array([[1,2,3],[4,5,6],[7,8,9]])
x

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [75]:
x[0,2] # row 0 (1st row), column 2 (3rd column); indices are zero-based

3

In [76]:
x[0][2] # same element: x[0] selects the 1st row, then [2] takes its 3rd entry

3

In [77]:
x[0:3:2,:] #: means all

array([[1, 2, 3],
       [7, 8, 9]])

In [78]:
x[0:3:2]

array([[1, 2, 3],
       [7, 8, 9]])

In [92]:
#<value> = <array>[index] in 1D array
#<value> = <array>[row,col] in 2D array
#<array>[index] = <value> 
#<array>[row,col] = <value>
#<slice> = <array>[start:stop]
#<slice> = <array>[start_row:end_row, start_col:end_col]

array([], shape=(2, 0), dtype=int64)

In [95]:
x[0:3] # It means returning x[0], x[1], x[2].

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [97]:
x[0:3, 1:2]

array([[2],
       [5],
       [8]])

In [139]:
x[2:3,1:3]

array([[8, 9]])

In [141]:
x[2,1:3]

array([8, 9])

In [136]:
x[2,2:4]

array([8])

In [100]:
x[0:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [109]:
x[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [104]:
x[:2,1] #[row, col] 

array([2, 5])

In [108]:
x[:2,1:2]

array([[2],
       [5]])

In [110]:
x[:,:]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [111]:
x[:,]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [112]:
x[,:]

SyntaxError: invalid syntax (81718787.py, line 1)

In [113]:
x = np.arange(10)
y = x[2:5]
z = x[2:5].copy()

In [117]:
print("x=",x,", x.base=",x.base)

x= [0 1 2 3 4 5 6 7 8 9] , x.base= None


In [118]:
print("y=",y,", y.base=",y.base)

y= [2 3 4] , y.base= [0 1 2 3 4 5 6 7 8 9]


In [119]:
print("z=",z,", z.base=",z.base)

z= [2 3 4] , z.base= None


In [120]:
np.shares_memory(x,y)

True

In [121]:
np.shares_memory(y,z)

False

In [122]:
y.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [123]:
x = np.arange(9).reshape((3,3)); x
y = x.copy() ; y

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [144]:
x[(0,1,2),(0,1,2)] = -4 ;x # integer-array indexing pairs the tuples into (0,0), (1,1), (2,2), i.e. the diagonal

array([[-4,  2,  3],
       [ 4, -4,  6],
       [ 7,  8, -4]])

In [146]:
# reshaping arrays
x = np.arange(6);x

array([0, 1, 2, 3, 4, 5])

In [148]:
y = x.reshape((2,3));y

array([[0, 1, 2],
       [3, 4, 5]])

In [149]:
x.reshape((2,-1)) #-1 is just auto calculation of dimension

array([[0, 1, 2],
       [3, 4, 5]])

In [150]:
x.reshape(-1) # flattening way1

array([0, 1, 2, 3, 4, 5])

In [151]:
x.ravel() # flattening way2: returns a view when possible (copies only if needed)

array([0, 1, 2, 3, 4, 5])

In [152]:
x.flatten() # flattening way3: always returns a copy

array([0, 1, 2, 3, 4, 5])

In [153]:
np.resize(np.ones((2,2)), (3,3)) # np.resize returns a new array, repeating the input values to fill the new shape

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [157]:
y = np.ones((2,2))
y

array([[1., 1.],
       [1., 1.]])

In [159]:
# in-place resize: the existing values are kept in flat order and the extra
# entries are filled with zeros (np.resize, by contrast, repeats the input)
y.resize((3,3), refcheck=False)
y

array([[1., 1., 1.],
       [1., 0., 0.],
       [0., 0., 0.]])

In [160]:
x = np.arange(4).reshape((2,2));x

array([[0, 1],
       [2, 3]])

# Numpy Numerics

In [162]:
np.arange(3)

array([0, 1, 2])

In [163]:
np.arange(3) + np.arange(3)

array([0, 2, 4])

In [164]:
np.full((2,2),2)

array([[2, 2],
       [2, 2]])

In [166]:
np.arange(4).reshape((2,2))

array([[0, 1],
       [2, 3]])

In [169]:
%timeit np.sum(np.arange(1000)) #numpy sum

6.84 µs ± 27.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [171]:
%timeit sum(np.arange(1000)) #basic python sum

139 µs ± 1.42 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [173]:
# matrix multiplication
x=np.arange(6).reshape(3,2)
y=np.tri(2,2)

array([[1., 0.],
       [1., 1.]])

In [174]:
x

array([[0, 1],
       [2, 3],
       [4, 5]])

In [175]:
y

array([[1., 0.],
       [1., 1.]])

In [176]:
x @ y

array([[1., 1.],
       [5., 3.],
       [9., 5.]])

In [180]:
np.matmul(x, y)

array([[1., 1.],
       [5., 3.],
       [9., 5.]])

In [177]:
y.T @ y

array([[2., 1.],
       [1., 1.]])

In [178]:
np.matmul(x.T, x)

array([[20, 26],
       [26, 35]])

In [182]:
# other linear algebra functions
# (only the value of the last expression is displayed by the notebook)
np.linalg.det(y)  # determinant
np.linalg.eig(x.T @ x)  # eigenvalues and eigenvectors
np.linalg.inv(x.T @ x)  # matrix inverse
np.linalg.cholesky(x.T @ x)  # lower-triangular Cholesky factor

array([[4.47213595, 0.        ],
       [5.81377674, 1.09544512]])

In [183]:
# random values
rng = np.random.default_rng(seed =123)
rng.random(3) # uniform[0,1)

array([0.68235186, 0.05382102, 0.22035987])

In [190]:
rng.random((1,2)) # shape (1, 2): 1 row, 2 columns

array([[0.59312059, 0.35347072]])

In [184]:
rng.normal(0,2,size=(2,2))

array([[ 0.38794884,  1.8404618 ],
       [ 1.15420758, -1.27292729]])

In [185]:
rng.binomial(n=5, p=0.5, size=10)

array([2, 4, 4, 3, 2, 4, 2, 3, 3, 4])

In [193]:
# linear regression: simulate a design matrix of uniform[0,1) predictors
rng = np.random.default_rng(seed=123)
n = 1000
X = rng.random((n,5)) # shape (n, 5): n rows, 5 columns
X.shape 

(1000, 5)

In [195]:
# add intercept: prepend a column of ones to the predictors
# NOTE(review): this draws a fresh (n, 5) sample from rng instead of reusing
# the X created in the previous cell, so that earlier X is discarded.
X = np.hstack([
    np.ones((n,1)),
    rng.random((n,5))
])
X

array([[1.        , 0.85555695, 0.52814892, 0.61197368, 0.80950203,
        0.99968105],
       [1.        , 0.26192535, 0.48029289, 0.36100934, 0.84397882,
        0.59661069],
       [1.        , 0.00265024, 0.70523588, 0.54712876, 0.8194718 ,
        0.70562937],
       ...,
       [1.        , 0.97426674, 0.72561194, 0.991348  , 0.36509882,
        0.4767504 ],
       [1.        , 0.69232758, 0.46069786, 0.24016681, 0.78119413,
        0.09648325],
       [1.        , 0.69386465, 0.30527185, 0.77865176, 0.24356845,
        0.6028071 ]])

In [197]:
beta = np.array([7, -3.3, 2.4, 1.5, 9.7, -10]) #coefficient
beta

array([  7. ,  -3.3,   2.4,   1.5,   9.7, -10. ])

In [199]:
err = rng.normal(0, 0.1, size=n) #stdev 0.1 to make error small

In [202]:
y = X @ beta + err

In [209]:
# fit model: ordinary least squares via the normal equations (X'X) b = X'y.
# Solve the linear system directly instead of forming inv(X'X): it gives the
# same estimate but is cheaper and numerically more stable.
beta_hat = np.linalg.solve(X.T @ X, X.T @ y)
print(beta_hat)  # estimated coefficients
print(beta)      # true coefficients used to simulate y, for comparison

[  7.0098066   -3.28112581   2.38883789   1.50895041   9.67341049
 -10.00511203]
[  7.   -3.3   2.4   1.5   9.7 -10. ]
