# Numpy Refresher with parts taken from 
###### https://jakevdp.github.io/PythonDataScienceHandbook/02.01-understanding-data-types.html

In [None]:
for i in range(1,5,1): ## from 1 up to (but excluding) 5, in steps of 1
    print(i)

In [None]:
for i in range(5): 
    print(i)

In [1]:
import numpy as np

In [None]:
for i in np.linspace(1,10,10):
    print(i)

### Note: Storing an integer (e.g.) in Python incurs <font color=red>overhead</font> compared to an integer in a compiled language like C, because a Python integer is a full object that holds:
- a reference count that helps Python silently handle memory allocation and deallocation
- the type of the variable
- a size field that specifies the size of the following data members
- the actual integer value that we expect the Python variable to represent.

In [114]:
L = list(range(10))
print(type(L[0]))
print(type(L))

<class 'int'>
<class 'list'>


In [118]:
L2 = [str(c) for c in L] ## list comprehension: build a new list by converting each element of L to str
L2

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [120]:
L3 = [x for x in range(10)] ## a plain Python list; wrapping it as np.array([x ...]) would make it an ndarray instead
L3

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [None]:
L4 = [True, "2", 3.0, 4] # heterogeneous lists,
[type(item) for item in L4] 

### But this flexibility comes at a cost: to allow these flexible types, each item in the list must contain its own type info, reference count, and other information–that is, each item is a complete Python object

### Fixed-type NumPy-style arrays lack this flexibility, but are much more efficient for storing and manipulating data.

In [None]:
list(map(type,L4)) ## This is the same as above 

In [9]:
kobe = np.array([1,2,3,4])
type(kobe[0])

numpy.int32

In [131]:
kobe = np.array([1.0,2,3,4])
type(kobe[0])

numpy.float64

In [11]:
kobe = np.array([1,2,3,4],dtype='float32')
type(kobe[0])

numpy.float32

In [136]:
kobe = np.array([range(i, i + 3) for i in [2, 4, 6]])
kobe

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

In [64]:
np.mean(kobe,axis=0)

array([4., 5., 6.])

In [142]:
print('the maximum of each column is',*np.max(kobe,axis=0))

the maximum of each column is 6 7 8


In [143]:
print('the maximum of each row is',*np.max(kobe,axis=1))

the maximum of each row is 4 6 8


In [62]:
print(*map(np.mean,kobe))

3.0 5.0 7.0


In [90]:
## same as map(np.mean, kobe) above: iterating over a 2-D array yields its rows,
## so this prints one mean per row
print(*map(lambda x: np.mean(x),kobe))

3.0 5.0 7.0


In [33]:
fruits = ['lemon', 'pear', 'watermelon', 'tomato']
print(fruits)  # prints the list itself, brackets and quotes included
print(*fruits,sep=', ')  # * unpacks the list into separate arguments, which print joins with sep

['lemon', 'pear', 'watermelon', 'tomato']
lemon, pear, watermelon, tomato


In [87]:
np.full((3,3,3),np.pi)

array([[[3.14159265, 3.14159265, 3.14159265],
        [3.14159265, 3.14159265, 3.14159265],
        [3.14159265, 3.14159265, 3.14159265]],

       [[3.14159265, 3.14159265, 3.14159265],
        [3.14159265, 3.14159265, 3.14159265],
        [3.14159265, 3.14159265, 3.14159265]],

       [[3.14159265, 3.14159265, 3.14159265],
        [3.14159265, 3.14159265, 3.14159265],
        [3.14159265, 3.14159265, 3.14159265]]])

In [68]:
np.arange(0,20,2) ## similar to the `seq` function in R, seq(0,20,by=2), except that the stop value 20 is excluded

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [75]:
np.linspace(0,20,2)## similar to the `seq` function in R, seq(0,20,length.out=2); both endpoints are included

array([ 0., 20.])

In [78]:
np.random.normal(0,1,(3,3,3)) # mean 0 and std of 1

array([[[-2.65906414,  0.88792103,  1.08215352],
        [-0.38222666, -0.20668316, -1.1659605 ],
        [-1.6740021 ,  0.10723061,  0.13331987]],

       [[ 0.17499693, -0.01925813, -0.41738013],
        [ 0.78144712,  0.69430041, -0.55739256],
        [-0.38203799, -0.02815517,  1.07436526]],

       [[ 0.59975532,  0.47011006,  1.271029  ],
        [-2.81406015,  0.97438567, -0.87081845],
        [ 0.34729814, -0.39735765, -0.22948547]]])

In [81]:
# np.random.random draws uniform floats from the half-open interval [0.0, 1.0)
# NOTE(review): the saved output below contains values greater than 1, so it looks
# stale (produced by a different expression) -- re-run this cell to refresh it
np.random.random((3, 3, 3))

array([[[1.36908257, 1.32881023, 1.69380847],
        [1.47655607, 2.49206505, 2.08814639],
        [1.1126354 , 1.34652463, 1.66061794]],

       [[1.48426448, 2.17470811, 2.27158208],
        [2.89884903, 1.20368805, 1.00547749],
        [2.55921742, 1.93237368, 1.34392881]],

       [[1.90819459, 1.61884547, 2.66806047],
        [1.39440685, 2.16873136, 1.80372926],
        [2.8659008 , 2.95093759, 1.30408652]]])

In [86]:
# Create an uninitialized array of five floats (np.empty returns a float array when no dtype is given)
# The values will be whatever happens to already exist at that memory location
np.empty(5)

array([7.82834916e-312, 7.58696787e-315, 7.58696850e-315, 7.58696913e-315,
       6.89801077e-307])

In [107]:
np.random.seed(2021)
np.random.randint(0, 10, (3, 3, 3))

array([[[4, 5, 9],
        [0, 6, 5],
        [8, 6, 6]],

       [[6, 6, 1],
        [5, 7, 1],
        [1, 5, 2]],

       [[0, 3, 1],
        [0, 2, 6],
        [4, 8, 5]]])

In [121]:
np.zeros(10, dtype='int16') ## equivalent to np.zeros(10, dtype=np.int16)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

In [193]:
# Build two small integer matrices: x is 3x3 holding 1..9, y is 2x6 holding 1..12.
# np.arange already returns an ndarray, so it can be reshaped directly; wrapping it
# in np.array([...]) first only adds a throwaway leading axis and an extra copy.
x = np.arange(1, 10).reshape(3, 3)
print('x=', '\n', x)
y = np.arange(1, 13).reshape(2, 6)
print('y=', '\n', y)

x= 
 [[1 2 3]
 [4 5 6]
 [7 8 9]]
y= 
 [[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]]


In [215]:
x[:,None,:] ## None inserts a length-1 axis in the middle: shape (3, 3) -> (3, 1, 3); imagine it as a 3d cube

array([[[1, 2, 3]],

       [[4, 5, 6]],

       [[7, 8, 9]]])

In [199]:
x[:,None,:][:,:,1] ## index 1 along the last axis of the (3, 1, 3) cube: the second column of x, returned with shape (3, 1)

array([[2],
       [5],
       [8]])

In [227]:
x[:,:,None] ## None appends a length-1 axis at the end: shape (3, 3) -> (3, 3, 1), so every element of x sits in its own innermost array

array([[[1],
        [2],
        [3]],

       [[4],
        [5],
        [6]],

       [[7],
        [8],
        [9]]])

In [183]:
## normalizing the rows by their row sums
## np.sum(x,axis=1) is 1-D, so dividing x by it directly would line the sums up with
## the columns; transposing first lines them up with the rows, and the final .T
## restores the original orientation
x_norm = (x.T/np.sum(x,axis=1)).T
print('normalized array:',"\n" ,x_norm)
print('row sum is (should be 1): ', np.sum(x_norm,axis=1))

normalized array: 
 [[0.16666667 0.33333333 0.5       ]
 [0.26666667 0.33333333 0.4       ]
 [0.29166667 0.33333333 0.375     ]]
row sum is (should be 1):  [1. 1. 1.]


In [181]:
np.sum(x,1)[:,None] ## row sums as a (3, 1) column; note: np.sum(x,1).T won't do this, because transposing a 1-D array leaves it unchanged

array([[ 6],
       [15],
       [24]])

In [286]:
np.sum(x,axis=1).reshape(3,1) ## this will do the same as above

array([[ 6],
       [15],
       [24]])

In [248]:
x_norm = x/np.sum(x,axis=1,keepdims=True) ## keepdims=True keeps the row sums as a (3, 1) column, so each row is divided by its own sum
print('normalized array by row:',"\n" ,x_norm)
print('row sum is (should be 1):',"\n", np.sum(x_norm,axis=1,keepdims=True))

normalized array by row: 
 [[0.16666667 0.33333333 0.5       ]
 [0.26666667 0.33333333 0.4       ]
 [0.29166667 0.33333333 0.375     ]]
row sum is (should be 1): 
 [[1.]
 [1.]
 [1.]]


In [245]:
np.sum(x,axis=0,keepdims=True)

array([[12, 15, 18]])

In [249]:
x_norm = x/np.sum(x,axis=0,keepdims=True) ## with axis=0 the column sums form a (1, 3) row, so each column is divided by its own sum
print('normalized array by column:',"\n" ,x_norm)
print('col sum is (should be 1): ', np.sum(x_norm,axis=0,keepdims=True))

normalized array by column: 
 [[0.08333333 0.13333333 0.16666667]
 [0.33333333 0.33333333 0.33333333]
 [0.58333333 0.53333333 0.5       ]]
col sum is (should be 1):  [[1. 1. 1.]]


## Indexing in numpy array

#### unlike Python lists, NumPy arrays have a fixed type. 

In [276]:
np.random.seed(2021)
x_int = np.random.randint(0,10,(3,4))
x_int

array([[4, 5, 9, 0],
       [6, 5, 8, 6],
       [6, 6, 6, 1]])

In [234]:
x_int[2,-1]

1

In [238]:
# x[start:stop:step]
x_int[0:2,:]

array([[4, 5, 9, 0],
       [6, 5, 8, 6]])

In [251]:
x_int[0:2,1::2]

array([[5, 0],
       [5, 6]])

In [260]:
x_int[1:,::-1]# all elements, reversed for the second and third row

array([[6, 8, 5, 6],
       [1, 6, 6, 6]])

In [264]:
x_int[1:,::-2]# reversed every other for the second and third row

array([[6, 5],
       [1, 6]])

In [266]:
x_int[::-1, ::-1]# reversed the row and column all together

array([[1, 6, 6, 6],
       [6, 8, 5, 6],
       [0, 9, 5, 4]])

In [207]:
## NumPy arrays have a fixed dtype: unlike a list, assigning a float into an integer
## array does not change the element type -- the value is silently truncated (pi -> 3).
## The assignment is done on a copy so that x_int itself stays untouched; the cells
## further down slice x_int and their results should not depend on this demo
## having been run (Restart & Run All gives the same answers either way).
x_int_trunc = x_int.copy()
x_int_trunc[0,0] = np.pi
x_int_trunc

array([[3, 5, 9, 0],
       [6, 5, 8, 6],
       [6, 6, 6, 1]])

In [214]:
print('the shape of x is:', '\n', x_int.shape)
print('the size of x is:','\n', x_int.size)

the shape of x is: 
 (3, 4)
the size of x is: 
 12


#### Subarrays as no-copy views
One important–and extremely useful–thing to know about array slices is that they return views rather than copies of the array data. This is one area in which NumPy array slicing differs from Python list slicing: in lists, slices will be copies. 

In [277]:
x_int_sub = x_int[:2,:2]
x_int_sub

array([[4, 5],
       [6, 5]])

In [278]:
x_int_sub[0,0] = 300
x_int_sub

array([[300,   5],
       [  6,   5]])

In [279]:
x_int ## the original array has changed too, because x_int_sub is a view onto the same data

array([[300,   5,   9,   0],
       [  6,   5,   8,   6],
       [  6,   6,   6,   1]])

In [280]:
### How to actually make a copy of an np array object: call .copy() on the slice
x_int_sub_copy = x_int[:2,:2].copy()
x_int_sub_copy

array([[300,   5],
       [  6,   5]])

In [282]:
x_int_sub_copy[0,0] = 0
x_int_sub_copy

array([[0, 5],
       [6, 5]])

In [283]:
x_int_sub

array([[300,   5],
       [  6,   5]])

In [330]:
x_vec = np.array(range(10))
x_vec

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [331]:
x_vec.reshape((10,1)) ## same as x_vec[:,None]

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [332]:
 np.newaxis is None #btw these two are the same

True

In [351]:
print(x_vec.shape)

(10,)


In [347]:
x_vec_arg = x_vec[None,:] ## row vector 
print(x_vec_arg)
print(x_vec_arg.shape)

[[0 1 2 3 4 5 6 7 8 9]]
(1, 10)


In [348]:
x_vec_arg2 = x_vec[:,None] ## column vector
print(x_vec_arg2)
print(x_vec_arg2.shape)

[[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]
 [9]]
(10, 1)


In [354]:
## will not work, 1->3, needs 2->3 (see below): x_vec is 1-D, so indexing it with
## two slices plus a new axis asks for more axes than it has.
## The IndexError is the point of this cell, so it is caught and displayed instead
## of being left to abort a Restart & Run All.
try:
    x_vec_arg3 = x_vec[:,:,None]
except IndexError as err:
    print('IndexError:', err)

IndexError: too many indices for array

In [356]:
x_vec_arg3 = x_vec_arg2[:,:,None]
x_vec_arg3

array([[[0]],

       [[1]],

       [[2]],

       [[3]],

       [[4]],

       [[5]],

       [[6]],

       [[7]],

       [[8]],

       [[9]]])

In [377]:
## concatenating and stacking arrays
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
print(np.concatenate([x, y]))  # joins the two 1-D arrays end to end
print(np.append(x,y))  # gives the same result as the concatenate call above
print('vertical stack: \n',np.vstack([x,y]))  # each 1-D array becomes one row of a 2-D result
print('horizontal stack: \n',np.hstack([x,y]))  # for 1-D inputs this is again end-to-end joining

[1 2 3 3 2 1]
[1 2 3 3 2 1]
vertical stack: 
 [[1 2 3]
 [3 2 1]]
horizontal stack: 
 [1 2 3 3 2 1]


In [405]:
z = np.array([np.arange(1,10),np.arange(1,10)[::-1]])
print('z:\n',z)
print('vertical stack: \n',np.vstack([z,z]))
print('horizontal stack: \n',np.hstack([z,z]))

z:
 [[1 2 3 4 5 6 7 8 9]
 [9 8 7 6 5 4 3 2 1]]
vertical stack: 
 [[1 2 3 4 5 6 7 8 9]
 [9 8 7 6 5 4 3 2 1]
 [1 2 3 4 5 6 7 8 9]
 [9 8 7 6 5 4 3 2 1]]
horizontal stack: 
 [[1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8 9]
 [9 8 7 6 5 4 3 2 1 9 8 7 6 5 4 3 2 1]]


In [411]:
## splitting arrays
## the second argument lists the split points: the pieces are x[:3], x[3:5] and x[5:]
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, [3, 5])
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [419]:
# Cut a 4x4 grid in half along each axis: vsplit separates the top two rows from
# the bottom two, hsplit separates the left two columns from the right two.
grid = np.arange(16).reshape((4, 4))
upper, lower = np.vsplit(grid, [2])
left, right = np.hsplit(grid, [2])

# Show the full grid followed by each of the four pieces, one label per array.
for label, piece in (
    ('grid', grid),
    ('upper', upper),
    ('lower', lower),
    ('left', left),
    ('right', right),
):
    print(label + ':\n', piece)

grid:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
upper:
 [[0 1 2 3]
 [4 5 6 7]]
lower:
 [[ 8  9 10 11]
 [12 13 14 15]]
left:
 [[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
right:
 [[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


## Numpy functions, vectorization (UFuncs) for speed
#### https://colab.research.google.com/github/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/02.03-Computation-on-arrays-ufuncs.ipynb

In [430]:
import numpy as np
np.random.seed(0)

def compute_reciprocals(values):
    """Return the element-wise reciprocals of ``values`` using an explicit Python loop.

    This is the deliberately slow, one-element-at-a-time version that the timing
    cells compare against the vectorized ``1 / values``.

    Parameters
    ----------
    values : sequence of numbers
        One-dimensional input; must support ``len()`` and iteration.

    Returns
    -------
    numpy.ndarray
        Float array of the same length, with ``result[k] == 1.0 / values[k]``.
    """
    result = np.empty(len(values))
    # Fill the preallocated buffer slot by slot.
    for position, value in enumerate(values):
        result[position] = 1.0 / value
    return result
        
values = np.random.randint(1, 10, size=5)
%timeit -n 100 compute_reciprocals(values) 

10.1 µs ± 299 ns per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [432]:
%timeit -n 100 1/values

2.76 µs ± 1.01 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [436]:
np.arange(5)/np.arange(1,6)

array([0.        , 0.5       , 0.66666667, 0.75      , 0.8       ])

In [441]:
x = np.arange(9).reshape((3, 3))
print( x ** 2)
print( 2 ** x)

[[ 0  1  4]
 [ 9 16 25]
 [36 49 64]]
[[  1   2   4]
 [  8  16  32]
 [ 64 128 256]]


In [451]:
## np.power(x, 2) and x ** 2 are the same element-wise operation. Compare the two
## results directly rather than counting matching elements against a hard-coded 9,
## which is only right while x has exactly nine elements.
np.array_equal(np.power(x,2), x**2)

True