# Numpy Refresher with parts taken from 
###### https://jakevdp.github.io/PythonDataScienceHandbook/02.01-understanding-data-types.html

In [4]:
import numpy as np

In [14]:
for i in range(1,5,1): ## from 1 up to (but excluding) 5, in steps of 1
    print(i)

1
2
3
4


In [None]:
# enumerate() needs an iterable, not an int; wrapping the count in range()
# makes each `i` an (index, value) tuple such as (0, 0), (1, 1), ...
for i in enumerate(range(10)):
    print(i)

In [None]:
for i in range(5): 
    print(i)

In [None]:
for i in np.linspace(1,10,10):
    print(i)

### Note: Storing an integer (for example) in Python incurs <font color=red>overhead</font> compared to an integer in a compiled language like C, because every Python integer is a full object that carries:
- a reference count that helps Python silently handle memory allocation and deallocation
- the type of the variable
- the size of the data members that follow
- the actual integer value that we expect the Python variable to represent.

In [None]:
L = list(range(10))
print(type(L[0]))
print(type(L))

In [None]:
L2 = [str(c) for c in L] ## for loop style initializer
L2

In [None]:
L3 = [x for x in range(10)] ## np.array([x ...]) will make it array 
L3

In [None]:
L4 = [True, "2", 3.0, 4] # heterogeneous lists,
[type(item) for item in L4] 

### But this flexibility comes at a cost: to allow these flexible types, each item in the list must contain its own type info, reference count, and other information–that is, each item is a complete Python object

### Fixed-type NumPy-style arrays lack this flexibility, but are much more efficient for storing and manipulating data.

In [None]:
list(map(type,L4)) ## This is the same as above 

In [None]:
kobe = np.array([1,2,3,4])
type(kobe[0])

In [None]:
kobe = np.array([1.0,2,3,4])
type(kobe[0])

In [None]:
kobe = np.array([1,2,3,4],dtype='float32')
type(kobe[0])

In [None]:
kobe = np.array([range(i, i + 3) for i in [2, 4, 6]])
kobe

In [None]:
np.mean(kobe,axis=0)

In [None]:
print('the maximum of each column is',*np.max(kobe,axis=0))

In [None]:
print('the maximum of each row is',*np.max(kobe,axis=1))

In [None]:
print(*map(np.mean,kobe))

In [None]:
print(*map(lambda x: np.mean(x),kobe))

In [None]:
fruits = ['lemon', 'pear', 'watermelon', 'tomato']
print(fruits)
print(*fruits,sep=', ')

In [None]:
np.full((3,3,3),np.pi)

In [None]:
np.arange(0,20,2) ## similar to the `seq` function in R, seq(0,20,by=2), except that arange excludes the stop value 20

In [None]:
np.linspace(0,20,2)## similar to the `seq` function in R: seq(0,20,length.out=2)

In [None]:
np.random.normal(0,1,(3,3,3)) # mean 0 and std of 1

In [None]:
np.random.random((3, 3, 3))

In [None]:
# Create an uninitialized array of five floats (np.empty defaults to float64)
# The values will be whatever happens to already exist at that memory location
np.empty(5)

In [None]:
np.random.seed(2021)
np.random.randint(0, 10, (3, 3, 3))

In [None]:
np.zeros(10, dtype='int16') ## equivalent to np.zeros(10, dtype=np.int16)

In [None]:
# x: the integers 1..9 laid out as a 3x3 grid
x = np.arange(1, 10).reshape(3, 3)
print('x=', '\n', x)
# y: the integers 1..12 laid out as a 2x6 grid
y = np.arange(1, 13).reshape(2, 6)
print('y=', '\n', y)

In [None]:
x[:,None,:] ## imagine it as a 3d cube

In [None]:
x[:,None,:][:,:,1] ## the second slice of the 3d cube

In [None]:
x[:,:,None] ## adds a trailing length-1 axis: for a 3x3 x the result has shape (3, 3, 1), each entry of x in its own 1-element cell

In [None]:
## normalizing the rows by its row sums
x_norm = (x.T/np.sum(x,axis=1)).T
print('normalized array:',"\n" ,x_norm)
print('row sum is (should be 1): ', np.sum(x_norm,axis=1))

In [None]:
np.sum(x,1)[:,None] ## row sums as a column vector; note: np.sum(x,1).T won't do this, because transposing a 1-D array is a no-op

In [None]:
np.sum(x,axis=1).reshape(3,1) ## this will do the same as above

In [None]:
x_norm = x/np.sum(x,axis=1,keepdims=True) ## 
print('normalized array by row:',"\n" ,x_norm)
print('row sum is (should be 1):',"\n", np.sum(x_norm,axis=1,keepdims=True))

In [None]:
np.sum(x,axis=0,keepdims=True)

In [None]:
x_norm = x/np.sum(x,axis=0,keepdims=True) ## 
print('normalized array by column:',"\n" ,x_norm)
print('col sum is (should be 1): ', np.sum(x_norm,axis=0,keepdims=True))

## Indexing in numpy array

#### Unlike Python lists, NumPy arrays have a fixed type.

In [None]:
np.random.seed(2021)
x_int = np.random.randint(0,10,(3,4))
x_int

In [None]:
x_int[2,-1]

In [None]:
# x[start:stop:step]
x_int[0:2,:]

In [None]:
x_int[0:2,1::2]

In [None]:
x_int[1:,::-1]# all elements, reversed for the second and third row

In [None]:
x_int[1:,::-2]# reversed every other for the second and third row

In [None]:
x_int[::-1, ::-1]# reversed the row and column all together

In [None]:
## Unlike a list, assigning into the array won't change its type:
## x_int holds integers, so np.pi is silently truncated to 3
x_int[0,0] = np.pi
x_int

In [None]:
print('the shape of x is:', '\n', x_int.shape)
print('the size of x is:','\n', x_int.size)

#### Subarrays as no-copy views
One important–and extremely useful–thing to know about array slices is that they return views rather than copies of the array data. This is one area in which NumPy array slicing differs from Python list slicing: in lists, slices will be copies. 

In [None]:
x_int_sub = x_int[:2,:2]
x_int_sub

In [None]:
x_int_sub[0,0] = 300
x_int_sub

In [None]:
x_int ## the original array has also been changed, because x_int_sub is a view of it

In [None]:
###How to actually make a copy of np array object
x_int_sub_copy = x_int[:2,:2].copy()
x_int_sub_copy

In [None]:
x_int_sub_copy[0,0] = 0
x_int_sub_copy

In [None]:
x_int_sub

In [None]:
x_vec = np.array(range(10))
x_vec

In [None]:
x_vec.reshape((10,1)) ## same as x_vec[:,None]

In [None]:
 np.newaxis is None #btw these two are the same

In [None]:
print(x_vec.shape)

In [None]:
x_vec_arg = x_vec[None,:] ## row vector 
print(x_vec_arg)
print(x_vec_arg.shape)

In [None]:
x_vec_arg2 = x_vec[:,None] ## column vector
print(x_vec_arg2)
print(x_vec_arg2.shape)

In [None]:
x_vec_arg3 = x_vec[:,:,None] ## will not work (IndexError: too many indices): x_vec is 1-D, so 1->3 fails; index a 2-D array for 2->3 (see below)

In [None]:
x_vec_arg3 = x_vec_arg2[:,:,None]
x_vec_arg3

In [None]:
## concatenate and stacking arrays
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
print(np.concatenate([x, y]))
print(np.append(x,y))
print('vertical stack: \n',np.vstack([x,y]))
print('horizontal stack: \n',np.hstack([x,y]))

In [None]:
z = np.array([np.arange(1,10),np.arange(1,10)[::-1]])
print('z:\n',z)
print('vertical stack: \n',np.vstack([z,z]))
print('horizontal stack: \n',np.hstack([z,z]))

In [None]:
## splitting arrays
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, [3, 5])
print(x1, x2, x3)

In [None]:
grid = np.arange(16).reshape((4, 4))
upper, lower = np.vsplit(grid, [2])
left, right = np.hsplit(grid, [2])
print('grid:\n',grid)
print('upper:\n',upper)
print('lower:\n',lower)
print('left:\n',left)
print('right:\n',right)

## Numpy functions, vectorization (UFuncs) for speed
#### https://colab.research.google.com/github/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/02.03-Computation-on-arrays-ufuncs.ipynb

In [None]:
import numpy as np
np.random.seed(0)

def compute_reciprocals(values):
    """Return an array holding 1/v for every entry v of ``values``.

    Uses an explicit element-by-element Python loop on purpose, so it can
    be timed against the vectorized ``1 / values``.
    """
    reciprocals = np.empty(len(values))
    for position, entry in enumerate(values):
        reciprocals[position] = 1.0 / entry
    return reciprocals
        
values = np.random.randint(1, 10, size=5)
%timeit -n 100 compute_reciprocals(values) 

In [None]:
%timeit -n 100 1/values

In [None]:
np.arange(5)/np.arange(1,6)

In [None]:
x = np.arange(9).reshape((3, 3))
print( x ** 2)
print( 2 ** x)

In [None]:
np.equal(np.power(x,2) ,x**2).sum() == 9

In [2]:
## easy printing
a = 3
b = 4
print(f"a = {a} and b = {b}")

a = 3 and b = 4


In [6]:
np.arange(5)

array([0, 1, 2, 3, 4])

In [7]:
np.empty(5)

array([1.30290084e-311, 6.01334515e-154, 3.54557136e+246, 7.26581901e+223,
       1.96086582e+243])

## Initialize (preallocate) large arrays first with np.empty to avoid repeatedly reallocating memory as the array grows, similar to R

#### https://colab.research.google.com/github/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/02.03-Computation-on-arrays-ufuncs.ipynb#scrollTo=6w6XohaGjSL9

In [32]:
def init_no(end):
    """Collect the squares 0**2 .. (end-1)**2 WITHOUT preallocating.

    Each pass calls np.append, which builds a brand-new array, so this is
    the deliberately slow variant used for the timing comparison.
    """
    grown = []
    for k in range(end):
        # np.append returns a fresh copy with one more element each time
        grown = np.append(grown, k * k)
    return grown
%timeit -n 100 init_no(1000)        

3.76 ms ± 50.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [18]:
def init_yes(end):
    """Collect the squares 0**2 .. (end-1)**2 into a preallocated array.

    The output buffer is created once with np.empty and then filled slot
    by slot, so no reallocation happens inside the loop.
    """
    filled = np.empty(end)
    for k in range(end):
        filled[k] = k * k
    return filled
%timeit -n 100 init_yes(1000)

359 µs ± 5.08 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [36]:
# Check that both construction strategies produce the same 1000 values.
# NOTE: init_yes and init_no draw no random numbers, so the seed calls
# below do not influence the comparison.
np.random.seed(2021)
temp_yes = init_yes(1000)
np.random.seed(2021)
temp_no = init_no(1000)
np.sum(temp_no == temp_yes)  # number of positions where the two results agree

1000

#### The `out` argument works with array views too

In [48]:
x = np.arange(5)
y = np.empty(5)
np.multiply(x, 10, out=y)
print(y)

[ 0. 10. 20. 30. 40.]


In [None]:
# assumes x is still the 5-element np.arange(5) from the previous cell
y = np.zeros(10)
np.power(2, x, out=y[::2])  # y[::2] is a view of every other slot of y, so results land directly in y
print(y)

If we had instead written ``y[::2] = 2 ** x``, this would have resulted in the creation of a temporary array to hold the results of ``2 ** x``, followed by a second operation copying those values into the ``y`` array.


In [52]:
x = np.arange(1, 6)
np.prod(x)

120

In [None]:
x = np.arange(1, 6)
# A notebook cell only displays its last expression, so print both results
# explicitly; otherwise the add.reduce value is silently discarded.
print(np.add.reduce(x)) ## same as np.sum(x)
print(np.multiply.reduce(x)) ## same as np.prod(x)