# NUMPY

- dtypes -> every array has one predetermined element type, i.e. arrays are homogeneous
- Vectorization -> operations that NumPy pushes down to compiled C code, avoiding the per-element overhead of Python (e.g. type checking and operator dispatch: if `a` can't multiply `b`, Python then checks whether `b` can multiply `a`, etc.). These functions are called ufuncs.

In [1]:
import numpy as np

In [3]:
a = np.array([1,2,3])

In [4]:
type(a), a.dtype

(numpy.ndarray, dtype('int64'))

In [7]:
a = np.array([1,2,3], dtype=np.uint8)
a.dtype

dtype('uint8')

In [10]:
a = np.array([1,2,3,9_999], dtype=np.uint8)

OverflowError: Python integer 9999 out of bounds for uint8

In [29]:
l = [
    [1,0,0],
    [0,2.5,1],
    [0,0,1]
]

a = np.array(l, dtype=np.uint8)
a

array([[1, 0, 0],
       [0, 2, 1],
       [0, 0, 1]], dtype=uint8)

In [30]:
a.shape

(3, 3)

In [31]:
a.size

9

In [32]:
a[0], a[2]

(array([1, 0, 0], dtype=uint8), array([0, 0, 1], dtype=uint8))

In [33]:
len(a)

3

### creating numpy arrays directly (not from python lists or tuples)

In [34]:
np.zeros((3,4), dtype=np.uint16)

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]], dtype=uint16)

In [35]:
np.ones(5, dtype=np.int16)

array([1, 1, 1, 1, 1], dtype=int16)

In [51]:
np.full(3,3.14, dtype=np.float32)

array([3.14, 3.14, 3.14], dtype=float32)

In [53]:
np.eye(3, dtype=np.uint8)

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]], dtype=uint8)

In [54]:
np.eye(5,3, dtype=np.uint32)

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 0],
       [0, 0, 0]], dtype=uint32)

In [55]:
np.arange(2,21,2,dtype=np.uint8)

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20], dtype=uint8)

In [57]:
np.linspace(1,100,num=13, dtype=np.float32)
# returns num evenly spaced points between start and stop

array([  1.  ,   9.25,  17.5 ,  25.75,  34.  ,  42.25,  50.5 ,  58.75,
        67.  ,  75.25,  83.5 ,  91.75, 100.  ], dtype=float32)

In [58]:
import math
x_coords = np.linspace(-2 * math.pi, 2 * math.pi, num=50)
x_coords

array([-6.28318531, -6.02672876, -5.77027222, -5.51381568, -5.25735913,
       -5.00090259, -4.74444605, -4.48798951, -4.23153296, -3.97507642,
       -3.71861988, -3.46216333, -3.20570679, -2.94925025, -2.6927937 ,
       -2.43633716, -2.17988062, -1.92342407, -1.66696753, -1.41051099,
       -1.15405444, -0.8975979 , -0.64114136, -0.38468481, -0.12822827,
        0.12822827,  0.38468481,  0.64114136,  0.8975979 ,  1.15405444,
        1.41051099,  1.66696753,  1.92342407,  2.17988062,  2.43633716,
        2.6927937 ,  2.94925025,  3.20570679,  3.46216333,  3.71861988,
        3.97507642,  4.23153296,  4.48798951,  4.74444605,  5.00090259,
        5.25735913,  5.51381568,  5.77027222,  6.02672876,  6.28318531])

In [59]:
# np.sin is a ufunc: it is applied to every element of x_coords in compiled
# code, so no Python-level loop over math.sin is needed.
y_values = np.sin(x_coords)
y_values

array([ 2.44929360e-16,  2.53654584e-01,  4.90717552e-01,  6.95682551e-01,
        8.55142763e-01,  9.58667853e-01,  9.99486216e-01,  9.74927912e-01,
        8.86599306e-01,  7.40277997e-01,  5.45534901e-01,  3.15108218e-01,
        6.40702200e-02, -1.91158629e-01, -4.33883739e-01, -6.48228395e-01,
       -8.20172255e-01, -9.38468422e-01, -9.95379113e-01, -9.87181783e-01,
       -9.14412623e-01, -7.81831482e-01, -5.98110530e-01, -3.75267005e-01,
       -1.27877162e-01,  1.27877162e-01,  3.75267005e-01,  5.98110530e-01,
        7.81831482e-01,  9.14412623e-01,  9.87181783e-01,  9.95379113e-01,
        9.38468422e-01,  8.20172255e-01,  6.48228395e-01,  4.33883739e-01,
        1.91158629e-01, -6.40702200e-02, -3.15108218e-01, -5.45534901e-01,
       -7.40277997e-01, -8.86599306e-01, -9.74927912e-01, -9.99486216e-01,
       -9.58667853e-01, -8.55142763e-01, -6.95682551e-01, -4.90717552e-01,
       -2.53654584e-01, -2.44929360e-16])

In [63]:
# there is a random module in numpy package, this is NOT the random package
np.random.seed(42)

np.random.random((5,3))

array([[0.37454012, 0.95071431, 0.73199394],
       [0.59865848, 0.15601864, 0.15599452],
       [0.05808361, 0.86617615, 0.60111501],
       [0.70807258, 0.02058449, 0.96990985],
       [0.83244264, 0.21233911, 0.18182497]])

In [65]:
np.random.randint(1,10,50)

array([3, 7, 4, 9, 3, 5, 3, 7, 5, 9, 7, 2, 4, 9, 2, 9, 5, 2, 4, 7, 8, 3,
       1, 4, 2, 8, 4, 2, 6, 6, 4, 6, 2, 2, 4, 8, 7, 9, 8, 5, 2, 5, 8, 9,
       9, 1, 9, 7, 9, 8])

In [66]:
# simulate a die roll
np.random.randint(1, 6+1, 10)

array([1, 4, 3, 3, 1, 3, 3, 1, 3, 5])

In [67]:
# simulate rolling two dice (10 rolls, one column per die)

np.random.randint(1, 6+1, (10,2))

array([[2, 2],
       [1, 4],
       [1, 4],
       [2, 1],
       [6, 5],
       [3, 4],
       [6, 3],
       [3, 1],
       [3, 5],
       [6, 3]])

# Reshaping

In [110]:
arr = np.arange(1, 7, step=1, dtype=np.uint8)
arr

array([1, 2, 3, 4, 5, 6], dtype=uint8)

In [111]:
arr.shape

(6,)

In [112]:
arr.reshape(2,3)

array([[1, 2, 3],
       [4, 5, 6]], dtype=uint8)

In [113]:
arr.reshape(1,6)

array([[1, 2, 3, 4, 5, 6]], dtype=uint8)

In [114]:
arr.reshape(6,1)

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6]], dtype=uint8)

In [115]:
# original array is not modified, just reslotted, elements are still same
arr

array([1, 2, 3, 4, 5, 6], dtype=uint8)

In [116]:
arr_modified = arr.reshape(3,2)
arr_modified

array([[1, 2],
       [3, 4],
       [5, 6]], dtype=uint8)

In [117]:
arr_modified[0] = 100
arr

array([100, 100,   3,   4,   5,   6], dtype=uint8)

In [118]:
arr_modified

array([[100, 100],
       [  3,   4],
       [  5,   6]], dtype=uint8)

In [119]:
np.shares_memory(arr, arr_modified)

True

In [125]:
arr = np.arange(1, 7, step=1, dtype=np.uint8)
arr_modified = arr.reshape(3,2).copy()
arr_modified[0] = 100
print(arr, '\n\n',arr_modified)

[1 2 3 4 5 6] 

 [[100 100]
 [  3   4]
 [  5   6]]


In [126]:
np.shares_memory(arr, arr_modified)

False

# strides = view logic, not data

Strides allow:
1. `Reshaping`: Reinterpreting memory layout with a different shape.
2. `Transposing`: Swapping axes (by changing strides, not data).
3. `Slicing`: Like `arr[:, ::-1]` (reverse columns) — changes strides to negative!

In [147]:
arr = np.array([
    [1, 2, 3],
    [4, 5, 6]
], dtype=np.int32)  # 4 bytes per int32

print(arr.strides, arr.shape)  # Output: (12, 4) (2, 3)

# First stride 12 = move 12 bytes to jump to the next row (3 × 4 bytes)
# Second stride 4 = move 4 bytes to jump to the next column (1 int32)

(12, 4) (2, 3)


In [148]:
arr_modified = arr.reshape((3,2))
arr_modified

array([[1, 2],
       [3, 4],
       [5, 6]], dtype=int32)

In [149]:
arr_modified.strides

(8, 4)

In [155]:
arr.reshape(6,).strides

(4,)

In [156]:
arr.reshape(6,)

array([1, 2, 3, 4, 5, 6], dtype=int32)

# Stacking 

In [177]:
a1 = np.arange(1,11, dtype=np.uint8).reshape((2,5))
a2 = np.arange(100,115, dtype=np.uint8).reshape((3,5))
# Give the separator row the same dtype as a1/a2; with the default integer
# dtype the stacked result would be silently upcast away from uint8.
a3 = np.zeros(5, dtype=np.uint8)
print(a1,end='\n\n')

print(a2,end='\n\n')

print(a3,end='\n\n\n')

# vstack joins row-wise; the 1-D a3 is treated as a single row of 5 columns
print(np.vstack((a2, a3, a1)))

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]

[[100 101 102 103 104]
 [105 106 107 108 109]
 [110 111 112 113 114]]

[0 0 0 0 0]


[[100 101 102 103 104]
 [105 106 107 108 109]
 [110 111 112 113 114]
 [  0   0   0   0   0]
 [  1   2   3   4   5]
 [  6   7   8   9  10]]


### `np.concatenate` is the general function for joining arrays along an existing axis; `vstack` and `hstack` are convenience wrappers for the row-wise / column-wise cases

In [184]:
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])
np.concatenate((a, b), axis=0)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [185]:
a1

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]], dtype=uint8)

In [186]:
a1.astype(np.float32)

array([[ 1.,  2.,  3.,  4.,  5.],
       [ 6.,  7.,  8.,  9., 10.]], dtype=float32)

# Indexing

In [189]:
a = np.random.randint(1, 20, 10, dtype=np.uint8).reshape(5,2)
a

array([[15,  1],
       [15, 15],
       [14,  4],
       [10,  2],
       [ 7, 17]], dtype=uint8)

In [193]:
a[3][1]

np.uint8(2)

In [194]:
a[3,1]

np.uint8(2)

In [195]:
a[3,1] = a[3,1]*10
a

array([[15,  1],
       [15, 15],
       [14,  4],
       [10, 20],
       [ 7, 17]], dtype=uint8)

In [196]:
a[2,1] = 'a'

ValueError: invalid literal for int() with base 10: 'a'

In [197]:
a[2,1]=256
a

OverflowError: Python integer 256 out of bounds for uint8

# Slicing

In [203]:
a = np.random.randint(1,100,9)
a

array([24, 79, 59, 32, 96, 88, 52, 62, 58])

In [204]:
a[0:3]

array([24, 79, 59])

In [206]:
a[2:8:2]

array([59, 96, 52])

In [208]:
a[1::2]

array([79, 32, 88, 62])

In [209]:
a[::-1]

array([58, 62, 52, 88, 96, 32, 59, 79, 24])

In [212]:
a = np.random.randint(1,100,12).reshape(3,4)
a

array([[87, 96, 97,  1],
       [19,  2, 53, 44],
       [90, 32, 70, 32]])

In [213]:
a[:2,1:3]

array([[96, 97],
       [ 2, 53]])

In [215]:
a[1:,::2]

array([[19, 53],
       [90, 70]])

In [224]:
# difference between python list and numpy slice

l = [1,2,3,4]
arr = np.array([1,2,3,4])

print(l, arr)

# slicing a list builds a new list; slicing an ndarray gives a view onto the same data
l_slice = l[1:3]
arr_slice = arr[1:3]

print(l_slice, arr_slice)

# write through both slices: only the ndarray write shows up in the original
l_slice[0] = 10
arr_slice[0] = 10

print(l, arr)

[1, 2, 3, 4] [1 2 3 4]
[2, 3] [2 3]
[1, 2, 3, 4] [ 1 10  3  4]


# Fancy indexing

In [226]:
arr = np.array([1,2,3,4,5,6])
arr

array([1, 2, 3, 4, 5, 6])

In [229]:
arr[np.array([0,3,4])]

array([1, 4, 5])

In [232]:
arr = np.arange(1,26).reshape(5,5)
arr

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [235]:
# combination of usual indexing and fancy indexing
# a fancy index just uses an array of indices instead of a regular index

arr[1:3,np.array([0,3,4])]

array([[ 6,  9, 10],
       [11, 14, 15]])

In [240]:
# when both args are fancy, then numpy takes a zip of both indices i.e
# (0,1) and (3,3)

arr[np.array([0,3]),np.array([1,3])]

array([ 2, 19])

In [241]:
# because of this, the call below won't work (index arrays of length 2 and 3 cannot be paired up)

arr[np.array([0,3]),np.array([1,3,4])]

IndexError: shape mismatch: indexing arrays could not be broadcast together with shapes (2,) (3,) 

In [244]:
import csv

# newline='' is what the csv module asks for when opening a file for
# csv.reader, so that line endings inside fields are handled by the reader.
with open('data/appl.csv', 'r', newline='') as f:
    # skipinitialspace=True drops the blank that follows each comma in this file
    reader = csv.reader(f, skipinitialspace=True)
    headers = next(reader)  # first row holds the column names
    data = list(reader)     # remaining rows; every field is still a string

In [246]:
headers

['Symbol', 'Date', 'Close', 'Volume', 'Open', 'High', 'Low']

In [247]:
data

[['AAPL', '10/29/2020', '115.32', '146129200', '112.37', '116.93', '112.2'],
 ['AAPL', '10/28/2020', '111.2', '143937800', '115.05', '115.43', '111.1'],
 ['AAPL', '10/27/2020', '116.6', '92276770', '115.49', '117.28', '114.5399'],
 ['AAPL', '10/26/2020', '115.05', '111850700', '114.01', '116.55', '112.88'],
 ['AAPL', '10/23/2020', '115.04', '82572650', '116.39', '116.55', '114.28'],
 ['AAPL', '10/22/2020', '115.75', '101988000', '117.45', '118.04', '114.59'],
 ['AAPL', '10/21/2020', '116.87', '89945980', '116.67', '118.705', '116.45'],
 ['AAPL', '10/20/2020', '117.51', '124423700', '116.2', '118.98', '115.63'],
 ['AAPL', '10/19/2020', '115.98', '120639300', '119.96', '120.419', '115.66'],
 ['AAPL', '10/16/2020', '119.02', '115393800', '121.28', '121.548', '118.81'],
 ['AAPL', '10/15/2020', '120.71', '112559200', '118.72', '121.2', '118.15'],
 ['AAPL', '10/14/2020', '121.19', '151062300', '121', '123.03', '119.62'],
 ['AAPL', '10/13/2020', '121.1', '262330500', '125.27', '125.39', '119.

In [250]:
data = np.array(data)
data

array([['AAPL', '10/29/2020', '115.32', '146129200', '112.37', '116.93',
        '112.2'],
       ['AAPL', '10/28/2020', '111.2', '143937800', '115.05', '115.43',
        '111.1'],
       ['AAPL', '10/27/2020', '116.6', '92276770', '115.49', '117.28',
        '114.5399'],
       ['AAPL', '10/26/2020', '115.05', '111850700', '114.01', '116.55',
        '112.88'],
       ['AAPL', '10/23/2020', '115.04', '82572650', '116.39', '116.55',
        '114.28'],
       ['AAPL', '10/22/2020', '115.75', '101988000', '117.45', '118.04',
        '114.59'],
       ['AAPL', '10/21/2020', '116.87', '89945980', '116.67', '118.705',
        '116.45'],
       ['AAPL', '10/20/2020', '117.51', '124423700', '116.2', '118.98',
        '115.63'],
       ['AAPL', '10/19/2020', '115.98', '120639300', '119.96', '120.419',
        '115.66'],
       ['AAPL', '10/16/2020', '119.02', '115393800', '121.28', '121.548',
        '118.81'],
       ['AAPL', '10/15/2020', '120.71', '112559200', '118.72', '121.2',
        '11

In [252]:
dates = data[:, 1]
dates

array(['10/29/2020', '10/28/2020', '10/27/2020', '10/26/2020',
       '10/23/2020', '10/22/2020', '10/21/2020', '10/20/2020',
       '10/19/2020', '10/16/2020', '10/15/2020', '10/14/2020',
       '10/13/2020', '10/12/2020', '10/09/2020', '10/08/2020',
       '10/07/2020', '10/06/2020', '10/05/2020', '10/02/2020',
       '10/01/2020', '09/30/2020', '09/29/2020'], dtype='<U10')

In [253]:
from dateutil import parser

dates = [parser.parse(d) for d in dates]
dates

[datetime.datetime(2020, 10, 29, 0, 0),
 datetime.datetime(2020, 10, 28, 0, 0),
 datetime.datetime(2020, 10, 27, 0, 0),
 datetime.datetime(2020, 10, 26, 0, 0),
 datetime.datetime(2020, 10, 23, 0, 0),
 datetime.datetime(2020, 10, 22, 0, 0),
 datetime.datetime(2020, 10, 21, 0, 0),
 datetime.datetime(2020, 10, 20, 0, 0),
 datetime.datetime(2020, 10, 19, 0, 0),
 datetime.datetime(2020, 10, 16, 0, 0),
 datetime.datetime(2020, 10, 15, 0, 0),
 datetime.datetime(2020, 10, 14, 0, 0),
 datetime.datetime(2020, 10, 13, 0, 0),
 datetime.datetime(2020, 10, 12, 0, 0),
 datetime.datetime(2020, 10, 9, 0, 0),
 datetime.datetime(2020, 10, 8, 0, 0),
 datetime.datetime(2020, 10, 7, 0, 0),
 datetime.datetime(2020, 10, 6, 0, 0),
 datetime.datetime(2020, 10, 5, 0, 0),
 datetime.datetime(2020, 10, 2, 0, 0),
 datetime.datetime(2020, 10, 1, 0, 0),
 datetime.datetime(2020, 9, 30, 0, 0),
 datetime.datetime(2020, 9, 29, 0, 0)]

In [256]:
oc = data[:, np.array([4,2])].astype(np.float32)
oc

array([[112.37, 115.32],
       [115.05, 111.2 ],
       [115.49, 116.6 ],
       [114.01, 115.05],
       [116.39, 115.04],
       [117.45, 115.75],
       [116.67, 116.87],
       [116.2 , 117.51],
       [119.96, 115.98],
       [121.28, 119.02],
       [118.72, 120.71],
       [121.  , 121.19],
       [125.27, 121.1 ],
       [120.06, 124.4 ],
       [115.28, 116.97],
       [116.25, 114.97],
       [114.62, 115.08],
       [115.7 , 113.16],
       [113.91, 116.5 ],
       [112.89, 113.02],
       [117.64, 116.79],
       [113.79, 115.81],
       [114.55, 114.09]], dtype=float32)

In [258]:
diff_prices = (oc[:,1] - oc[:,0]) /  oc[:,0] *100
diff_prices

array([ 2.625253  , -3.3463764 ,  0.96112275,  0.9122015 , -1.1598921 ,
       -1.4474219 ,  0.17142761,  1.1273711 , -3.3177688 , -1.8634582 ,
        1.6762111 ,  0.15702681, -3.3288083 ,  3.6148627 ,  1.4659979 ,
       -1.1010742 ,  0.4013253 , -2.195327  ,  2.2737217 ,  0.11515392,
       -0.7225421 ,  1.7751969 , -0.4015772 ], dtype=float32)

# Masking

In [281]:
arr = np.array([10,-10,21,-33, 50,61 ,-6])
arr

array([ 10, -10,  21, -33,  50,  61,  -6])

In [275]:
mask = arr < 0
mask

array([False,  True, False,  True, False, False,  True])

In [276]:
arr[mask]

array([-10, -33,  -6])

In [282]:
arr[arr % 2==0] 

array([ 10, -10,  50,  -6])

In [283]:
arr[(arr>0) & (arr % 2 == 0)]

array([10, 50])

### Dates on which apple closed above 116.0

In [286]:
# Same file, read without skipinitialspace: every field after the first keeps
# its leading blank. newline='' is what the csv module asks for on open().
with open('data/appl.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    headers = next(reader)  # first row holds the column names
    data = np.array(list(reader))

In [287]:
data

array([['AAPL', ' 10/29/2020', ' 115.32', ' 146129200', ' 112.37',
        ' 116.93', ' 112.2'],
       ['AAPL', ' 10/28/2020', ' 111.2', ' 143937800', ' 115.05',
        ' 115.43', ' 111.1'],
       ['AAPL', ' 10/27/2020', ' 116.6', ' 92276770', ' 115.49',
        ' 117.28', ' 114.5399'],
       ['AAPL', ' 10/26/2020', ' 115.05', ' 111850700', ' 114.01',
        ' 116.55', ' 112.88'],
       ['AAPL', ' 10/23/2020', ' 115.04', ' 82572650', ' 116.39',
        ' 116.55', ' 114.28'],
       ['AAPL', ' 10/22/2020', ' 115.75', ' 101988000', ' 117.45',
        ' 118.04', ' 114.59'],
       ['AAPL', ' 10/21/2020', ' 116.87', ' 89945980', ' 116.67',
        ' 118.705', ' 116.45'],
       ['AAPL', ' 10/20/2020', ' 117.51', ' 124423700', ' 116.2',
        ' 118.98', ' 115.63'],
       ['AAPL', ' 10/19/2020', ' 115.98', ' 120639300', ' 119.96',
        ' 120.419', ' 115.66'],
       ['AAPL', ' 10/16/2020', ' 119.02', ' 115393800', ' 121.28',
        ' 121.548', ' 118.81'],
       ['AAPL', ' 10/15

In [289]:
dates = np.array([parser.parse(d) for d in data[:,1]])
dates

array([datetime.datetime(2020, 10, 29, 0, 0),
       datetime.datetime(2020, 10, 28, 0, 0),
       datetime.datetime(2020, 10, 27, 0, 0),
       datetime.datetime(2020, 10, 26, 0, 0),
       datetime.datetime(2020, 10, 23, 0, 0),
       datetime.datetime(2020, 10, 22, 0, 0),
       datetime.datetime(2020, 10, 21, 0, 0),
       datetime.datetime(2020, 10, 20, 0, 0),
       datetime.datetime(2020, 10, 19, 0, 0),
       datetime.datetime(2020, 10, 16, 0, 0),
       datetime.datetime(2020, 10, 15, 0, 0),
       datetime.datetime(2020, 10, 14, 0, 0),
       datetime.datetime(2020, 10, 13, 0, 0),
       datetime.datetime(2020, 10, 12, 0, 0),
       datetime.datetime(2020, 10, 9, 0, 0),
       datetime.datetime(2020, 10, 8, 0, 0),
       datetime.datetime(2020, 10, 7, 0, 0),
       datetime.datetime(2020, 10, 6, 0, 0),
       datetime.datetime(2020, 10, 5, 0, 0),
       datetime.datetime(2020, 10, 2, 0, 0),
       datetime.datetime(2020, 10, 1, 0, 0),
       datetime.datetime(2020, 9, 30, 0, 

In [290]:
headers

['Symbol', ' Date', ' Close', ' Volume', ' Open', ' High', ' Low']

In [292]:
ohlc = data[:,np.array([4,5,6,2])].astype(np.float32)
ohlc

array([[112.37  , 116.93  , 112.2   , 115.32  ],
       [115.05  , 115.43  , 111.1   , 111.2   ],
       [115.49  , 117.28  , 114.5399, 116.6   ],
       [114.01  , 116.55  , 112.88  , 115.05  ],
       [116.39  , 116.55  , 114.28  , 115.04  ],
       [117.45  , 118.04  , 114.59  , 115.75  ],
       [116.67  , 118.705 , 116.45  , 116.87  ],
       [116.2   , 118.98  , 115.63  , 117.51  ],
       [119.96  , 120.419 , 115.66  , 115.98  ],
       [121.28  , 121.548 , 118.81  , 119.02  ],
       [118.72  , 121.2   , 118.15  , 120.71  ],
       [121.    , 123.03  , 119.62  , 121.19  ],
       [125.27  , 125.39  , 119.65  , 121.1   ],
       [120.06  , 125.18  , 119.2845, 124.4   ],
       [115.28  , 117.    , 114.92  , 116.97  ],
       [116.25  , 116.4   , 114.5901, 114.97  ],
       [114.62  , 115.55  , 114.13  , 115.08  ],
       [115.7   , 116.12  , 112.25  , 113.16  ],
       [113.91  , 116.65  , 113.55  , 116.5   ],
       [112.89  , 115.37  , 112.22  , 113.02  ],
       [117.64  , 11

In [293]:
ohlc[:,3] > 116.0

array([False, False,  True, False, False, False,  True,  True, False,
        True,  True,  True,  True,  True,  True, False, False, False,
        True, False,  True, False, False])

In [301]:
# Rows whose close (column 3 of ohlc) is above 116.0, with the matching date
# prepended as a first column. The mask is computed once, and reshape(-1, 1)
# lets numpy infer the row count instead of hard-coding 11.
closed_above = ohlc[:, 3] > 116.0
np.hstack((dates[closed_above].reshape(-1, 1), ohlc[closed_above]))

array([[datetime.datetime(2020, 10, 27, 0, 0), 115.48999786376953,
        117.27999877929688, 114.53990173339844, 116.5999984741211],
       [datetime.datetime(2020, 10, 21, 0, 0), 116.66999816894531,
        118.70500183105469, 116.44999694824219, 116.87000274658203],
       [datetime.datetime(2020, 10, 20, 0, 0), 116.19999694824219,
        118.9800033569336, 115.62999725341797, 117.51000213623047],
       [datetime.datetime(2020, 10, 16, 0, 0), 121.27999877929688,
        121.5479965209961, 118.80999755859375, 119.0199966430664],
       [datetime.datetime(2020, 10, 15, 0, 0), 118.72000122070312,
        121.19999694824219, 118.1500015258789, 120.70999908447266],
       [datetime.datetime(2020, 10, 14, 0, 0), 121.0, 123.02999877929688,
        119.62000274658203, 121.19000244140625],
       [datetime.datetime(2020, 10, 13, 0, 0), 125.2699966430664,
        125.38999938964844, 119.6500015258789, 121.0999984741211],
       [datetime.datetime(2020, 10, 12, 0, 0), 120.05999755859375,
  

In [299]:
# Turn the selected dates into a single column; -1 lets numpy infer the
# number of rows from the mask instead of hard-coding it.
dates[ohlc[:, 3] > 116.0].reshape(-1, 1)

array([[datetime.datetime(2020, 10, 27, 0, 0)],
       [datetime.datetime(2020, 10, 21, 0, 0)],
       [datetime.datetime(2020, 10, 20, 0, 0)],
       [datetime.datetime(2020, 10, 16, 0, 0)],
       [datetime.datetime(2020, 10, 15, 0, 0)],
       [datetime.datetime(2020, 10, 14, 0, 0)],
       [datetime.datetime(2020, 10, 13, 0, 0)],
       [datetime.datetime(2020, 10, 12, 0, 0)],
       [datetime.datetime(2020, 10, 9, 0, 0)],
       [datetime.datetime(2020, 10, 5, 0, 0)],
       [datetime.datetime(2020, 10, 1, 0, 0)]], dtype=object)

In [298]:
ohlc[ohlc[:,3] > 116.0].shape

(11, 4)

In [305]:
# Keep the raw date strings this time (no parsing), rebuilt as their own array.
dates = np.array(list(data[:, 1]))
# -1 lets numpy infer the number of selected rows instead of hard-coding it
dates[ohlc[:, 3] > 116.0].reshape(-1, 1)

array([[' 10/27/2020'],
       [' 10/21/2020'],
       [' 10/20/2020'],
       [' 10/16/2020'],
       [' 10/15/2020'],
       [' 10/14/2020'],
       [' 10/13/2020'],
       [' 10/12/2020'],
       [' 10/09/2020'],
       [' 10/05/2020'],
       [' 10/01/2020']], dtype='<U11')

In [306]:
# Same selection as before, but with string dates: the result is one string
# array rather than an object array. The mask is computed once and
# reshape(-1, 1) infers the row count.
closed_above = ohlc[:, 3] > 116.0
np.hstack((dates[closed_above].reshape(-1, 1), ohlc[closed_above]))

array([[' 10/27/2020', '115.49', '117.28', '114.5399', '116.6'],
       [' 10/21/2020', '116.67', '118.705', '116.45', '116.87'],
       [' 10/20/2020', '116.2', '118.98', '115.63', '117.51'],
       [' 10/16/2020', '121.28', '121.548', '118.81', '119.02'],
       [' 10/15/2020', '118.72', '121.2', '118.15', '120.71'],
       [' 10/14/2020', '121.0', '123.03', '119.62', '121.19'],
       [' 10/13/2020', '125.27', '125.39', '119.65', '121.1'],
       [' 10/12/2020', '120.06', '125.18', '119.2845', '124.4'],
       [' 10/09/2020', '115.28', '117.0', '114.92', '116.97'],
       [' 10/05/2020', '113.91', '116.65', '113.55', '116.5'],
       [' 10/01/2020', '117.64', '117.72', '115.83', '116.79']],
      dtype='<U32')

# Universal functions (vectorized functions)

In [307]:
arr_1 = np.array([1,2,3,4,5])
arr_2 = np.arange(1,6)

arr_1 + arr_2

array([ 2,  4,  6,  8, 10])

In [308]:
np.add(arr_1, arr_2)

array([ 2,  4,  6,  8, 10])

In [309]:
np.multiply(arr_1, arr_2)

array([ 1,  4,  9, 16, 25])

In [310]:
arr_1 ** arr_2

array([   1,    4,   27,  256, 3125])

In [311]:
arr = np.linspace(-2 * math.pi, 2 * math.pi, 10)
np.sin(arr)

array([ 2.44929360e-16,  9.84807753e-01,  3.42020143e-01, -8.66025404e-01,
       -6.42787610e-01,  6.42787610e-01,  8.66025404e-01, -3.42020143e-01,
       -9.84807753e-01, -2.44929360e-16])

In [313]:
from time import perf_counter

In [314]:
l = list(range(1, 1_000_000))

In [315]:
start = perf_counter()
new_list = []
for el in l:
    new_list.append(1/el)
end = perf_counter()
print(f'Elapsed :{end - start}')

Elapsed :0.08828599995467812


In [316]:
start = perf_counter()
new_list = [1/el for el in l]
end = perf_counter()
print(f'Elapsed :{end - start}')

Elapsed :0.06646487500984222


In [318]:
np_l = np.array(l, dtype=np.float32)
start = perf_counter()
new_list = 1/np_l
end = perf_counter()
print(f'Elapsed :{end - start}')

Elapsed :0.0010412499541416764


In [321]:
np.amax(np.array([1,2,3,4]))

np.int64(4)

In [329]:
arr = np.arange(1,21).reshape(5,4)
print(arr)
np.amax(arr,axis=0)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]
 [17 18 19 20]]


array([17, 18, 19, 20])

In [331]:
np.sum(arr,axis=1)

array([10, 26, 42, 58, 74])

In [333]:
arr = np.arange(0,11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [334]:
bins = [0, 3, 8, 10]
# creates bins [0, 3), [3, 8), [8, 10] -- the last bin is closed, so it includes 10

In [336]:
np.histogram(arr, bins)
# gives frequency of each bin and bins array

(array([3, 5, 3]), array([ 0,  3,  8, 10]))

In [338]:
np.histogram(arr, 3)
# 3 evenly spaced bins

(array([4, 3, 4]), array([ 0.        ,  3.33333333,  6.66666667, 10.        ]))