<a href="https://colab.research.google.com/github/gimquokka/ML/blob/master/Numpy_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NumPy Tutorial - NumPy basics

In [0]:
import numpy as np
from io import StringIO

## Data types

In [0]:
x = np.array([1, 2, 3], dtype= 'uint8')

print(x.astype(float).dtype)

np.int8(x).dtype??

float64
Object `dtype` not found.


In [0]:
d = np.dtype(int)
np.issubdtype(d, np.integer)

# np.integer?
np.issubdtype(d, np.float)
np.float?


  """


In [0]:
a = np.array([1, 2, 3])
a[1] == 2

True

In [0]:
print(np.power(100, 50, dtype = np.int64))
print(np.power(100, 50, dtype = np.int32))

print(np.power(100, 50, dtype = np.float64))

np.iinfo(np.int32)
np.iinfo(np.int64)

0
0
1e+100


iinfo(min=-9223372036854775808, max=9223372036854775807, dtype=int64)

In [0]:
np.finfo(np.longdouble)

finfo(resolution=1e-18, min=-1.189731495357231765e+4932, max=1.189731495357231765e+4932, dtype=float128)

In [0]:
1 + np.finfo(np.longdouble).eps

1.0000000000000000001

## Array creation

In [0]:
x = np.array([[1, 2,3], (1, 2, 3),1,2])
print(x)
print(x[3])

[list([1, 2, 3]) (1, 2, 3) 1 2]
2


In [0]:
x = np.array([[1+1j, 1+13j]])
print(x)

[[1. +1.j 1.+13.j]]


In [0]:
np.indices([10,4])

array([[[0, 0, 0, 0],
        [1, 1, 1, 1],
        [2, 2, 2, 2],
        [3, 3, 3, 3],
        [4, 4, 4, 4],
        [5, 5, 5, 5],
        [6, 6, 6, 6],
        [7, 7, 7, 7],
        [8, 8, 8, 8],
        [9, 9, 9, 9]],

       [[0, 1, 2, 3],
        [0, 1, 2, 3],
        [0, 1, 2, 3],
        [0, 1, 2, 3],
        [0, 1, 2, 3],
        [0, 1, 2, 3],
        [0, 1, 2, 3],
        [0, 1, 2, 3],
        [0, 1, 2, 3],
        [0, 1, 2, 3]]])

## I/O with Numpy

In [0]:
data = u'1, 2, 3\n4, 5, 6'

np.genfromtxt(StringIO(data))

array([[nan, nan,  3.],
       [nan, nan,  6.]])

In [0]:
data = u"  1  2  3\n  4  5 67\n890123  4"
np.genfromtxt(StringIO(data), delimiter=3)

array([[  1.,   2.,   3.],
       [  4.,   5.,  67.],
       [890., 123.,   4.]])

In [0]:
data = u"123456789\n   4  7 9\n   4567 9"
np.genfromtxt(StringIO(data), delimiter=(4, 3, 2))

array([[1234.,  567.,   89.],
       [   4.,    7.,    9.],
       [   4.,  567.,    9.]])

In [0]:
data = u'1, abc  , 1 \n 3, xxx, 23113 '
np.genfromtxt(StringIO(data), delimiter=',', dtype='|U5')

array([['1', ' abc ', ' 1'],
       ['3', ' xxx', ' 2311']], dtype='<U5')

In [0]:
data = u'1, abc  , 1 \n 3, xxx, 23113 '
np.genfromtxt(StringIO(data), delimiter=',', dtype='|U5', autostrip='True')

array([['1', 'abc', '1'],
       ['3', 'xxx', '23113']], dtype='<U5')

In [0]:
data = """
# Hello !
# Skip me too !
1, 2
3, 4
5, 6
#What the fuck!
5, 8
# Hi, there?
"""
# print(np.dtype(data))
?np.genfromtxt(StringIO(data), delimiter=",")

In [0]:
data = u"\n".join(str(i) for i in range(10))
print(data)

0
1
2
3
4
5
6
7
8
9


In [0]:
np.genfromtxt(StringIO(data))

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [0]:
np.genfromtxt(StringIO(data), skip_header=2, skip_footer=2)

array([2., 3., 4., 5., 6., 7.])

In [0]:
data = u'1 2 3 4\n5 6 7 8'
np.genfromtxt(StringIO(data), names = 'a, b, c,d', usecols = ('a', 'c'))

array([(1., 3.), (5., 7.)], dtype=[('a', '<f8'), ('c', '<f8')])

In [0]:
np.genfromtxt(StringIO(data), usecols=[0, -1])

array([[1., 4.],
       [5., 8.]])

In [0]:
?np.dtype(str)

In [0]:
data = StringIO('1 2 3 3\n 4 5 6 6')
np.genfromtxt(data, dtype=[(_, int) for _ in 'abcd'])

array([(1, 2, 3, 3), (4, 5, 6, 6)],
      dtype=[('a', '<i8'), ('b', '<i8'), ('c', '<i8'), ('d', '<i8')])

In [0]:
np.genfromtxt(data, names="1, 2, 3, 4")

  """Entry point for launching an IPython kernel.


array([], dtype=[('1', '<f8'), ('2', '<f8'), ('3', '<f8'), ('4', '<f8')])

In [0]:
data = StringIO('Skip this line!Bamm!!\n #m n l s \n1 2 3 4\n5 6 7 8')
np.genfromtxt(data, skip_header=1, names = True)

array([(1., 2., 3., 4.), (5., 6., 7., 8.)],
      dtype=[('m', '<f8'), ('n', '<f8'), ('l', '<f8'), ('s', '<f8')])

In [0]:
data = StringIO("1 2 3 4\n5 6 7 8")
# ndtype=[('a', int), ('b', int), ('c', int)]
# names = ["alpha", "Beta", "gamma"]
# np.genfromtxt(data, dtype=ndtype)

In [0]:
ndtype = (int, float, float)
np.genfromtxt(data, dtype = ndtype)

array([(1, 2., 3.), (5, 6., 7.)],
      dtype=[('f0', '<i8'), ('f1', '<f8'), ('f2', '<f8')])

In [0]:
data = StringIO("1 2 3 4\n5 6 7 8")
np.genfromtxt(data, dtype=(int, int, float, int), names = 'apa')

array([(1, 2, 3., 4), (5, 6, 7., 8)],
      dtype=[('apa', '<i8'), ('f0', '<i8'), ('f1', '<f8'), ('f2', '<i8')])

In [0]:
data = StringIO("1 2 3 4 5\n6 7 8 9 10")
np.genfromtxt(data, dtype=(int, int, int, float), defaultfmt='alpha_%02i')

array([(1, 2, 3, 4.), (6, 7, 8, 9.)],
      dtype=[('alpha_00', '<i8'), ('alpha_01', '<i8'), ('alpha_02', '<i8'), ('alpha_03', '<f8')])

In [0]:
convertfunc = lambda x: float(x.strip(b"%"))/100.

data = u"1, 2.34%, 45.\n6, 78.9%, 1"
names = ('i', 'p', 'n')

# np.genfromtxt(StringIO(data), delimiter=',', names = names)

In [0]:
convertfunc = lambda x: float(x.strip(b"%"))/100.
data = u"1, 2.3%, 45.\n6, 78.9%, 0"
names = ("i", "p", "n")
# General case .....
np.genfromtxt(StringIO(data), delimiter=",", names=names)

array([(1., nan, 45.), (6., nan,  0.)],
      dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])

In [0]:
convertfunc = lambda x: float(x.strip(b"%"))/100.
data = u"1, 2.3%, 45.\n6, 78.9%, 0"
names = ("i", "p", "n")
# General case .....
np.genfromtxt(StringIO(data), delimiter=",", names=names, converters={"p": convertfunc})

array([(1., 0.023, 45.), (6., 0.789,  0.)],
      dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])

In [0]:
data = u'1, , 3\n 4, 5,6'
convert = lambda x:float(x.strip() or -999)
np.genfromtxt(StringIO(data), delimiter=',', converters={1: convert})

array([[   1., -999.,    3.],
       [   4.,    5.,    6.]])

In [0]:
data = u"???, N/A, 1, 4\n ???, 2, !, 3"

keywards = dict(delimiter = ",",
                dtype = int,
                names = 'Alpha, Beta, gamma, imma',
                missing_values = {0:123, "Beta":"N/A", -2: "!", -1: " "},
                # filling_values={0: 100, 'Beta': 10000, -2: 10000000, -1: 3}
                )

np.genfromtxt(StringIO(data), usemask = True,**keywards)

masked_array(data=[(-1, --, 1, 4), (-1, 2, --, 3)],
             mask=[(False,  True, False, False),
                   (False, False,  True, False)],
       fill_value=(999999, 999999, 999999, 999999),
            dtype=[('Alpha', '<i8'), ('Beta', '<i8'), ('gamma', '<i8'), ('imma', '<i8')])

## Indexing

In [0]:
x = np.arange(10)
print(x)
print(x.reshape(5, 2))
print(x)
x.shape = (5, 2)
print(x)
print(x)

[0 1 2 3 4 5 6 7 8 9]
[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
[0 1 2 3 4 5 6 7 8 9]
[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]


In [0]:
x[0][0] = 100

In [0]:
x = np.arange(10)
b = x[np.array([2, 2, 3,5])]
b[1] =100
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [0]:
x[np.array([[1, 2],[3, 9]])]

array([[1, 2],
       [3, 9]])

In [0]:
y = np.arange(35).reshape(5,7)
print(y)
y[[0,2,4]]

[[ 0  1  2  3  4  5  6]
 [ 7  8  9 10 11 12 13]
 [14 15 16 17 18 19 20]
 [21 22 23 24 25 26 27]
 [28 29 30 31 32 33 34]]


array([[ 0,  1,  2,  3,  4,  5,  6],
       [14, 15, 16, 17, 18, 19, 20],
       [28, 29, 30, 31, 32, 33, 34]])

In [0]:
b = y>15
print(b)
b[:,5]
y[b[:, 5]]



[[False False False False False False False]
 [False False False False False False False]
 [False False  True  True  True  True  True]
 [ True  True  True  True  True  True  True]
 [ True  True  True  True  True  True  True]]


array([[14, 15, 16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25, 26, 27],
       [28, 29, 30, 31, 32, 33, 34]])

In [0]:
x = np.arange(30).reshape(3, 5, 2)
print(x)
b = np.array([[[True, False],[True, False],[True, False],[True,True],[False, False]], [[True, False],[True, False],[True, False],[True,True],[False, False]], [[True, False],[True, False],[True, False],[True,True],[False, False]]])
print(b)

[[[ 0  1]
  [ 2  3]
  [ 4  5]
  [ 6  7]
  [ 8  9]]

 [[10 11]
  [12 13]
  [14 15]
  [16 17]
  [18 19]]

 [[20 21]
  [22 23]
  [24 25]
  [26 27]
  [28 29]]]
[[[ True False]
  [ True False]
  [ True False]
  [ True  True]
  [False False]]

 [[ True False]
  [ True False]
  [ True False]
  [ True  True]
  [False False]]

 [[ True False]
  [ True False]
  [ True False]
  [ True  True]
  [False False]]]


In [0]:
print(x[b])

[ 0  2  4  6  7 10 12 14 16 17 20 22 24 26 27]


In [0]:
print(x.shape)
print(x)

(3, 5, 2)
[[[ 0  1]
  [ 2  3]
  [ 4  5]
  [ 6  7]
  [ 8  9]]

 [[10 11]
  [12 13]
  [14 15]
  [16 17]
  [18 19]]

 [[20 21]
  [22 23]
  [24 25]
  [26 27]
  [28 29]]]


In [0]:
x[[True, False, True],:, 1]

array([[ 1,  3,  5,  7,  9],
       [21, 23, 25, 27, 29]])

In [0]:
x[:][[0,2]][:,:,1]

array([[ 1,  3,  5,  7,  9],
       [21, 23, 25, 27, 29]])

In [0]:
b = x > 20
print(x.shape)
# print(x[b[:,1],1:3 ])
print(x[np.array([True, False, True]), 1:3])
# print(b)
# print()
# print(b.shape)
# print(y[b[:, 5]])

(3, 5, 2)
[[[ 2  3]
  [ 4  5]]

 [[22 23]
  [24 25]]]


In [0]:
x = np.arange(10).reshape(2, 5)
x

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [0]:
x.shape
print(x[:,np.newaxis,:].shape)
print(x[:,np.newaxis,:])

(2, 1, 5)
[[[0 1 2 3 4]]

 [[5 6 7 8 9]]]


In [0]:
x = np.arange(5)
print(x[:, np.newaxis])
print()
print(x[np.newaxis, :])
print()
print(x[np.newaxis, :]+x[:, np.newaxis])

[[0]
 [1]
 [2]
 [3]
 [4]]

[[0 1 2 3 4]]

[[0 1 2 3 4]
 [1 2 3 4 5]
 [2 3 4 5 6]
 [3 4 5 6 7]
 [4 5 6 7 8]]


In [0]:
z = np.arange(81).reshape(3,3,3,3)
print(z)

print(z[1,:, :,2])
print(z[1, ...,2])

[[[[ 0  1  2]
   [ 3  4  5]
   [ 6  7  8]]

  [[ 9 10 11]
   [12 13 14]
   [15 16 17]]

  [[18 19 20]
   [21 22 23]
   [24 25 26]]]


 [[[27 28 29]
   [30 31 32]
   [33 34 35]]

  [[36 37 38]
   [39 40 41]
   [42 43 44]]

  [[45 46 47]
   [48 49 50]
   [51 52 53]]]


 [[[54 55 56]
   [57 58 59]
   [60 61 62]]

  [[63 64 65]
   [66 67 68]
   [69 70 71]]

  [[72 73 74]
   [75 76 77]
   [78 79 80]]]]
[[29 32 35]
 [38 41 44]
 [47 50 53]]
[[29 32 35]
 [38 41 44]
 [47 50 53]]


In [0]:
x = np.arange(10)
x[2:9] = 1
x

array([0, 1, 1, 1, 1, 1, 1, 1, 1, 9])

In [0]:
x[2:7] = np.arange(5)
x

array([0, 1, 0, 1, 2, 3, 4, 1, 1, 9])

In [0]:
x[1]= 100.23
x
x[1] = 1.3j


TypeError: ignored

In [0]:
z = np.arange(81).reshape(3,3,3,3)
print(z)
indices = (2, 1,1,1)
z[indices]

In [0]:
z = np.arange(81).reshape(3,3,3,3)
print(z)
indices = (2, 1,1,slice(1,3))
z[indices]

In [0]:
z = np.arange(81).reshape(3,3,3,3)
print(z)
indices = (2, ...,1)
z[indices]

In [0]:
np.nonzero(z)

In [0]:
z[[1,1,1,1]]
z[(1,1,1,1)]


## Broadcasting

In [0]:
x = np.arange(5)
print(x.shape)
xx = x[:, np.newaxis]

y= np.arange(100).reshape(10,1, 10)
z = np.ones((5,3))
print(x)
print()
print(y)
print()
# print(x + y )
print(x + z)

## Byte-swapping

In [0]:
a = bytearray([1, 2, 3, 4])
a

In [0]:
swapped_end_arr = big_end_arr.byteswap().newbyteorder()
swapped_end_arr[0]

## Structured arrays

In [0]:
x = np.array([1.3], dtype= [('Test', 'i4')])

print(x.dtype)
print(x)

In [0]:
np.dtype([('x', 'f4'), ('y',np.float), ('z','f4',(2,2))])

dtype([('x', '<f4'), ('y', '<f8'), ('z', '<f4', (2, 2))])

In [0]:
np.dtype([('', 'f4'), ('','i4'), ('','f4')])

dtype([('f0', '<f4'), ('f1', '<i4'), ('f2', '<f4')])

In [0]:
np.dtype('i8, f4, S3')

dtype([('f0', '<i8'), ('f1', '<f4'), ('f2', 'S3')])

In [0]:
np.dtype('3int16, float32, (2, 3)f4')

dtype([('f0', '<i2', (3,)), ('f1', '<f4'), ('f2', '<f4', (2, 3))])

In [0]:
np.dtype({'names':['col1', 'col2'], 'formats':['i4', 'f4']})


dtype([('col1', '<i4'), ('col2', '<f4')])

In [0]:
data=np.dtype({'names': ['col1', 'col2'], 'formats': [float, 'f4']})

In [0]:
data.names

('col1', 'col2')

In [0]:
data.fields

mappingproxy({'col1': (dtype('float64'), 0), 'col2': (dtype('float32'), 8)})

In [0]:
# data = np.array([1, 2,3, 4])
data

AttributeError: ignored

In [0]:
def print_offsets(d):
  print("Offsets: ", [d.fields[name][1] for name in d.names])
  print('Itemsizes: ', d.itemsize)

print_offsets(np.dtype('u1, u2, f4, i4, f8, f4'))

Offsets:  [0, 1, 3, 7, 11, 19]
Itemsizes:  23


In [0]:
print_offsets(np.dtype('u1, u2, f4, i4, f8, f4', align= True))

Offsets:  [0, 2, 4, 8, 16, 24]
Itemsizes:  32


In [0]:
data = np.dtype([(('my title', 'name'), 'f4')])
data.fields

mappingproxy({'my title': (dtype('float32'), 0, 'my title'),
              'name': (dtype('float32'), 0, 'my title')})

In [0]:
for name in data.names:
  print(data.fields[name][:])

(dtype('float32'), 0, 'my title')


In [0]:
x = np.array([(1, 2, 3), (4,5,6)], dtype='i8,f4,f8')
# x[1] = (4, 5)

In [0]:
x = np.ones(1, dtype='i8, f4, ?, S1')
print(x)
x[:] = 3
print(x)

[(1, 1.,  True, b'1')]
[(3, 3.,  True, b'3')]


In [0]:
x[:] = np.arange(1)
print(x)
x

[(0, 0., False, b'0')]


array([(0, 0., False, b'0')],
      dtype=[('f0', '<i8'), ('f1', '<f4'), ('f2', '?'), ('f3', 'S1')])

In [0]:
twofield = np.zeros(2, dtype=[('A', 'i4'), ('B', 'f4')])
print(twofield)

onefield = np.zeros(2, dtype=[('A', 'f4')])
print(onefield)

nonstruct = np.zeros(2, dtype='i4')
print(nonstruct)

nonstruct[:] = onefield
print(nonstruct)

nonstruct[:] = twofield
print(nonstruct)

[(0, 0.) (0, 0.)]
[(0.,) (0.,)]
[0 0]
[0 0]


TypeError: ignored

In [0]:
a = np.zeros(2, dtype=[('a', 'i4'), ('b', 'f4')])
print(a)
b = np.ones(2, dtype=[('x', float), ('y', 'i4')])
print(b)

b[:] = a
print(b)

[(0, 0.) (0, 0.)]
[(1., 1) (1., 1)]
[(0., 0) (0., 0)]


In [0]:
a['a'] = 10
print(a)

[(10, 0.) (10, 0.)]


In [0]:
a['a']

array([10, 10], dtype=int32)

In [0]:
x = np.zeros(4,dtype = [('a', 'i4'), ('b', 'f8',(3, 3))])
print(x['b'].shape)

(4, 3, 3)


In [0]:
print(x)

[(0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])
 (0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])
 (0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])
 (0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])]


In [0]:
a = np.zeros(4, dtype= [('a', 'i4'), ('b', 'f4'), ('c', 'f4'), ('d', 'i4')])
print(a)
print(a[['a', 'b']])

[(0, 0., 0., 0) (0, 0., 0., 0) (0, 0., 0., 0) (0, 0., 0., 0)]
[(0, 0.) (0, 0.) (0, 0.) (0, 0.)]


In [0]:
a[['a', 'b']] = 1, 2
print(a)
a[['b', 'a']] = a[['a', 'b']]
print(a)

[(1, 2., 0., 0) (1, 2., 0., 0) (1, 2., 0., 0) (1, 2., 0., 0)]
[(2, 1., 0., 0) (2, 1., 0., 0) (2, 1., 0., 0) (2, 1., 0., 0)]


In [0]:
a = np.zeros((2, 2), dtype = [('a', 'f4'), ('b', 'i4')])
b = np.ones((2, 2), dtype =[('a', 'f4'), ('b', 'i4')])

a == b

array([[False, False],
       [False, False]])

In [0]:
record_a = np.rec.array(a)
print(record_a)
print()
print(record_a.b)

[[(0., 0) (0., 0)]
 [(0., 0) (0., 0)]]

[[0 0]
 [0 0]]


In [0]:
record_a.shape

(2, 2)

In [0]:
re_a = a.view(dtype=np.dtype((np.record, a.dtype)),
                     type=np.recarray)

print(re_a)

[[(0., 0) (0., 0)]
 [(0., 0) (0., 0)]]


In [0]:
from numpy.lib import recfunctions as rfn

## Writing custom array containers

In [0]:
class DiagonalArray:
  def __init__(self, N, value):
    self._N = N
    self._i = value
  def __repr__(self):
    return f"{self.__class__.__name__}(N = {self._N}, value = {self._i})"
  def __array__(self):
    return self._i*np.eye(self._N)
  def implements(np_function):
   "Register an __array_function__ implementation for DiagonalArray objects."
   def decorator(func):
       HANDLED_FUNCTIONS[np_function] = func
       return func
   return decorator
  
  @implements(np.mean)
  def mean(arr):
    "Implementation of np.mean for DiagonalArray objects"
    return arr._i / arr._N

  
dir(DiagonalArray)
repr(DiagonalArray(4,2))
arr = DiagonalArray(5, 10)
print(np.mean(arr))
# np.asarray(arr, dtype= int)
type(np.multiply(arr, 5))

NameError: ignored

## Subclassing ndarray



---

