In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Numpy array

In [2]:
# One-dimensional array built from a flat Python list
np.array([2, 3, 4])

array([2, 3, 4])

In [3]:
# Two-dimensional array: each inner sequence becomes one row
np.array([[2, 3, 4], [5, 6, 7]])

array([[2, 3, 4],
       [5, 6, 7]])

In [4]:
# 5x4 matrix filled with ones (floating point by default)
np.ones(shape=(5, 4))

array([[ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.]])

In [5]:
# Same ones matrix, but with an explicit integer element type
np.ones(shape=(5, 4), dtype=np.int_)

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]])

In [6]:
# 5x5 identity matrix (ones on the diagonal) with integer elements
np.eye(5, dtype=np.int_)

array([[1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1]])

### Random

In [7]:
# 5x4 samples drawn uniformly from the half-open interval [0, 1)
np.random.random(size=(5, 4))

array([[ 0.06356716,  0.27393115,  0.83639928,  0.61017919],
       [ 0.44023365,  0.68780721,  0.28001829,  0.44633625],
       [ 0.06221354,  0.01239281,  0.05039783,  0.11714536],
       [ 0.01827301,  0.63598521,  0.97940565,  0.99830885],
       [ 0.3575846 ,  0.67087718,  0.95267698,  0.60412638]])

In [8]:
# 5x4 samples from the standard normal distribution; loc (mean) and
# scale (std) are spelled out with their default values
np.random.normal(loc=0.0, scale=1.0, size=(5, 4))

array([[-0.64873491,  0.53493824,  0.02238095, -1.39469046],
       [-0.08559045,  0.62865021, -0.41505988,  0.62893503],
       [-2.09518329,  0.45086308,  0.91678133,  0.79292253],
       [ 0.29890277, -1.55940361, -0.44932938, -0.72445901],
       [-0.10104708, -0.15928221, -0.30197698,  0.497861  ]])

In [9]:
# 5x4 samples from a normal distribution with mean 50 and std 10
np.random.normal(loc=50, scale=10, size=(5, 4))

array([[ 56.63547424,  40.28528768,  44.47864685,  48.7062822 ],
       [ 52.22018014,  57.09544166,  44.43928335,  58.49078192],
       [ 52.68317883,  64.24065593,  52.51018273,  58.59666952],
       [ 58.88742772,  57.62294917,  30.64248113,  33.09322055],
       [ 60.07923809,  40.82556235,  48.55052892,  46.6037034 ]])

In [10]:
# 5x4 random integers from [50, 60): 50 is included, 60 is excluded
np.random.randint(low=50, high=60, size=(5, 4))

array([[59, 54, 59, 58],
       [58, 59, 59, 51],
       [57, 54, 59, 57],
       [59, 59, 55, 58],
       [51, 52, 55, 55]])

### Attributes

In [11]:
# Sample 5x4 array whose attributes are inspected below
a = np.random.random((5,4))

# Each line is a single-argument print(...) built with str.format: this parses
# as a print statement on Python 2 and as a function call on Python 3, and
# emits the same text on both (the bare `print "x", y` form is Python 2 only).
print("shape: {}".format(a.shape))
print("number of rows: {}".format(a.shape[0]))
print("number of columns {}".format(a.shape[1]))
# ndim is the number of axes, i.e. len(a.shape)
print("dimensions: {}".format(a.ndim))
print("number of elements: {}".format(a.size))
print("data type: {}".format(a.dtype))

shape: (5L, 4L)
number of rows: 5
number of columns 4
dimensions: 2
number of elements: 20
data type: float64


### Operations

In [12]:
# Fix the seed so the integers below are the same on every run
np.random.seed(44)
# 5x4 integers drawn from [0, 10)
a = np.random.randint(low=0, high=10, size=(5, 4))
a

array([[4, 3, 1, 3],
       [0, 4, 3, 8],
       [7, 7, 6, 9],
       [3, 7, 3, 3],
       [6, 5, 4, 5]])

In [13]:
# compute sum of each column (axis=0 collapses the rows)
# Single-argument print(...) with str.format prints the same text on
# Python 2 and Python 3; the bare print statement is Python 2 only.
print("Sum of each column {}".format(a.sum(axis=0)))

Sum of each column [20 26 17 28]


In [14]:
# compute sum of each row (axis=1 collapses the columns)
# Single-argument print(...) with str.format prints the same text on
# Python 2 and Python 3; the bare print statement is Python 2 only.
print("Sum of each row {}".format(a.sum(axis=1)))

Sum of each row [11 15 29 16 20]


In [15]:
# All output below uses single-argument print(...) with str.format, which
# prints the same text on Python 2 and Python 3 (the bare print statement is
# Python 2 only).

# Stats: min, max, mean of cols (axis=0 reduces over the rows)
print("Min of each col: {}".format(a.min(axis=0)))
print("Max of each col: {}".format(a.max(axis=0)))
print("Mean of each col: {}".format(a.mean(axis=0)))
print("")

# Stats: min, max, mean of rows (axis=1 reduces over the columns)
print("Min of each row: {}".format(a.min(axis=1)))
print("Max of each row: {}".format(a.max(axis=1)))
print("Mean of each row: {}".format(a.mean(axis=1)))
print("")

# Stats: min, max, mean over every element (no axis given)
print("Min of all elements: {}".format(a.min()))
print("Max of all elements: {}".format(a.max()))
print("Mean of all elements: {}".format(a.mean()))

Min of each col: [0 3 1 3]
Max of each col: [7 7 6 9]
Mean of each col: [ 4.   5.2  3.4  5.6]

Min of each row: [1 0 6 3 4]
Max of each row: [4 8 9 7 6]
Mean of each row: [ 2.75  3.75  7.25  4.    5.  ]

Min of all elements: 0
Max of all elements: 9
Mean of all elements: 4.55


In [16]:
## Index
# argmin/argmax return the position of the min/max instead of its value.
# All output uses single-argument print(...) with str.format, which prints the
# same text on Python 2 and Python 3 (the bare print statement is Python 2 only).

# Stats: min, max index of cols (row position within each column)
print("Min of each col: {} index: {}".format(a.min(axis=0), a.argmin(axis=0)))
print("Max of each col: {} index: {}".format(a.max(axis=0), a.argmax(axis=0)))
print("")

# Stats: min, max index of rows (column position within each row)
print("Min of each row: {} index: {}".format(a.min(axis=1), a.argmin(axis=1)))
print("Max of each row: {} index: {}".format(a.max(axis=1), a.argmax(axis=1)))
print("")

# Stats: min, max index over all elements; without an axis the index refers
# to the flattened array
print("Min of all elements: {} index: {}".format(a.min(), a.argmin()))
print("Max of all elements: {} index: {}".format(a.max(), a.argmax()))

Min of each col: [0 3 1 3] index: [1 0 0 0]
Max of each col: [7 7 6 9] index: [2 2 2 2]

Min of each row: [1 0 6 3 4] index: [2 0 2 0 2]
Max of each row: [4 8 9 7 6] index: [0 3 3 1 0]

Min of all elements: 0 index: 4
Max of all elements: 9 index: 11


### Time

In [17]:
# measurement
import time

# Prefer time.perf_counter (Python 3.3+): a monotonic clock with the highest
# available resolution, meant for timing short durations. Fall back to
# time.time() on interpreters that lack it; time.time() is wall-clock time
# whose resolution is platform-dependent and which can jump if the system
# clock is adjusted.
timer = getattr(time, "perf_counter", time.time)

t1 = timer()
print("fsdfdsfdfdfsdfsdgsfgdgdsgdsfrrgrgrggr") # operations
t2 = timer()

# Timer values are in seconds; multiply by 1000 to report milliseconds.
# Single-argument print(...) with str.format prints the same text on
# Python 2 and Python 3.
print("Time taken by print statement: {} miliseconds".format((t2-t1)*1000))

fsdfdsfdfdfsdfsdgsfgdgdsgdsfrrgrgrggr
Time taken by print statement: 0.999927520752 miliseconds


### Access and modify array elements

In [18]:
# Fix the seed so the floats below are the same on every run
np.random.seed(418)
# 5x4 floats drawn uniformly from [0, 1)
a = np.random.random(size=(5, 4))
a

array([[ 0.32052492,  0.21190025,  0.76364232,  0.94262042],
       [ 0.99006006,  0.41744635,  0.94986291,  0.07394805],
       [ 0.50027538,  0.0353727 ,  0.5867769 ,  0.42022024],
       [ 0.13698232,  0.10418081,  0.9520897 ,  0.25485493],
       [ 0.04297848,  0.23547558,  0.86090148,  0.34180976]])

In [19]:
# Access a single element: row index 3, column index 2 (both zero-based)
a[3, 2]

0.95208970281702165

In [20]:
# Slice a sub-block: rows 1-2 and columns 1-2 (the stop index 3 is excluded)
a[1:3, 1:3]

array([[ 0.41744635,  0.94986291],
       [ 0.0353727 ,  0.5867769 ]])

In [21]:
# Slice with a step: every row, and columns 0 and 2 (start 0, stop 3, step 2)
a[:, 0:3:2]

array([[ 0.32052492,  0.76364232],
       [ 0.99006006,  0.94986291],
       [ 0.50027538,  0.5867769 ],
       [ 0.13698232,  0.9520897 ],
       [ 0.04297848,  0.86090148]])

In [22]:
# Modify a single element in place; the integer 1 is stored as 1.0 because
# the array holds floats
a[0, 0] = 1
a

array([[ 1.        ,  0.21190025,  0.76364232,  0.94262042],
       [ 0.99006006,  0.41744635,  0.94986291,  0.07394805],
       [ 0.50027538,  0.0353727 ,  0.5867769 ,  0.42022024],
       [ 0.13698232,  0.10418081,  0.9520897 ,  0.25485493],
       [ 0.04297848,  0.23547558,  0.86090148,  0.34180976]])

In [23]:
# Modify a slice in place: the scalar 2 is written to every element of the
# 2x2 sub-block (rows 1-2, columns 1-2)
a[1:3, 1:3] = 2
a

array([[ 1.        ,  0.21190025,  0.76364232,  0.94262042],
       [ 0.99006006,  2.        ,  2.        ,  0.07394805],
       [ 0.50027538,  2.        ,  2.        ,  0.42022024],
       [ 0.13698232,  0.10418081,  0.9520897 ,  0.25485493],
       [ 0.04297848,  0.23547558,  0.86090148,  0.34180976]])

In [24]:
# Assign a list to column 3 in place: one value per row, so the list length
# (5) matches the number of rows
a[:, 3] = [1, 2, 3, 4, 5]
a

array([[ 1.        ,  0.21190025,  0.76364232,  1.        ],
       [ 0.99006006,  2.        ,  2.        ,  2.        ],
       [ 0.50027538,  2.        ,  2.        ,  3.        ],
       [ 0.13698232,  0.10418081,  0.9520897 ,  4.        ],
       [ 0.04297848,  0.23547558,  0.86090148,  5.        ]])

In [25]:
# Index with an integer array: selects whole rows; a one-element index array
# still yields a 2-D result (here 1 row x 4 columns)
indices = np.array([1])
a[indices]

array([[ 0.99006006,  2.        ,  2.        ,  2.        ]])

In [26]:
# Index with an integer array: rows are returned in the order listed, and an
# index may repeat (row 1 appears twice, then row 2)
indices = np.array([1, 1, 2])
a[indices]

array([[ 0.99006006,  2.        ,  2.        ,  2.        ],
       [ 0.99006006,  2.        ,  2.        ,  2.        ],
       [ 0.50027538,  2.        ,  2.        ,  3.        ]])

### Binary masking

In [27]:
# 2x9 integer array used for the masking examples
a = np.array([
    [20, 25, 10, 23, 26, 32, 10, 5, 0],
    [0, 2, 50, 20, 0, 1, 28, 5, 0],
])
a

array([[20, 25, 10, 23, 26, 32, 10,  5,  0],
       [ 0,  2, 50, 20,  0,  1, 28,  5,  0]])

In [28]:
# Arithmetic mean over every element of the array (no axis given)
mean = np.mean(a)
mean

14.277777777777779

In [29]:
# Boolean masking: `a<mean` is an element-wise boolean array; indexing with it
# returns the selected elements as a flat 1-D array
a[a<mean]

array([10, 10,  5,  0,  0,  2,  0,  1,  5,  0])

In [30]:
# Mask and replace, in place: every element below the mean is overwritten.
# NOTE: `a` holds integers, so the float mean (14.27...) is truncated to 14 on
# assignment rather than stored exactly.
a[a<mean] = mean
a

array([[20, 25, 14, 23, 26, 32, 14, 14, 14],
       [14, 14, 50, 20, 14, 14, 28, 14, 14]])

### Arithmetic Operations

In [31]:
# First operand for the arithmetic examples: a 2x5 integer array
a = np.array([
    [1, 2, 3, 4, 5],
    [10, 20, 30, 40, 50],
])
a

array([[ 1,  2,  3,  4,  5],
       [10, 20, 30, 40, 50]])

In [32]:
# Second operand for the arithmetic examples: a 2x5 integer array
b = np.array([
    [100, 200, 300, 400, 500],
    [1, 2, 3, 4, 5],
])
b

array([[100, 200, 300, 400, 500],
       [  1,   2,   3,   4,   5]])

In [33]:
# Multiply every element by 2
a * 2

array([[  2,   4,   6,   8,  10],
       [ 20,  40,  60,  80, 100]])

In [34]:
# floor-divide by 2 (integer result)
# `//` is written explicitly so the integer result is the same on every Python
# version: a bare `/` on an integer array floors on Python 2 but returns
# floats on Python 3.
a // 2

array([[ 0,  1,  1,  2,  2],
       [ 5, 10, 15, 20, 25]])

In [35]:
# divide by 2.0: a float divisor gives true division and a float array
a / 2.0

array([[  0.5,   1. ,   1.5,   2. ,   2.5],
       [  5. ,  10. ,  15. ,  20. ,  25. ]])

In [36]:
# a + b: element-wise sum of two arrays of the same shape
a + b

array([[101, 202, 303, 404, 505],
       [ 11,  22,  33,  44,  55]])

In [37]:
# a * b: element-wise product (not a matrix product)
a * b

array([[ 100,  400,  900, 1600, 2500],
       [  10,   40,   90,  160,  250]])

In [38]:
# a // b: element-wise floor division (integer result)
# `//` is written explicitly so the integer result is the same on every Python
# version: a bare `/` between integer arrays floors on Python 2 but returns
# floats on Python 3.
a // b

array([[ 0,  0,  0,  0,  0],
       [10, 10, 10, 10, 10]])

### Others
[NumPy reference documentation](https://numpy.org/doc/stable/reference/index.html)