In [16]:
import numpy as np

## Working with ndarray

In [3]:
# generating 1d ndarray using arange: a single argument is the stop value,
# which is excluded; counting starts at 0
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [9]:
# start (inclusive), stop (exclusive)
np.arange(1, 10)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [10]:
# start, stop, step -- a float step produces a float array
np.arange(1, 10, 0.5)

array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,  5.5,  6. ,
        6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5])

In [12]:
# integer step of 3; 10 is not produced because stop is exclusive
np.arange(1, 10, 3)

array([1, 4, 7])

In [13]:
# array elements have a homogeneous datatype (unlike python lists);
# dtype= sets it explicitly, here producing floats from integer arguments
np.arange(1, 10, 2, dtype=np.float64)

array([ 1.,  3.,  5.,  7.,  9.])

### Examining ndarray

In [20]:
# odd integers 1..9 as a 1d array, reused by the inspection cells that follow
ds = np.arange(1, 10, 2)
print(ds)
# number of dimensions
ds.ndim

[1 3 5 7 9]


1

In [19]:
# shape of the array: one entry per dimension, so a 1-tuple for the 1d ds
ds.shape

(5,)

In [23]:
# size of array: total number of elements
ds.size

5

In [24]:
# datatype shared by every element of the array
ds.dtype

dtype('int64')

In [26]:
# size of a single data item in bytes (determined by the dtype)
ds.itemsize

8

In [34]:
# data stored in memory: .data is a buffer object over the array's raw storage,
# and list() reads the elements back out of that buffer
x = ds.data
list(x)

[1, 3, 5, 7, 9]

In [31]:
# display the array itself for comparison with the buffer contents
ds

array([1, 3, 5, 7, 9])

In [35]:
# memory usage of data in bytes: element count times bytes per element
# (ds.nbytes reports the same number)
ds.size * ds.itemsize

40

### Numpy Efficiency

In [45]:
# generate the integers 1..999 (999 elements; the stop value 1000 is excluded)

# Regular Python (16 us on my machine)
%timeit python_list_1 = list(range(1, 1000))

# Numpy (2 us, about 8x faster)
%timeit numpy_list_1 = np.arange(1, 1000)

The slowest run took 4.08 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 15.3 µs per loop
The slowest run took 17.86 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 2.1 µs per loop


### Basic Operations

In [46]:
# Plain-Python operands: two separate lists holding the integers 1..999.
pylist1 = [*range(1, 1000)]
pylist2 = pylist1.copy()

# NumPy operands: two separate arrays holding the same values.
nplist1 = np.arange(1, 1000)
nplist2 = nplist1.copy()

In [50]:
%%timeit
# Regular Python (about 300 us)
# each element-wise operation needs its own explicit pass over the zipped lists;
# the resulting lists are discarded, only the elapsed time matters here
[(x + y) for x, y in zip(pylist1, pylist2)]
[(x - y) for x, y in zip(pylist1, pylist2)]
[(x * y) for x, y in zip(pylist1, pylist2)]
[(x / y) for x, y in zip(pylist1, pylist2)]

1000 loops, best of 3: 364 µs per loop


In [56]:
%%timeit
# numpy (about 15 us, about 20x faster)
# the same four element-wise operations, written directly on the arrays
# with no Python-level loop
nplist1 + nplist2
nplist1 - nplist2
nplist1 * nplist2
nplist1 / nplist2

The slowest run took 20.08 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 15.7 µs per loop


### Array creation

In [60]:
# 1d array built from a python list
np.array([1,2,3,4,5])

array([1, 2, 3, 4, 5])

In [66]:
# 2d array built from a list of equal-length lists (3 rows, 2 cols)
np.array([[1,2], [3,4], [5,6]])

array([[1, 2],
       [3, 4],
       [5, 6]])

### zeros

In [67]:
# 3x4 matrix where all items are 0 (the shape is passed as a tuple)
np.zeros((3, 4))

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [77]:
# np.zeros generates float64 by default; pass dtype to get integers instead
np.zeros((3,4), dtype=np.int64)

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])

### ones

In [73]:
# 1d array of ten 1s (an integer argument is the length)
np.ones(10)

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [75]:
# 3d array of 1s with shape (3, 4, 2): 3 blocks, each 4 rows by 2 cols
np.ones((3,4,2))

array([[[ 1.,  1.],
        [ 1.,  1.],
        [ 1.,  1.],
        [ 1.,  1.]],

       [[ 1.,  1.],
        [ 1.,  1.],
        [ 1.,  1.],
        [ 1.,  1.]],

       [[ 1.,  1.],
        [ 1.,  1.],
        [ 1.,  1.],
        [ 1.,  1.]]])

### linspace

In [78]:
# array of evenly-spaced numbers between 1 and 5, both endpoints included
# 50 samples by default
np.linspace(1, 5)

array([ 1.        ,  1.08163265,  1.16326531,  1.24489796,  1.32653061,
        1.40816327,  1.48979592,  1.57142857,  1.65306122,  1.73469388,
        1.81632653,  1.89795918,  1.97959184,  2.06122449,  2.14285714,
        2.2244898 ,  2.30612245,  2.3877551 ,  2.46938776,  2.55102041,
        2.63265306,  2.71428571,  2.79591837,  2.87755102,  2.95918367,
        3.04081633,  3.12244898,  3.20408163,  3.28571429,  3.36734694,
        3.44897959,  3.53061224,  3.6122449 ,  3.69387755,  3.7755102 ,
        3.85714286,  3.93877551,  4.02040816,  4.10204082,  4.18367347,
        4.26530612,  4.34693878,  4.42857143,  4.51020408,  4.59183673,
        4.67346939,  4.75510204,  4.83673469,  4.91836735,  5.        ])

In [79]:
# num specifies the sample size (the number of points returned)
np.linspace(0, 2, num=4)

array([ 0.        ,  0.66666667,  1.33333333,  2.        ])

In [80]:
# set endpoint to False to exclude the stop number;
# the 4 samples are then spaced 0.5 apart instead of 2/3
np.linspace(0, 2, num=4, endpoint=False)

array([ 0. ,  0.5,  1. ,  1.5])

In [81]:
# with retstep=True linspace returns a (samples, step) pair, unpacked here
array, step = np.linspace(start=0, stop=2, num=4, endpoint=False, retstep=True)
print(step)

0.5


### random

In [85]:
# 2x3 matrix with random numbers in the half-open interval [0, 1);
# no seed is set, so the values differ on every run
np.random.random((2,3))

array([[ 0.39845816,  0.18059089,  0.873131  ],
       [ 0.12068546,  0.68426623,  0.05016297]])

### Statistical analysis

In [96]:
# 2x3 array of unseeded random values, used as sample data for the statistics below
dataset = np.random.random((2,3))
print(dataset)

[[ 0.70416267  0.42507494  0.09827604]
 [ 0.33680401  0.58578102  0.51145515]]


#### max

In [114]:
# max across all elements in array (flattened data); returns a single scalar
dataset.max()

0.7041626745753129

In [115]:
# the max in each col (axis=0 reduces down the rows, one result per column)
dataset.max(axis=0)

array([ 0.70416267,  0.58578102,  0.51145515])

In [116]:
# the max in each row (axis=1 reduces across the columns, one result per row)
dataset.max(axis=1)

array([ 0.70416267,  0.58578102])

#### min

In [117]:
# min across all elements in array (flattened data)
dataset.min()

0.098276042802181385

In [118]:
# the min in each col
dataset.min(axis=0)

array([ 0.33680401,  0.42507494,  0.09827604])

In [119]:
# the min in each row
dataset.min(axis=1)

array([ 0.09827604,  0.33680401])

#### mean

In [125]:
# arithmetic mean over all elements (flattened data)
dataset.mean()

0.44359230648089953

#### median

In [126]:
# median over all elements; called as a module-level function, np.median(...)
np.median(dataset)

0.46826504485227305

#### std

In [128]:
# standard deviation over all elements
# (population form: ddof defaults to 0, so the divisor is N rather than N - 1)
dataset.std()

0.1930931847076118

#### sum

In [129]:
# sum of all elements (flattened data)
dataset.sum()

2.6615538388853972

In [131]:
# the sum of each col
dataset.sum(axis=0)

array([ 1.04096669,  1.01085596,  0.60973119])

### Reshaping

In [132]:
# the 2x3 dataset again, as the starting point for the reshaping examples
dataset

array([[ 0.70416267,  0.42507494,  0.09827604],
       [ 0.33680401,  0.58578102,  0.51145515]])

#### reshape

In [134]:
# the source and output datasets should have the same total size
# (here 2*3 == 3*2 == 6); elements are refilled in row-major order
dataset.reshape((3,2))

array([[ 0.70416267,  0.42507494],
       [ 0.09827604,  0.33680401],
       [ 0.58578102,  0.51145515]])

In [135]:
# 6x1 column: six rows of one element each (still a 2d array)
dataset.reshape((6, 1))

array([[ 0.70416267],
       [ 0.42507494],
       [ 0.09827604],
       [ 0.33680401],
       [ 0.58578102],
       [ 0.51145515]])

In [137]:
# 1d array holding all 6 elements
dataset.reshape(6)

array([ 0.70416267,  0.42507494,  0.09827604,  0.33680401,  0.58578102,
        0.51145515])

#### ravel

In [138]:
# flattens the array to 1d without having to spell out the length;
# numpy only copies the data if it has to
dataset.ravel()

array([ 0.70416267,  0.42507494,  0.09827604,  0.33680401,  0.58578102,
        0.51145515])

### Slicing

In [139]:
# rebind `dataset` to a new 5x10 array of unseeded random values for the
# slicing examples (this replaces the 2x3 array used in the cells above)
dataset = np.random.random((5, 10))
dataset

array([[ 0.25196182,  0.37386856,  0.79217584,  0.18780921,  0.97864133,
         0.46464074,  0.52408502,  0.47810457,  0.74910742,  0.59959729],
       [ 0.43355448,  0.51209536,  0.26184569,  0.69491137,  0.10648334,
         0.4123912 ,  0.50148936,  0.6902906 ,  0.88400607,  0.04378374],
       [ 0.71216524,  0.96255363,  0.56118607,  0.30269015,  0.55319101,
         0.04694539,  0.18154305,  0.76220385,  0.01445689,  0.17779057],
       [ 0.38466917,  0.99779336,  0.96944632,  0.73746338,  0.43872333,
         0.80188057,  0.00826241,  0.38569747,  0.27542679,  0.0517101 ],
       [ 0.06002231,  0.3383402 ,  0.04515675,  0.15436348,  0.97334308,
         0.43532022,  0.16386904,  0.55137801,  0.06437014,  0.64816225]])

In [141]:
# row at index 1 (the second row; indexing is 0-based)
dataset[1]

array([ 0.43355448,  0.51209536,  0.26184569,  0.69491137,  0.10648334,
        0.4123912 ,  0.50148936,  0.6902906 ,  0.88400607,  0.04378374])

In [143]:
# chained indexing: select row 1 first, then element 0 of that row
dataset[1][0]

0.43355448402749519

In [146]:
# shorter indexing notation: row and column in a single subscript
dataset[1, 0]

0.43355448402749519

#### slicing a range

In [149]:
# rows 2 and 3 (the stop index 4 is exclusive)
dataset[2:4]

array([[ 0.71216524,  0.96255363,  0.56118607,  0.30269015,  0.55319101,
         0.04694539,  0.18154305,  0.76220385,  0.01445689,  0.17779057],
       [ 0.38466917,  0.99779336,  0.96944632,  0.73746338,  0.43872333,
         0.80188057,  0.00826241,  0.38569747,  0.27542679,  0.0517101 ]])

In [151]:
# column 0 of rows 2 and 3; the integer index drops that dimension, so the result is 1d
dataset[2:4, 0]

array([ 0.71216524,  0.38466917])

In [152]:
# rows 2 and 3, columns 0 and 1; slicing both dimensions keeps the result 2d
dataset[2:4, 0:2]

array([[ 0.71216524,  0.96255363],
       [ 0.38466917,  0.99779336]])

In [154]:
# everything in dim 0 and 0'th element in dim 1, i.e. the first column
dataset[:, 0]

array([ 0.25196182,  0.43355448,  0.71216524,  0.38466917,  0.06002231])

#### stepping

In [158]:
# start:stop:step -- step of 1 is default, so this is the same as dataset[2:4]
dataset[2:4:1]

array([[ 0.71216524,  0.96255363,  0.56118607,  0.30269015,  0.55319101,
         0.04694539,  0.18154305,  0.76220385,  0.01445689,  0.17779057],
       [ 0.38466917,  0.99779336,  0.96944632,  0.73746338,  0.43872333,
         0.80188057,  0.00826241,  0.38569747,  0.27542679,  0.0517101 ]])

In [159]:
# everything: start, stop and step are all omitted, so every row is selected
dataset[::]

array([[ 0.25196182,  0.37386856,  0.79217584,  0.18780921,  0.97864133,
         0.46464074,  0.52408502,  0.47810457,  0.74910742,  0.59959729],
       [ 0.43355448,  0.51209536,  0.26184569,  0.69491137,  0.10648334,
         0.4123912 ,  0.50148936,  0.6902906 ,  0.88400607,  0.04378374],
       [ 0.71216524,  0.96255363,  0.56118607,  0.30269015,  0.55319101,
         0.04694539,  0.18154305,  0.76220385,  0.01445689,  0.17779057],
       [ 0.38466917,  0.99779336,  0.96944632,  0.73746338,  0.43872333,
         0.80188057,  0.00826241,  0.38569747,  0.27542679,  0.0517101 ],
       [ 0.06002231,  0.3383402 ,  0.04515675,  0.15436348,  0.97334308,
         0.43532022,  0.16386904,  0.55137801,  0.06437014,  0.64816225]])

In [157]:
# every other row (rows 0, 2 and 4); the step applies to the first dimension
dataset[::2]

array([[ 0.25196182,  0.37386856,  0.79217584,  0.18780921,  0.97864133,
         0.46464074,  0.52408502,  0.47810457,  0.74910742,  0.59959729],
       [ 0.71216524,  0.96255363,  0.56118607,  0.30269015,  0.55319101,
         0.04694539,  0.18154305,  0.76220385,  0.01445689,  0.17779057],
       [ 0.06002231,  0.3383402 ,  0.04515675,  0.15436348,  0.97334308,
         0.43532022,  0.16386904,  0.55137801,  0.06437014,  0.64816225]])

In [161]:
# rows 2 and 3 in full, for comparison with the column-stepped slice in the next cell
dataset[2:4]

array([[ 0.71216524,  0.96255363,  0.56118607,  0.30269015,  0.55319101,
         0.04694539,  0.18154305,  0.76220385,  0.01445689,  0.17779057],
       [ 0.38466917,  0.99779336,  0.96944632,  0.73746338,  0.43872333,
         0.80188057,  0.00826241,  0.38569747,  0.27542679,  0.0517101 ]])

In [162]:
# every other col (0, 2, 4, 6, 8) of rows 2 and 3
dataset[2:4, ::2]

array([[ 0.71216524,  0.56118607,  0.55319101,  0.18154305,  0.01445689],
       [ 0.38466917,  0.96944632,  0.43872333,  0.00826241,  0.27542679]])