# Advanced NumPy
Examples from *Python for Data Analysis*.

In [2]:
import numpy as np

## Array Manipulation

### Reshaping arrays

In [4]:
# Build a 1-D array holding the integers 0 through 7.
arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [5]:
# Reshape the 8 elements into 4 rows of 2; the tuple is the target shape.
arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [6]:
# Passing -1 for one dimension lets NumPy infer its size from the data:
# 15 elements spread over 5 rows gives 3 columns.

arr = np.arange(15)
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

### Flattening or raveling
The opposite operation of **reshape**: going from a higher-dimensional array to a one-dimensional one.

In [8]:
# Start from a 5x3 array so there is a 2-D layout to flatten.
arr = np.arange(15).reshape((5, 3))
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [9]:
# ravel() returns a flattened 1-D array; it avoids copying the underlying
# data when it can and makes a copy only if one is needed.

arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [10]:
# flatten() also returns a 1-D array, but always as a copy of the data.

arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [12]:
# The order argument selects the traversal: 'C' (the default) is row-major,
# 'F' is column-major, as used here. See the ravel documentation for details.

arr.ravel('F')

array([ 0,  3,  6,  9, 12,  1,  4,  7, 10, 13,  2,  5,  8, 11, 14])

## Concatenating and Splitting

In [13]:
# Two 2x3 integer arrays used as inputs for the joining examples.
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

In [18]:
# axis=0 joins along rows: arr2's rows are appended below arr1's.
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [22]:
# For these 2-D inputs, vstack gives the same result as concatenate with axis=0.
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [23]:
# axis=1 joins along columns: arr2 is placed to the right of arr1.
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [24]:
# For these 2-D inputs, hstack gives the same result as concatenate with axis=1.
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [25]:
from numpy.random import randn

In [31]:
# Draw a 5x2 array of random samples; no seed is set in the cells shown,
# so the values differ from run to run.
arr = randn(5, 2)
arr

array([[-2.5881827 , -0.31456454],
       [-0.70495898, -0.58348643],
       [-2.54838275, -0.06358856],
       [ 0.11764976, -0.91119846],
       [-0.58827418,  0.22886066]])

In [32]:
# Split along the first axis at row indices 1 and 3, producing three pieces:
# rows [0:1], rows [1:3] and rows [3:].
first, second, third = np.split(arr, [1, 3])

In [33]:
# First piece of the split: row 0 only.
first

array([[-2.5881827 , -0.31456454]])

In [34]:
# Second piece of the split: rows 1 and 2.
second

array([[-0.70495898, -0.58348643],
       [-2.54838275, -0.06358856]])

### Stacking helpers: `r_` and `c_`

In [35]:
# arr1 holds the integers 0..5 arranged as 3 rows of 2;
# arr2 is a 3x2 array of random floats.
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr2 = randn(3, 2)

In [38]:
# np.r_ stacks along the first axis (rows). The integer arr1 appears as
# floats in the result because it is combined with the float arr2.
np.r_[arr1, arr2]

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [-0.24047504,  1.12881335],
       [ 0.78671326,  0.11149034],
       [ 0.13095797,  0.97915262]])

## Repeating Elements: Tile and Repeat

In [39]:
# Small array [0, 1, 2] used to demonstrate repeat.
arr = np.arange(3)

In [41]:
# With an integer argument, each element is repeated that many times.

arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [42]:
# With a sequence of counts, element i is repeated counts[i] times.

arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

#### Repeating along a particular axis

In [44]:
# 2x2 array of random values for the axis-wise repeat and tile examples.
arr = randn(2, 2)
arr

array([[ 0.82574846,  1.1552285 ],
       [ 0.90010883,  0.02883426]])

In [45]:
# Repeat each row twice (axis=0 selects rows).
arr.repeat(2, axis=0)

array([[ 0.82574846,  1.1552285 ],
       [ 0.82574846,  1.1552285 ],
       [ 0.90010883,  0.02883426],
       [ 0.90010883,  0.02883426]])

In [48]:
# Per-row counts: the first row twice, the second row three times.
arr.repeat([2, 3], axis=0)

array([[ 0.82574846,  1.1552285 ],
       [ 0.82574846,  1.1552285 ],
       [ 0.90010883,  0.02883426],
       [ 0.90010883,  0.02883426],
       [ 0.90010883,  0.02883426]])

In [49]:
# The original array is unchanged: the repeat calls returned new arrays.
arr

array([[ 0.82574846,  1.1552285 ],
       [ 0.90010883,  0.02883426]])

In [50]:
# A scalar count lays copies of the array side by side (2x2 becomes 2x4).
np.tile(arr, 2)

array([[ 0.82574846,  1.1552285 ,  0.82574846,  1.1552285 ],
       [ 0.90010883,  0.02883426,  0.90010883,  0.02883426]])

In [51]:
# A tuple gives the tile layout: 3 copies down, 2 copies across.
np.tile(arr, (3, 2))

array([[ 0.82574846,  1.1552285 ,  0.82574846,  1.1552285 ],
       [ 0.90010883,  0.02883426,  0.90010883,  0.02883426],
       [ 0.82574846,  1.1552285 ,  0.82574846,  1.1552285 ],
       [ 0.90010883,  0.02883426,  0.90010883,  0.02883426],
       [ 0.82574846,  1.1552285 ,  0.82574846,  1.1552285 ],
       [ 0.90010883,  0.02883426,  0.90010883,  0.02883426]])

## Fancy Indexing: Take and Put

In [52]:
# Values 0, 100, ..., 900 and a list of integer positions to select or overwrite.
arr = np.arange(10) * 100
inds = [7, 1, 2, 6]

In [53]:
# Fancy indexing: select elements by a list of integer positions.
arr[inds]

array([700, 100, 200, 600])

In [54]:
# take() selects the same elements as arr[inds].
arr.take(inds)

array([700, 100, 200, 600])

In [56]:
# put() writes in place: every position listed in inds is set to 42.
arr.put(inds, 42)
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [58]:
# With a sequence of values, position inds[i] receives the i-th value
# (index 7 -> 40, 1 -> 41, 2 -> 42, 6 -> 43); arr is again modified in place.
arr.put(inds, [40, 41, 42, 43])
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])