In [1]:
import numpy as np
import pandas as pd

# Advanced Array Manipulation

## Reshaping

In [2]:
%%HTML
<style>
mark {
    background: teal;
    color: white;
    padding: 2px 5px
}

.i {
    color: red;
    font-weight: bold;
}
</style>

In [3]:
%%HTML
<h2><mark>Syntax</mark>: <code>ndarray.reshape(shape)</code> where shape is a tupple</h2>

In [4]:
A = np.arange(12)
A.reshape((3,4))

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

## Flattenning

In [5]:
%%HTML
<ul>
    <li><span class = 'i'>ndarray.ravel()</span>: does not return a copy of data if it has to</li>
    <li><span class = 'i'>ndarray.flatten()</span>: always return a copy of data</li>
</ul>
<hr/>
<ul>
A <b>pandas-like</b> intepreattion
   <li><code>ndarray.ravel()</code>: array.flattening(inplace = True)</li>
   <li><code>ndarray.flatten()</code>: array.flattening(inplace = False)</li> 
</ul>

In [6]:
A.ravel()
A

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

## C versus F order

In [7]:
%%HTML
<ul>
    <li><span class = 'i'>C order</span>: operating on row-wise (<i>row major order</i>)</li>
    <li><span class = 'i'> F order</span>: operatin on column-wise (<i>column major order</i>)</li>
</ul>

In [8]:
%%HTML
<h3>Example:</h3>

Original Data:

In [9]:
A = np.arange(12).reshape((3,4))
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

Flatten along row

In [10]:
A.flatten(order = 'C')

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

flatten along column

In [11]:
A.flatten(order = 'F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

## Concatenating and Splitting Arrays

### Concatenation

In [12]:
%%HTML
<h2>Generalization: <code class = 'i'>np.concatenate([arr1, arr2, ...], axis = ?)</code></h2>

In [13]:
companies = [['Apple', 'Google'],
             ['BMI', 'Samsung']]

products = [['Mobiphone', 'Search Engine'],
            ['CPU Core', 'Qualcom Dragon']]

In [14]:
# concat as rows
np.concatenate([companies, products], axis = 0)

array([['Apple', 'Google'],
       ['BMI', 'Samsung'],
       ['Mobiphone', 'Search Engine'],
       ['CPU Core', 'Qualcom Dragon']], dtype='<U14')

In [15]:
#concat as columns
np.concatenate([companies, products], axis = 1)

array([['Apple', 'Google', 'Mobiphone', 'Search Engine'],
       ['BMI', 'Samsung', 'CPU Core', 'Qualcom Dragon']], dtype='<U14')

In [16]:
%%HTML
<h2>Equivalent functions</h2>
<ul>
    <li><span class = 'i'>Horizontal concatenation</span>: <code>np.hstack((arr1, arr2, ...))</code></li>
    <li><span class = 'i'>Vertical concatenation</span>: <code>np.vstack((arr1, arr2, ...))</code></li>
</ul>

In [17]:
%%HTML
<mark style = 'font-size:24px'>np.hstack()</mark>

In [18]:
np.hstack((companies, products))

array([['Apple', 'Google', 'Mobiphone', 'Search Engine'],
       ['BMI', 'Samsung', 'CPU Core', 'Qualcom Dragon']], dtype='<U14')

In [19]:
%%HTML
<mark style = 'font-size:24px'>np.vstack()</mark>

In [20]:
np.vstack((companies, products))

array([['Apple', 'Google'],
       ['BMI', 'Samsung'],
       ['Mobiphone', 'Search Engine'],
       ['CPU Core', 'Qualcom Dragon']], dtype='<U14')

In [21]:
%%HTML
<h2>Stacking helper: <mark>np.r_</mark> and <mark>np.c_</mark></h2>

In [22]:
%%HTML
<h3><span class = 'i'>np.r_[arr1, arr2, ...]</span>: advaced version of <mark>np.hstack</mark></h3>

In [23]:
A = [1,2,3]
B = [4,5,6]
np.r_[A, B]

array([1, 2, 3, 4, 5, 6])

In [24]:
%%HTML
<h3><span class = 'i'>np.c_[arr1, arr2, ...]</span>: advanced version of <mark>np.vstack</span></h3>

In [25]:
np.c_[A, B]

array([[1, 4],
       [2, 5],
       [3, 6]])

In [26]:
%%HTML
<h3>Indices translation</h3>

In [27]:
#translate 0:6 to array: [0,1,2,3,4,5]
np.c_[0:6, [2,3,5,7,11,13]]

array([[ 0,  2],
       [ 1,  3],
       [ 2,  5],
       [ 3,  7],
       [ 4, 11],
       [ 5, 13]])

### Splitting

In [28]:
A = np.arange(12)
A

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [29]:
#Split A into 3 equal chunks
np.split(A, 3)

[array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([ 8,  9, 10, 11])]

In [30]:
# Split A at certain indices
np.split(A, [1, 3, 6,7, 11])

[array([0]),
 array([1, 2]),
 array([3, 4, 5]),
 array([6]),
 array([ 7,  8,  9, 10]),
 array([11])]

## Repeating Elements: Repeat and Tile

### Repeat

In [31]:
%%HTML
<ul>
    <li><span class = 'i'>ndarray.repeat(n)</span>: repeat each element of array ndarray n times</li>
    <li><span class = 'i'>ndarray.repeat((f1, f2, f3,...))</span>: repeat the first element f1 times, the second elements f2 times, ...</li>
</ul>

In [32]:
%%HTML
<h3>1-D array</h3>

In [33]:
A = np.array([0, 0,1, 3])
A.repeat(3)

array([0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3, 3])

In [34]:
A.repeat((2,2,4,3))

array([0, 0, 0, 0, 1, 1, 1, 1, 3, 3, 3])

In [35]:
%%HTML
<h3>2-D array</h3>

In [36]:
A = np.array([
    ['Chicken', 'Pork'],
    ['Corn', 'Banana']
])
#apply ndarray.repeat(3) for each row of A
A.repeat(3, axis = 1)

array([['Chicken', 'Chicken', 'Chicken', 'Pork', 'Pork', 'Pork'],
       ['Corn', 'Corn', 'Corn', 'Banana', 'Banana', 'Banana']],
      dtype='<U7')

In [37]:
#apply ndarray.repeat((3,2)) for each column of A
A.repeat((3,2), axis = 0)

array([['Chicken', 'Pork'],
       ['Chicken', 'Pork'],
       ['Chicken', 'Pork'],
       ['Corn', 'Banana'],
       ['Corn', 'Banana']], dtype='<U7')

### Tile

In [38]:
%%HTML
<span class = 'i' style = 'font-size:24px;'>np.tile(A = arr, reps = n)</span>
<p><b>Intuition</b>: Consider the array A as a <b>Tile</b>, then you place n tiles (n array A) in a row.</p>

In [39]:
A = np.array([
    [0,1],
    [2,3]
])
np.tile(A, 2)

array([[0, 1, 0, 1],
       [2, 3, 2, 3]])

In [40]:
%%HTML
<span class = 'i' style = 'font-size:24px;'>np.tile(A = arr, reps = (n1,n2))</span>
<p><b>Intuition</b>: Consider the array A as a <b>Tile</b>, then you place make a board <i>(n1 x n2)</i>
then for each cell in that board, you put a <b>Tile (array A)</b> in.</p>

In [41]:
np.tile(A, (2,3))

array([[0, 1, 0, 1, 0, 1],
       [2, 3, 2, 3, 2, 3],
       [0, 1, 0, 1, 0, 1],
       [2, 3, 2, 3, 2, 3]])

## Fancy Indexing Equivalent: Take and Put

In [42]:
np.random.seed(101)
profit = np.array([2000, 201, 101, 30])

In [43]:
#Get the proft of the 1-th company and 3-th company

profit.take([1,3]) #equivalent: names[[1,3]]

array([201,  30])

In [44]:
#Scale the profit of the 1-th and 3-th company by 1000 times
profit.put([1,3], profit.take([1,3]) * 1000)

profit

array([  2000, 201000,    101,  30000])

## Sorting

In [45]:
%%HTML
<h3>Direct sort</h3>

In [46]:
np.random.seed(101)
A = np.random.randint(0, 1000, 500)
#return a copy of sorted array
np.sort(A, kind = 'mergesort')

array([  1,   1,   3,   5,   5,  10,  10,  11,  13,  14,  17,  20,  21,
        22,  22,  25,  26,  35,  40,  40,  40,  45,  46,  46,  47,  47,
        47,  49,  50,  52,  52,  52,  59,  59,  61,  67,  71,  71,  73,
        75,  75,  76,  76,  76,  79,  79,  82,  85,  87,  88,  92,  93,
        94,  96,  97,  97,  99, 101, 102, 103, 105, 108, 110, 110, 112,
       115, 119, 122, 125, 129, 136, 137, 140, 144, 145, 149, 150, 152,
       153, 158, 163, 169, 177, 190, 190, 192, 193, 197, 203, 207, 207,
       208, 208, 211, 211, 214, 215, 217, 219, 220, 221, 222, 223, 223,
       223, 226, 229, 232, 235, 235, 238, 239, 239, 242, 243, 244, 244,
       245, 249, 250, 250, 251, 253, 254, 262, 264, 270, 271, 273, 275,
       275, 276, 277, 280, 284, 285, 289, 294, 297, 300, 302, 303, 305,
       309, 310, 311, 316, 327, 329, 330, 337, 340, 343, 343, 344, 348,
       350, 350, 356, 357, 358, 358, 359, 359, 362, 366, 369, 370, 371,
       376, 377, 378, 378, 378, 378, 380, 383, 383, 386, 387, 39

In [48]:
#inplace sort
A.sort(kind = 'heapsort')
A

array([  1,   1,   3,   5,   5,  10,  10,  11,  13,  14,  17,  20,  21,
        22,  22,  25,  26,  35,  40,  40,  40,  45,  46,  46,  47,  47,
        47,  49,  50,  52,  52,  52,  59,  59,  61,  67,  71,  71,  73,
        75,  75,  76,  76,  76,  79,  79,  82,  85,  87,  88,  92,  93,
        94,  96,  97,  97,  99, 101, 102, 103, 105, 108, 110, 110, 112,
       115, 119, 122, 125, 129, 136, 137, 140, 144, 145, 149, 150, 152,
       153, 158, 163, 169, 177, 190, 190, 192, 193, 197, 203, 207, 207,
       208, 208, 211, 211, 214, 215, 217, 219, 220, 221, 222, 223, 223,
       223, 226, 229, 232, 235, 235, 238, 239, 239, 242, 243, 244, 244,
       245, 249, 250, 250, 251, 253, 254, 262, 264, 270, 271, 273, 275,
       275, 276, 277, 280, 284, 285, 289, 294, 297, 300, 302, 303, 305,
       309, 310, 311, 316, 327, 329, 330, 337, 340, 343, 343, 344, 348,
       350, 350, 356, 357, 358, 358, 359, 359, 362, 366, 369, 370, 371,
       376, 377, 378, 378, 378, 378, 380, 383, 383, 386, 387, 39

In [50]:
%%HTML
<h3>Indirect sort</h3>
<ul>
    <li>np.argsort</li>
    <li>np.lexsort</li>

In [55]:
%%HTML
<h3>Searchsorted: <code>ndarray.searchsorted(a value or a list of value)</h3>

In [58]:
A = np.array([0, 3, 5, 5, 5, 8, 9])

In [59]:
#bisect_left
A.searchsorted(5, side = 'left')

2

In [60]:
#bisect_right
A.searchsorted(5, side = 'right')

5

In [61]:
%%HTML
<h3>Histogram</h3>

In [65]:
bins = np.array([0, 100, 200, 300, 400, 500])
vals = np.random.randint(0, 600, 1000)
pos = bins.searchsorted(vals)
#pos[i] will tell us which bin that vals[i] belongs to
pos

array([5, 5, 2, 3, 1, 5, 6, 3, 5, 2, 5, 6, 1, 5, 4, 1, 5, 1, 3, 1, 4, 5,
       4, 4, 2, 4, 6, 5, 5, 5, 4, 1, 3, 5, 4, 1, 5, 1, 6, 4, 3, 2, 5, 4,
       4, 1, 4, 2, 3, 6, 1, 5, 1, 3, 4, 6, 5, 4, 4, 3, 4, 1, 2, 2, 4, 3,
       4, 4, 3, 6, 3, 5, 3, 1, 6, 5, 1, 5, 5, 3, 1, 4, 2, 1, 3, 2, 4, 1,
       6, 4, 2, 6, 2, 3, 5, 4, 6, 3, 1, 4, 4, 4, 6, 5, 6, 5, 2, 6, 1, 5,
       1, 1, 4, 4, 2, 4, 2, 4, 5, 3, 2, 6, 4, 3, 2, 5, 3, 5, 4, 2, 3, 2,
       5, 3, 2, 6, 3, 3, 5, 5, 2, 3, 1, 6, 6, 1, 1, 2, 1, 3, 3, 1, 4, 3,
       5, 3, 6, 3, 1, 3, 6, 4, 4, 3, 2, 5, 1, 4, 6, 5, 1, 2, 4, 5, 2, 6,
       1, 5, 1, 5, 5, 1, 2, 3, 5, 6, 6, 2, 2, 3, 2, 6, 3, 4, 3, 1, 2, 1,
       2, 1, 2, 5, 1, 1, 2, 1, 1, 4, 1, 5, 1, 5, 4, 6, 6, 1, 3, 4, 4, 4,
       5, 5, 5, 2, 5, 5, 2, 3, 2, 6, 4, 6, 2, 1, 4, 1, 6, 1, 4, 5, 6, 5,
       2, 4, 4, 5, 4, 5, 4, 1, 6, 2, 6, 5, 6, 5, 2, 1, 2, 6, 1, 3, 6, 6,
       2, 4, 1, 2, 5, 3, 6, 2, 1, 5, 1, 5, 1, 2, 1, 3, 3, 5, 3, 6, 1, 2,
       5, 2, 2, 4, 4, 5, 3, 1, 3, 3, 1, 2, 3, 6, 3,

In [67]:
%%HTML
<p>Equivalent: <span class = 'i'>np.digitize(arr)</span></p>

In [68]:
np.digitize(vals, bins)

array([5, 5, 2, 3, 1, 5, 6, 3, 5, 2, 5, 6, 1, 5, 4, 1, 5, 1, 3, 1, 4, 5,
       4, 4, 2, 4, 6, 5, 5, 5, 4, 1, 3, 5, 4, 1, 5, 1, 6, 4, 3, 2, 5, 4,
       4, 1, 4, 2, 3, 6, 1, 6, 1, 3, 4, 6, 5, 4, 4, 3, 4, 1, 2, 2, 4, 3,
       4, 4, 3, 6, 3, 5, 3, 1, 6, 5, 1, 5, 5, 3, 1, 4, 2, 1, 3, 2, 4, 1,
       6, 4, 2, 6, 2, 3, 5, 4, 6, 3, 1, 4, 4, 4, 6, 5, 6, 5, 2, 6, 1, 5,
       1, 1, 4, 4, 2, 4, 2, 4, 5, 3, 2, 6, 4, 3, 2, 5, 3, 5, 5, 2, 3, 2,
       5, 3, 2, 6, 3, 3, 5, 5, 2, 3, 1, 6, 6, 1, 1, 2, 1, 3, 3, 1, 4, 3,
       5, 3, 6, 3, 1, 3, 6, 4, 4, 3, 2, 5, 1, 4, 6, 5, 1, 2, 4, 5, 2, 6,
       1, 5, 1, 5, 5, 1, 2, 3, 5, 6, 6, 2, 2, 3, 2, 6, 3, 4, 3, 1, 2, 1,
       2, 1, 2, 5, 1, 1, 2, 1, 1, 4, 1, 5, 1, 5, 4, 6, 6, 1, 3, 4, 4, 4,
       5, 5, 5, 2, 5, 5, 2, 3, 2, 6, 4, 6, 2, 1, 4, 1, 6, 1, 4, 5, 6, 5,
       2, 4, 4, 5, 4, 5, 4, 1, 6, 2, 6, 5, 6, 5, 2, 1, 2, 6, 1, 3, 6, 6,
       2, 4, 1, 2, 5, 3, 6, 2, 1, 5, 1, 5, 1, 2, 1, 3, 3, 5, 3, 6, 1, 2,
       5, 2, 2, 4, 4, 5, 3, 1, 3, 3, 1, 2, 3, 6, 3,

## Set operation

In [5]:
%%HTML
<h2><code>np.intersect1d(arr1, arr2)</code>: <span class = 'i'>set1 & set2</span></h2>

In [4]:
import numpy as np
arr1 = ['Apple', 'Melon', 'King']
arr2 = ['King', 'Queen', ' Jack']
np.intersect1d(arr1, arr2)

array(['King'], dtype='<U5')

In [12]:
%%HTML
<h2><code>np.union1d(arr1, arr2)</code>: <span class = 'i'>set1 | set2</span></h2>

In [7]:
np.union1d(arr1, arr2)

array([' Jack', 'Apple', 'King', 'Melon', 'Queen'], dtype='<U5')

In [15]:
%%HTML
<h2><code>np.setxor1d(arr1, arr2)</code>: <span class = 'i'>set1 ^ set2</span></h2>

In [16]:
np.setxor1d(arr1, arr2)

array([' Jack', 'Apple', 'Melon', 'Queen'], dtype='<U5')

In [17]:
%%HTML
<h2><code>np.in1d(arr1, arr2)</code>: <span class = 'i'>Series.isin([val1, val2, ...])</span></h2>

In [18]:
np.in1d(arr1, ['King', 'Queen', 'Knight', 'Thief'])

array([False, False,  True])

In [14]:
np.**1d?