### Creating ndarrays

In [1]:
import numpy as np

In [2]:
# Build a 1-D ndarray from a Python list; mixing ints with a float yields a float array
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
arr1

array([ 6. ,  7.5,  8. ,  0. ,  1. ])

In [3]:
# A nested sequence (here a list of two equal-length lists) is converted
# into a multidimensional array, one inner list per row.
data2 = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [4]:
# Number of dimensions (axes) of arr2
arr2.ndim

2

In [5]:
# Shape: the size of arr2 along each dimension
arr2.shape

(2, 4)

In [6]:
# Element type (dtype) of each array.
# Uses the function-call form of print with an explicit format string so the
# cell runs on Python 3 and still prints "float64 int64"-style output on Python 2.
print("{0} {1}".format(arr1.dtype, arr2.dtype))

float64 int64


#### Array creation functions

In [7]:
# One-dimensional array initialized with zeros, with shape (10,)
np.zeros(10)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [8]:
# Multidimensional array initialized with zeros, with shape (2,4)
np.zeros((2,4))

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [9]:
# np.zeros_like takes an ndarray and returns a zero-filled ndarray with the same shape as the one passed in
# note that a _like version exists for the functions {ones, zeros, empty}
np.zeros_like(arr2)

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

In [10]:
# Uninitialized array with shape (2,4) - do not assume the returned array is filled with 0's!
# numpy simply does not initialize the array
np.empty((2,4))

array([[  0.00000000e+000,   0.00000000e+000,   6.90179798e-310,
          6.90179798e-310],
       [  6.90179801e-310,   6.90179798e-310,   6.90179801e-310,
          0.00000000e+000]])

In [11]:
# One-dimensional array with the elements of range(10)
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
# One-dimensional array covering the half-open interval [0., 10.) with step .1 (10. itself is excluded)
np.arange(0, 10, .1)

array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ,
        1.1,  1.2,  1.3,  1.4,  1.5,  1.6,  1.7,  1.8,  1.9,  2. ,  2.1,
        2.2,  2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,  3.1,  3.2,
        3.3,  3.4,  3.5,  3.6,  3.7,  3.8,  3.9,  4. ,  4.1,  4.2,  4.3,
        4.4,  4.5,  4.6,  4.7,  4.8,  4.9,  5. ,  5.1,  5.2,  5.3,  5.4,
        5.5,  5.6,  5.7,  5.8,  5.9,  6. ,  6.1,  6.2,  6.3,  6.4,  6.5,
        6.6,  6.7,  6.8,  6.9,  7. ,  7.1,  7.2,  7.3,  7.4,  7.5,  7.6,
        7.7,  7.8,  7.9,  8. ,  8.1,  8.2,  8.3,  8.4,  8.5,  8.6,  8.7,
        8.8,  8.9,  9. ,  9.1,  9.2,  9.3,  9.4,  9.5,  9.6,  9.7,  9.8,
        9.9])

In [13]:
# np.asarray does not create a copy when the input is already an ndarray
# np.array creates a copy when the input is already an ndarray
# (print is written in function-call form so the cell runs on Python 3;
#  with a single argument it prints the same text on Python 2.)
arr3 = np.array([1,2])
arr4 = np.asarray(arr3)  # refers to the same array as arr3
arr5 = np.array(arr3)    # independent copy of arr3
arr4[0] = 2  # also visible through arr3
arr5[0] = 5  # arr3 is unaffected
print("veja que alteração em arr4 refletiu em arr3")
print(arr3)
print(arr4)
print("veja que alteração em arr5 não refletiu em arr3")
print(arr3)
print(arr5)

veja que alteração em arr4 refletiu em arr3
[2 2]
[2 2]
veja que alteração em arr5 não refletiu em arr3
[2 2]
[5 2]


In [14]:
# Multidimensional array initialized with 1s, with shape (2,4)
np.ones((2,4))

array([[ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.]])

In [15]:
# Identity - 2-D array of size 4 (4x4): ones on the diagonal, zeros elsewhere
np.eye(4)

array([[ 1.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  1.]])

### Data Types for ndarrays

The numerical dtypes are named the same way: a type name, like float or int, followed by a number indicating the number of bits per element.

In [16]:
# Casting example: start from an integer array and inspect its dtype
arr = np.array([1,2,3,4,5])
arr.dtype

dtype('int64')

In [17]:
# Cast the integer array to float64; the converted array is bound to float_arr
float_arr = arr.astype(np.float64)

In [18]:
# dtype of the converted array
float_arr.dtype

dtype('float64')

In [19]:
# when converting float->int, the decimal part is truncated
arr = np.array([1.2, -3.4])
arr

array([ 1.2, -3.4])

In [20]:
# Truncation toward zero: 1.2 -> 1 and -3.4 -> -3
arr.astype(np.int32)

array([ 1, -3], dtype=int32)

In [21]:
# An array of strings can be converted to a numeric dtype - handy when reading ASCII files
# note that the dtype passed to np.array is the original (byte-string) one
# np.bytes_ replaces the np.string_ alias, which was removed in NumPy 2.0
numeric_string = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)

In [22]:
# Parse the byte strings as floats. np.float64 is used because the bare
# np.float alias was deprecated in NumPy 1.20 and removed in 1.24.
numeric_string.astype(np.float64)

array([  1.25,  -9.6 ,  42.  ])

### Operations between Arrays and Scalars

In [23]:
# Small integer array used by the scalar-arithmetic examples that follow
a = np.array([1,2,3,4])

In [24]:
# Multiply every element by the scalar 2
a*2

array([2, 4, 6, 8])

In [25]:
# Add the scalar 2 to every element
a+2

array([3, 4, 5, 6])

In [26]:
# Square every element
a**2

array([ 1,  4,  9, 16])

In [27]:
# Dividing by the float 2. produces a float array
a/2.

array([ 0.5,  1. ,  1.5,  2. ])

In [28]:
# Subtract the scalar 2 from every element
a-2

array([-1,  0,  1,  2])

### Basic indexing and slicing

In [29]:
# 10x10 grid holding 0..99 in row-major order, used by the indexing examples
a = np.arange(100).reshape(10, 10)

In [30]:
# Print the full 10x10 grid (function-call form so the cell runs on Python 3;
# with a single argument the printed text is the same on Python 2)
print(a)

[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]
 [30 31 32 33 34 35 36 37 38 39]
 [40 41 42 43 44 45 46 47 48 49]
 [50 51 52 53 54 55 56 57 58 59]
 [60 61 62 63 64 65 66 67 68 69]
 [70 71 72 73 74 75 76 77 78 79]
 [80 81 82 83 84 85 86 87 88 89]
 [90 91 92 93 94 95 96 97 98 99]]


In [31]:
a[:,0] # every index along the first dimension (:), only index 0 along the second dimension

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [32]:
a[0,:] # index 0 along the first dimension and every index along the second dimension

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [33]:
# Overwrite the whole first row with zeros (the scalar is broadcast across the row)
a[0, :] = 0

In [34]:
# Show a: the first row is now all zeros
a

array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])

In [35]:
a[0][:] # another notation, equivalent to a[0,:]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [36]:
a[:, :2] # all rows, columns up to and including column 1

array([[ 0,  0],
       [10, 11],
       [20, 21],
       [30, 31],
       [40, 41],
       [50, 51],
       [60, 61],
       [70, 71],
       [80, 81],
       [90, 91]])

In [37]:
a[:-1, :-1] # all rows except the last, all columns except the last

array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0],
       [10, 11, 12, 13, 14, 15, 16, 17, 18],
       [20, 21, 22, 23, 24, 25, 26, 27, 28],
       [30, 31, 32, 33, 34, 35, 36, 37, 38],
       [40, 41, 42, 43, 44, 45, 46, 47, 48],
       [50, 51, 52, 53, 54, 55, 56, 57, 58],
       [60, 61, 62, 63, 64, 65, 66, 67, 68],
       [70, 71, 72, 73, 74, 75, 76, 77, 78],
       [80, 81, 82, 83, 84, 85, 86, 87, 88]])

In [38]:
a[1:-2, 1:-2] # rows from 1 up to (but excluding) the last two, columns from 1 up to (but excluding) the last two

array([[11, 12, 13, 14, 15, 16, 17],
       [21, 22, 23, 24, 25, 26, 27],
       [31, 32, 33, 34, 35, 36, 37],
       [41, 42, 43, 44, 45, 46, 47],
       [51, 52, 53, 54, 55, 56, 57],
       [61, 62, 63, 64, 65, 66, 67],
       [71, 72, 73, 74, 75, 76, 77]])

In [39]:
a[(a % 2 == 0) | (a % 3 == 0)] = 2 # assigns only to the multiples of 2 or of 3

In [40]:
# Show a after the masked assignment
a

array([[ 2,  2,  2,  2,  2,  2,  2,  2,  2,  2],
       [ 2, 11,  2, 13,  2,  2,  2, 17,  2, 19],
       [ 2,  2,  2, 23,  2, 25,  2,  2,  2, 29],
       [ 2, 31,  2,  2,  2, 35,  2, 37,  2,  2],
       [ 2, 41,  2, 43,  2,  2,  2, 47,  2, 49],
       [ 2,  2,  2, 53,  2, 55,  2,  2,  2, 59],
       [ 2, 61,  2,  2,  2, 65,  2, 67,  2,  2],
       [ 2, 71,  2, 73,  2,  2,  2, 77,  2, 79],
       [ 2,  2,  2, 83,  2, 85,  2,  2,  2, 89],
       [ 2, 91,  2,  2,  2, 95,  2, 97,  2,  2]])

In [41]:
a[[1,2],[3,4]] # returns the elements at the index pairs (1,3) and (2,4)

array([13,  2])

In [42]:
# The same two elements fetched one at a time: (1,3) and (2,4).
# The second index was (3,4), which does not match the fancy-indexing pair;
# both positions happen to hold 2 at this point, so the printed text is unchanged.
# Function-call print with an explicit format string runs on Python 2 and 3 alike.
print("{0} {1}".format(a[1,3], a[2,4]))

13 2


In [43]:
# np.ix_ selects the sub-block formed by rows [1,2] and columns [3,4] (2x2 result),
# instead of pairing the two index lists element by element
a[np.ix_([1,2],[3,4])]

array([[13,  2],
       [23,  2]])

### Conditional logic with arrays

In [44]:
# Fresh 10x10 grid of 0..99 for the np.where examples
a = np.arange(100).reshape(10, 10)

In [45]:
# Element-wise choice: 'm23' where the value is a multiple of 2 or 3, 'nm23' otherwise
np.where((a % 2 == 0) | (a % 3 == 0), 'm23', 'nm23')

array([['m23', 'nm23', 'm23', 'm23', 'm23', 'nm23', 'm23', 'nm23', 'm23',
        'm23'],
       ['m23', 'nm23', 'm23', 'nm23', 'm23', 'm23', 'm23', 'nm23', 'm23',
        'nm23'],
       ['m23', 'm23', 'm23', 'nm23', 'm23', 'nm23', 'm23', 'm23', 'm23',
        'nm23'],
       ['m23', 'nm23', 'm23', 'm23', 'm23', 'nm23', 'm23', 'nm23', 'm23',
        'm23'],
       ['m23', 'nm23', 'm23', 'nm23', 'm23', 'm23', 'm23', 'nm23', 'm23',
        'nm23'],
       ['m23', 'm23', 'm23', 'nm23', 'm23', 'nm23', 'm23', 'm23', 'm23',
        'nm23'],
       ['m23', 'nm23', 'm23', 'm23', 'm23', 'nm23', 'm23', 'nm23', 'm23',
        'm23'],
       ['m23', 'nm23', 'm23', 'nm23', 'm23', 'm23', 'm23', 'nm23', 'm23',
        'nm23'],
       ['m23', 'm23', 'm23', 'nm23', 'm23', 'nm23', 'm23', 'm23', 'm23',
        'nm23'],
       ['m23', 'nm23', 'm23', 'm23', 'm23', 'nm23', 'm23', 'nm23', 'm23',
        'm23']], 
      dtype='|S4')

In [46]:
# Replace multiples of 2 or 3 with 0 and keep the original value everywhere else
np.where((a % 2 == 0) | (a % 3 == 0), 0, a)

array([[ 0,  1,  0,  0,  0,  5,  0,  7,  0,  0],
       [ 0, 11,  0, 13,  0,  0,  0, 17,  0, 19],
       [ 0,  0,  0, 23,  0, 25,  0,  0,  0, 29],
       [ 0, 31,  0,  0,  0, 35,  0, 37,  0,  0],
       [ 0, 41,  0, 43,  0,  0,  0, 47,  0, 49],
       [ 0,  0,  0, 53,  0, 55,  0,  0,  0, 59],
       [ 0, 61,  0,  0,  0, 65,  0, 67,  0,  0],
       [ 0, 71,  0, 73,  0,  0,  0, 77,  0, 79],
       [ 0,  0,  0, 83,  0, 85,  0,  0,  0, 89],
       [ 0, 91,  0,  0,  0, 95,  0, 97,  0,  0]])

### Mathematical and statistical methods

In [47]:
# Fresh 10x10 grid of 0..99 for the statistics examples
a = np.arange(100).reshape(10, 10)

In [48]:
# Standard deviation over all elements
a.std()

28.866070047722118

In [49]:
# Variance over all elements
a.var()

833.25

In [50]:
# Position of the minimum, as an index into the flattened array
a.argmin()

0

In [51]:
# Position of the maximum, as an index into the flattened array
a.argmax()

99

In [52]:
# Arithmetic mean over all elements
a.mean()

49.5

In [53]:
# Cumulative sum over the flattened array (the result is 1-D)
a.cumsum()

array([   0,    1,    3,    6,   10,   15,   21,   28,   36,   45,   55,
         66,   78,   91,  105,  120,  136,  153,  171,  190,  210,  231,
        253,  276,  300,  325,  351,  378,  406,  435,  465,  496,  528,
        561,  595,  630,  666,  703,  741,  780,  820,  861,  903,  946,
        990, 1035, 1081, 1128, 1176, 1225, 1275, 1326, 1378, 1431, 1485,
       1540, 1596, 1653, 1711, 1770, 1830, 1891, 1953, 2016, 2080, 2145,
       2211, 2278, 2346, 2415, 2485, 2556, 2628, 2701, 2775, 2850, 2926,
       3003, 3081, 3160, 3240, 3321, 3403, 3486, 3570, 3655, 3741, 3828,
       3916, 4005, 4095, 4186, 4278, 4371, 4465, 4560, 4656, 4753, 4851,
       4950])

### Methods for boolean arrays

In [54]:
# Fresh 10x10 grid of 0..99 for the boolean-array examples
a = np.arange(100).reshape(10, 10)

In [55]:
(a % 2 == 0).sum() # number of multiples of 2 (each True counts as 1)

50

In [56]:
(a >= 0).all() # True if every element satisfies the condition

True

In [57]:
(a == 99).any() # True if at least one element satisfies the condition

True

### Sorting

In [59]:
# 5x5 array of standard-normal samples; no seed is set in the cells shown here,
# so the values differ from run to run
a = np.random.randn(5, 5)

In [60]:
# Show the random array before sorting
a

array([[-1.25507855, -0.25562812, -1.11683469, -1.58353661, -0.70239006],
       [-0.6977756 , -0.27282808,  0.26527251, -0.76124006,  0.4604497 ],
       [ 0.49664318,  1.13143024,  0.73887521, -0.48263522, -0.3458606 ],
       [-0.13357431,  0.30721723, -1.27229934, -0.10002671, -0.57948168],
       [-0.7326581 , -0.62539883, -0.46190354, -0.8780007 , -0.23877977]])

In [61]:
a.sort() # sort in place; with no axis given, each row is sorted independently

In [62]:
# Show a after the in-place sort
a

array([[-1.58353661, -1.25507855, -1.11683469, -0.70239006, -0.25562812],
       [-0.76124006, -0.6977756 , -0.27282808,  0.26527251,  0.4604497 ],
       [-0.48263522, -0.3458606 ,  0.49664318,  0.73887521,  1.13143024],
       [-1.27229934, -0.57948168, -0.13357431, -0.10002671,  0.30721723],
       [-0.8780007 , -0.7326581 , -0.62539883, -0.46190354, -0.23877977]])

In [63]:
# New 5x5 array of standard-normal samples for the np.sort example
# (no seed is set in the cells shown here, so the values differ from run to run)
a = np.random.randn(5, 5)

In [64]:
np.sort(a) # returns a sorted copy

array([[-0.623383  , -0.26547139, -0.16237764, -0.14637227,  0.27263648],
       [-1.42091165, -0.57612838, -0.57284206,  1.13127151,  1.25791774],
       [ 0.25197906,  0.37015358,  0.50861848,  0.56214873,  0.91624139],
       [-1.52715166, -0.01481312,  0.01692122,  0.19638824,  0.6853683 ],
       [ 0.10687991,  0.36492473,  0.44568532,  0.99516016,  1.69323809]])

In [65]:
# a itself was not modified by np.sort
a

array([[-0.14637227, -0.26547139,  0.27263648, -0.623383  , -0.16237764],
       [-0.57284206,  1.13127151, -1.42091165, -0.57612838,  1.25791774],
       [ 0.50861848,  0.91624139,  0.25197906,  0.56214873,  0.37015358],
       [ 0.19638824,  0.01692122, -0.01481312,  0.6853683 , -1.52715166],
       [ 0.99516016,  1.69323809,  0.36492473,  0.44568532,  0.10687991]])

In [66]:
a.sort(0, kind='heapsort') # the first argument selects the axis to sort along.
# Here axis 0: values are ordered down the rows, i.e. within each column (note this is different from sorting THE rows, which would be sorting along the columns)
# kind can be {'quicksort', 'mergesort', 'heapsort'} (newer NumPy releases also accept 'stable')

In [67]:
a

array([[-0.57284206, -0.26547139, -1.42091165, -0.623383  , -1.52715166],
       [-0.14637227,  0.01692122, -0.01481312, -0.57612838, -0.16237764],
       [ 0.19638824,  0.91624139,  0.25197906,  0.44568532,  0.10687991],
       [ 0.50861848,  1.13127151,  0.27263648,  0.56214873,  0.37015358],
       [ 0.99516016,  1.69323809,  0.36492473,  0.6853683 ,  1.25791774]])

### Set logic

In [68]:
a = np.arange(20)
b = np.array([2,3,7])
# for each element of a, gives True if it belongs to b and False otherwise
# (np.isin replaces np.in1d, which is deprecated as of NumPy 2.0; for 1-D input the result is the same)
np.isin(a, b)

array([False, False,  True,  True, False, False, False,  True, False,
       False, False, False, False, False, False, False, False, False,
       False, False], dtype=bool)

In [69]:
# Values present in both a and b
np.intersect1d(a,b)

array([2, 3, 7])

In [70]:
# Values present in either input, without duplicates
np.union1d(a,[-1,40])

array([-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
       16, 17, 18, 19, 40])

In [71]:
# Values of a that are not in b
np.setdiff1d(a,b)

array([ 0,  1,  4,  5,  6,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [72]:
np.setxor1d([1,2,3],[2,3,4]) # elements that are in only one of the two sets

array([1, 4])