#Numpy Tutorial

Numpy is a computational library for Python that is optimized for operations on multi-dimensional arrays. In this notebook we will use numpy to work with 1-d arrays (often called vectors) and 2-d arrays (often called matrices).

For the full user guide and reference for numpy see: http://docs.scipy.org/doc/numpy/

In [1]:
import numpy as np # importing this way allows us to refer to numpy as np

# Creating Numpy Arrays

New arrays can be made in several ways. We can take an existing list and convert it to a numpy array:

In [2]:
mylist = [1., 2., 3., 4.]
mynparray = np.array(mylist)
mynparray

array([ 1.,  2.,  3.,  4.])

You can initialize an array (of any dimension) of all ones or all zeroes with the ones() and zeros() functions:

In [3]:
one_vector = np.ones(4)
print one_vector # using print removes the array() portion

[ 1.  1.  1.  1.]


In [4]:
# NOTE: despite the name, this array is filled with zeros, not ones.
one2Darray = np.zeros((2, 4)) # a 2D array with 2 "rows" and 4 "columns"
print one2Darray

[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]


In [5]:
zero_vector = np.zeros(4)
print zero_vector

[ 0.  0.  0.  0.]


You can also initialize an empty array, which is allocated without setting its entries, so it initially holds arbitrary leftover values (see the output below). This is the fastest way to create a fixed-size numpy array; however, you must make sure that you then overwrite every value.

In [6]:
empty_vector = np.empty(5)
print empty_vector

[  3.54214792e-316   2.59023599e-316   3.54216689e-316   1.88923509e-316
   3.53121523e-316]


#Accessing array elements

Accessing an array is straightforward. For vectors you access an element by putting its index inside square brackets. Recall that indices in Python start with 0.

In [7]:
mynparray[2]

3.0

2D arrays are accessed similarly by referring to the row and column index separated by a comma:

In [8]:
my_matrix = np.array([[1, 2, 3], [4, 5, 6]])
print my_matrix

[[1 2 3]
 [4 5 6]]


In [9]:
print my_matrix[1, 2]

6


In [10]:
print my_matrix[1][2]

6


Sequences of indices can be accessed using ':'. For example:

In [11]:
print my_matrix[0:2, 2] # recall 0:2 = [0, 1]

[3 6]


In [12]:
print my_matrix[0, 0:3]

[1 2 3]


You can also pass a list of indices. 

In [13]:
import numpy as np
fib_indices = np.array([1, 1, 2, 3])
random_vector = np.random.random(10) # 10 random numbers between 0 and 1
print random_vector
random_vector2 = np.random.randint(1, 6) 
print "random_vector2 :\n",random_vector2
random_vector3= np.random.rand(3,2)*100
print "random_vectir3:\n",random_vector3

[ 0.26778729  0.77828832  0.29455702  0.59060385  0.50441565  0.41784199
  0.88387887  0.13884825  0.973098    0.92844813]
random_vector2 :
3
random_vectir3:
[[  9.91505106e+01   3.23322195e+01]
 [  6.60773690e-02   4.25871734e+01]
 [  8.02325986e+01   4.98862607e+01]]


In [14]:
print random_vector[fib_indices]

[ 0.77828832  0.77828832  0.29455702  0.59060385]


You can also use true/false values to select values

In [15]:
my_vector = np.array([1, 2, 3, 4])
select_index = np.array([True, False, True, False])
print my_vector[select_index]

[1 3]


For 2D arrays you can select specific columns and specific rows. Passing ':' selects all rows/columns

In [16]:
select_cols = np.array([True, False, True]) # 1st and 3rd column
select_rows = np.array([False, True]) # 2nd row

In [17]:
print my_matrix[select_rows, :] # just 2nd row but all columns

[[4 5 6]]


In [18]:
print my_matrix[:, select_cols] # all rows and just the 1st and 3rd column

[[1 3]
 [4 6]]


#Operations on Arrays

You can use the operations '\*', '\*\*', '/', '+' and '-' on numpy arrays and they operate elementwise.

In [19]:
# Example of "ValueError: setting an array element with a sequence."
# The error is raised when we try to store something that is not a scalar
# (e.g. a vector, a 2-by-3 array, or a list) into a single cell of an array.
import numpy as np
A = np.zeros((3,3))
A[1,1] = np.array([1,2])  # raises: a 2-element array cannot fit in one cell


ValueError: setting an array element with a sequence.

In [20]:
# Another example of "ValueError: setting an array element with a sequence."
v = np.array([[1,2,3]])  # 2-D array with a single row
print "([[1,2,3]]):",v.shape
print "traspose v: \n",(np.transpose(v)).shape
print np.transpose(v)
print  np.dot(np.transpose(v), v)  # (3, 1) dot (1, 3) gives a 3-by-3 array
# A is the 3-by-3 array created in the previous cell; a 3-by-3 result cannot
# be stored in the single cell A[1,1], so this assignment raises the ValueError.
A[1,1] = np.dot(np.transpose(v), v)

([[1,2,3]]): (1L, 3L)
traspose v: 
(3L, 1L)
[[1]
 [2]
 [3]]
[[1 2 3]
 [2 4 6]
 [3 6 9]]


ValueError: setting an array element with a sequence.

In [21]:
import numpy as np
my2_np = np.array([0.,0.])
l2_penalty = 1.0
weights = my2_np
features =np.array([[1.00000000e+00,  1.18000000e+03],
                    [1.00000000e+00 ,  2.57000000e+03]])
errors= [-221900., -538000.]
np_err = np.array(errors)
def feature_derivative_ridge(feature_is_constant):
    """Return a fixed sample "derivative" for shape experiments.

    No derivative is actually computed here: both branches return hard-coded
    numbers, so the caller can compare how a 0-d array and a 1-d array behave.

    Parameters
    ----------
    feature_is_constant : bool
        Selects which hard-coded value is returned.

    Returns
    -------
    numpy.ndarray
        A 0-d array holding -18752698930.0 when ``feature_is_constant`` is
        true, otherwise a 1-d array with two elements.
    """
    if feature_is_constant:  # scalar (0-d array) case
        return np.array(-18752698930.0)
    # vector (1-d array) case
    return np.array([-1.87526989e+10, -1.87526989e+10])

# Attempts one update step per weight; as written it stops with
# "ValueError: setting an array element with a sequence." on the first pass.
for i in range(len(my2_np)):
    if i == 0:
        # features[:i] is a ROW slice; for i == 0 it is empty, so the dot
        # product is an empty array (printed below as []).
        # NOTE(review): the column features[:, i] was probably intended - confirm.
        derivative = 2 * np.dot(features[:i], np_err )      #shapes (2,2) and (1,) not aligned: 2 (dim 1) != 1 (dim 0) error !
        #derivative = 2 * np.dot(errors,features[:i] )     
        print "derivative1", derivative     #same
    else :
        # NOTE(review): `weight` is not defined in this cell (only `weights` is);
        # presumably weights[i] was meant - confirm.
        derivative = 2 * np.dot(features[:i],np_err ) + 2 * l2_penalty*weight 
        print derivative
    np_der = np.dot(1e-12, derivative)
    print "np_der",np_der
    # Storing the empty array np_der into the single cell my2_np[i] is what
    # raises the ValueError.
    my2_np[i] = my2_np[i] + np_der
print my2_np    

derivative1 []
np_der []


ValueError: setting an array element with a sequence.

In [22]:
my_array = np.array([1., 2., 3., 4.])
print my_array*my_array

[  1.   4.   9.  16.]


In [23]:
print my_array**2

[  1.   4.   9.  16.]


In [24]:
print my_array - np.ones(4)

[ 0.  1.  2.  3.]


In [25]:
print my_array + np.ones(4)

[ 2.  3.  4.  5.]


In [26]:
print my_array / 3

[ 0.33333333  0.66666667  1.          1.33333333]


In [27]:
print my_array / np.array([2., 3., 4., 5.]) # = [1.0/2.0, 2.0/3.0, 3.0/4.0, 4.0/5.0]

[ 0.5         0.66666667  0.75        0.8       ]


You can compute the sum with np.sum() and the average with np.average()

In [28]:
print np.sum(my_array)

10.0


In [29]:
print np.average(my_array)

2.5


In [30]:
print np.sum(my_array)/len(my_array)

2.5


#The dot product

An important mathematical operation in linear algebra is the dot product. 

When we compute the dot product between two vectors we are simply multiplying them elementwise and adding them up. In numpy you can do this with np.dot()

In [31]:
array1 = np.array([1., 2., 3., 4.])
array2 = np.array([2., 3., 4., 5.])
print np.dot(array1, array2)

40.0


In [32]:
ar1 = np.array([1,2,3])   # 1d array -> check only lengh is same?
ar2 = np.array([2,3,4])
ar3 = np.dot(ar1, ar2)
print ar3

20


In [33]:
print np.sum(array1*array2)

40.0


Recall that the Euclidean length (or magnitude) of a vector is the square root of the sum of the squares of the components. This is just the square root of the dot product of the vector with itself:

In [34]:
array1_mag = np.sqrt(np.dot(array1, array1))
print array1_mag

5.47722557505


In [35]:
print np.sqrt(np.sum(array1*array1))

5.47722557505


We can also use the dot product when we have a 2D array (or matrix). When you have a vector with the same number of elements as the matrix (2D array) has columns, you can right-multiply the matrix by the vector to get another vector with the same number of elements as the matrix has rows. For example, this is how you compute the predicted values given a matrix of features and an array of weights.

In [36]:
my_features = np.array([[1., 2.], [3., 4.], [5., 6.], [7., 8.]])
print my_features
my_features.shape

[[ 1.  2.]
 [ 3.  4.]
 [ 5.  6.]
 [ 7.  8.]]


(4L, 2L)

In [37]:
my_weights = np.array([0.4, 0.5])
print my_weights
my_weights.shape

[ 0.4  0.5]


(2L,)

In [38]:
# Different ways of writing the same 1-D array, and how reshape changes it.
array_test1 = [3,4,5]
np_ar1 = np.array(array_test1)
print np_ar1
ar2 = [3,4,5]
np_ar2 = np.array([3,4,5])
np_ar3 = np.array([3,
                   4,
                   5])
print np_ar2
print np_ar2.shape
print np_ar3
print np_ar3.shape          # still 1-D with shape (3,), not 3-by-1: line breaks in the literal add no dimension
print ar2
print np_ar3.reshape(1,3)   # 1-by-3 (2-D) array

[3 4 5]
[3 4 5]
(3L,)
[3 4 5]
(3L,)
[3, 4, 5]
[[3 4 5]]


In [39]:
my_predictions = np.dot(my_features, my_weights) # note that the weights are on the right
my_predictions2 = np.array(my_features*my_weights)
print my_predictions # which has 4 elements since my_features has 4 rows

[ 1.4  3.2  5.   6.8]


In [40]:
# Matrix product (np.dot) versus elementwise product ('*').
ar1 = [[1,2,3],[4,5,6]]
ar2 = [[2,3],[4,5],[1,5]]
np_ar1=np.array(ar1)
np_ar2=np.array(ar2)
x = np.arange(9).reshape((3,3))
y = np.arange(3)
print "np.dot(x,y) :\n",np.dot(x,y)
print "np.arange(9).reshape((3,3)):\n",np.arange(9).reshape((3,3))
print np_ar1.shape
print np_ar2.shape
np_res = np.dot(np_ar1, np_ar2) # np.dot of an (m, n) array and an (n, k) array gives an (m, k) array
np_res2 = np.array(np_ar1*np_ar2.reshape((2,3)))   # '*' is elementwise; np_ar2 is reshaped to (2, 3) so both operands have the same (m by n) shape
print "np.dot(np_ar1, np_ar2) :\n",np_res
print np_res2

np.dot(x,y) :
[ 5 14 23]
np.arange(9).reshape((3,3)):
[[0 1 2]
 [3 4 5]
 [6 7 8]]
(2L, 3L)
(3L, 2L)
np.dot(np_ar1, np_ar2) :
[[13 28]
 [34 67]]
[[ 2  6 12]
 [20  5 30]]


In [41]:
arr_t1 = np.arange(6)
print arr_t1
re_arr_t1= arr_t1.reshape((3,2))
print re_arr_t1
arr_t2 = np.arange(2,14,2)
print "np.arange(2,14,2):",arr_t2
print "np.arange(1,15):",np.arange(1,15)
my_arr_t2 = arr_t2.reshape((2,3))
print my_arr_t2
my_np_arr = np.dot(re_arr_t1,my_arr_t2)
print my_np_arr


[0 1 2 3 4 5]
[[0 1]
 [2 3]
 [4 5]]
np.arange(2,14,2): [ 2  4  6  8 10 12]
np.arange(1,15): [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14]
[[ 2  4  6]
 [ 8 10 12]]
[[ 8 10 12]
 [28 38 48]
 [48 66 84]]


Similarly if you have a vector with the same number of elements as the matrix has *rows* you can left multiply them.

In [42]:
my_matrix = my_features
my_array = np.array([0.3, 0.4, 0.5, 0.6])
my_array.shape

(4L,)

In [43]:
print np.dot(my_array, my_matrix) # which has 2 elements because my_matrix has 2 columns

[  8.2  10. ]


#Multiplying Matrices

If we have two 2D arrays (matrices) matrix_1 and matrix_2 where the number of columns of matrix_1 is the same as the number of rows of matrix_2 then we can use np.dot() to perform matrix multiplication.

In [44]:
matrix_1 = np.array([[1., 2., 3.],[4., 5., 6.]])
print matrix_1

[[ 1.  2.  3.]
 [ 4.  5.  6.]]


In [45]:
matrix_2 = np.array([[1., 2.], [3., 4.], [5., 6.]])
print matrix_2

[[ 1.  2.]
 [ 3.  4.]
 [ 5.  6.]]


In [46]:
print np.dot(matrix_1, matrix_2)

[[ 22.  28.]
 [ 49.  64.]]


In [47]:
import numpy as np
a = [3,6,9]
x = np.array(a)  #1-d
print x.T
y = np.array([a])  #2-d
print y.T


[3 6 9]
[[3]
 [6]
 [9]]


In [102]:
# NOTE(review): this notebook's print statements indicate Python 2, where
# input() evaluates whatever is typed as a Python expression - typing "quit"
# is looked up as a variable name, hence the NameError shown below. Evaluating
# typed text is unsafe for untrusted input; consider raw_input() plus explicit
# parsing.
input1 = input("input num list seperated by ,:")
# Square every element; the lambda's parameter shadows the outer input1.
f = map(lambda input1 : input1 **2 , input1)

print f

input num list seperated by ,:quit


NameError: name 'quit' is not defined

In [103]:
# compare two numpy array and print number of same elements
import numpy as np
a = np.array([1, 2, 3, 4])
b = np.array([1, 2, 4, 3])
print np.sum(a == b)
print (a == b).sum()

A = np.array([1, 3, 5, 7])
B = np.array([2, 4, 6, 8])
ablist = zip(A,B)   # make tuple outof two np array in ordered way.
print ablist
x = [(0.1, 1.), (0.1, 2.), (0.1, 3.), (0.1, 4.), (0.1, 5.)]
normal_result = zip(*x)
print normal_result

2
2
[(1, 2), (3, 4), (5, 6), (7, 8)]
[(0.1, 0.1, 0.1, 0.1, 0.1), (1.0, 2.0, 3.0, 4.0, 5.0)]


In [104]:
import numpy as np
my_weights = np.array([1., 10.])
print my_weights
print np.arange(2,4)

[  1.  10.]
[2 3]


In [105]:
maxN = 0
count = 0
end =5
my_list = [1,3,5,3,6,7,8,3]
for i in range(0,6):
    if maxN < my_list[i]:
        maxN = my_list[i]
print maxN
    

7


In [106]:
import numpy as np
one_np = np.ones(5)
two_np = np.array([1,-1,1,-1,1,1])
print "one_np",one_np
count = 0
for i in one_np:
    if( i == +1):
    #if( i ==  1):  # same above
        count += 1  
print count
indicator = (one_np == +1)
print "(one_np == +1)",indicator
indicator2 = (two_np == +1)
print indicator2   #True is 1 False is 0
my_99np= np.array([1,2,3,4,5,6])
print np.dot(indicator2,my_99np)  #correct

one_np [ 1.  1.  1.  1.  1.]
5
(one_np == +1) [ True  True  True  True  True]
[ True False  True False  True  True]
15


In [107]:
#classification wke2 2 quiz num4,5
from __future__ import division
import graphlab
import math
import string
import numpy as np
# y val=   +1  -1     +1   +1
scores = [2.5, 0.3 , 2.8, 0.5]
exp_f = map(lambda x :1.0 +  math.exp(-1 * x),scores)
print exp_f
my_list6=[]
my_ans6 = 1
count = 1
for x in exp_f:
    ans6 = 1.0/float(x)
    if count == 2:
        ans6 = 1 - ans6
    print ans6    
    my_ans6 = my_ans6 * ans6
    count += 1
print "answer :",my_ans6    

#derivative calculate
num = 0
count_re = 1
my_ans_result = 0
for x1 in exp_f:
    ans_re = 1.0/float(x1)
    // if y val, i.e sentiment is -1 (in this example, second element)
    if count_re == 2:
        result_re = scores[num]*(-1 * ans_re) 
        print result_re    
    else :
        result_re = scores[num]* (1 - ans_re)
        print result_re
    my_ans_result = my_ans_result + result_re
    count_re += 1
    num += 1
print "deritive answer :",my_ans_result    
    

SyntaxError: invalid syntax (<ipython-input-107-3fdd646af5f4>, line 29)

In [108]:
#variable scope test
# This is a global variable
a = 0

if a == 0:
    # This is still a global variable
    b = 1

def my_function(c):
    """Print the argument ``c`` and then the function-local value ``d``."""
    # d lives only inside this call; it is gone once the function returns
    d = 3
    for value in (c, d):
        print(value)

# Now we call the function, passing the value 7 as the first and only parameter
my_function(7)

# a and b still exist
print(a)
print(b)

# c and d don't exist anymore -- these statements will give us name errors!
print(c)
print(d)

7
3
0
1


NameError: name 'c' is not defined

In [109]:
#max value return in list
list1, list2 = [123, 'xyz', 'zara', 'abc'], [456, 700, 200]

print "Max value element : ", max(list1)
print "Max value element : ", max(list2)

Max value element :  zara
Max value element :  700


In [110]:
list1, list2 = [123, 'xyz', 'zara', 'abc'], [456, 700, 200]
print "min value element : ", min(list1)
print "min value element : ", min(list2)


min value element :  123
min value element :  200


In [111]:
import numpy as np
np1 = np.array([0.03, 4.41, 2.05])
n1=np.array([3.29, 3.44, 3.67])
n2=np.array([0.82, 9.71, 3.88])
n3=np.array([8.34, 1.72, 0.02])
print np.dot(np1,n1)
print np.dot(np1,n2)
print np.dot(np1,n3)
print min(n1)

22.7926
50.7997
7.8764
3.29


In [112]:
#numpy.argmin(a, axis=None, out=None)[source]
#Returns the indices of the minimum values along an axis.
a = np.arange(6).reshape(2,3)
print a
print "np.argmin(a):",np.argmin(a)
#.argsort(a, axis=-1, kind='quicksort', order=None)
#Returns the indices that would sort an array.
# axis : int or None, optional
# Axis along which to sort. The default is -1 (the last axis). If None, the flattened array is used.
# kind : {‘quicksort’, ‘mergesort’, ‘heapsort’}, optional
# Sorting algorithm.
# order : str or list of str, optional
x = np.array([3, 1, 2])
print "x :",x
print "np.argsort(x) :",np.argsort(x)

[[0 1 2]
 [3 4 5]]
np.argmin(a): 0
x : [3 1 2]
np.argsort(x) : [1 2 0]


In [113]:
xx1 = np.array([[0, 3, 1], [4, 2, 3],[5,2,9]])
print "xx1 : \n",xx1
print "np.argsort(xx1,axis=0) \n", np.argsort(xx1,axis=0)
print "np.argsort(xx1,axis=1) \n",np.argsort(xx1,axis=1)
#print "np.argsort(xx1,axis=2) \n",np.argsort(xx1,axis=2)  #error: axis(=2) out of bounds
# print "np.argsort(xx1) \n",np.argsort(xx1)



xx1 : 
[[0 3 1]
 [4 2 3]
 [5 2 9]]
np.argsort(xx1,axis=0) 
[[0 1 0]
 [1 2 1]
 [2 0 2]]
np.argsort(xx1,axis=1) 
[[0 2 1]
 [1 2 0]
 [1 0 2]]


In [114]:
#convert to integer  
x33 = np.array([1, 2, 2.5])
print type(x33.shape)
x_t = x33.shape
print x_t
for xxx in range(len(x33)):
    print xxx
print "****************"
for xxx in x33 :
    print xxx
print "x33.astype(int):", x33.astype(int)

random_vector = (np.random.random(10)*10).astype(int)
print random_vector
#find indice of max number element
print "return indice of max number element",np.argmax(random_vector),"  it's number :",random_vector[np.argmax(random_vector)]
print np.argsort(random_vector.astype(int)) # return sorted indices.
k=4
print np.argsort(random_vector.astype(int))[0:k]

<type 'tuple'>
(3L,)
0
1
2
****************
1.0
2.0
2.5
x33.astype(int): [1 2 2]
[3 3 4 0 1 7 1 0 9 7]
return indice of max number element 8   it's number : 9
[3 7 4 6 0 1 2 5 9 8]
[3 7 4 6]


In [115]:
print "np.arange(1,15):",np.arange(1,15)
for k in  np.arange(1,15):
    print k

np.arange(1,15): [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14]
1
2
3
4
5
6
7
8
9
10
11
12
13
14


In [116]:
predict2 = (np.random.random(15)*10).astype(int)   
output2 = (np.random.random(15)*10).astype(int)
print predict2
print output2
for k in range(15):
    residual = predict2 - output2
    RSS = np.sum(residual**2)
    print RSS
    

[5 7 0 6 7 3 5 9 9 9 6 4 8 0 5]
[3 1 9 0 5 3 7 2 5 0 7 1 6 0 5]
325
325
325
325
325
325
325
325
325
325
325
325
325
325
325


In [117]:
import numpy as np
for k in np.arange(1,15):
    print k

1
2
3
4
5
6
7
8
9
10
11
12
13
14


In [118]:
#the first running vertically downwards across rows (axis 0),    ###very imiportant !!!
#and the second running horizontally across columns (axis 1).
import numpy as np
x = np.arange(12).reshape((3,4))
print " np.arange(12).reshape((3,4)):\n",x
print "x.sum(axis=0):",x.sum(axis=0)
print "x.sum(axis=1):",x.sum(axis=1)

print "np.argmax(x):",np.argmax(x)   #return max element's indice
print "np.argmax(x, axis=0) :",np.argmax(x, axis=0)  #vertically downwards across rows (axis 0)
print "np.argmax(x, axis=1) :",np.argmax(x, axis=1)  #horizontally across columns (axis 1)

 np.arange(12).reshape((3,4)):
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
x.sum(axis=0): [12 15 18 21]
x.sum(axis=1): [ 6 22 38]
np.argmax(x): 11
np.argmax(x, axis=0) : [2 2 2 2]
np.argmax(x, axis=1) : [3 3 3]


In [119]:
print np.random.random(15)

[ 0.12186325  0.33199008  0.13753099  0.71034249  0.56684318  0.47958142
  0.75588201  0.65505938  0.4714285   0.14392029  0.14959111  0.66104908
  0.74493346  0.08730134  0.3110809 ]


In [120]:
# np.random.random(9) draws 9 floats in [0, 1); scaling by 10 and truncating
# with astype(int) yields 9 integers, each between 0 and 9 inclusive.
print (np.random.random(9)*10).astype(int)   # 9 random integers in the range 0..9

[4 7 9 9 8 5 8 9 1]
