In [2]:
#1 NumPy Arrays

In [3]:
import numpy as np

In [4]:
# NumPy arrays come in two flavours: vectors (one-dimensional, like a list)
# and matrices (two-dimensional).

In [6]:
# Creating a Numpy array. If you have a list, 
# you can cast that to an array

In [7]:
my_list = [1,2,3]

In [8]:
np.array(my_list) # essentially, just pass it an obj like the list

array([1, 2, 3])

In [9]:
# to confirm that it really is an array, not list, save it as a var

In [12]:
x = np.array(my_list) 

In [13]:
# and check the TYPE of x

In [14]:
type(x)

numpy.ndarray

In [15]:
# If you actually want to build a matrix, create a variable that holds
# a nested list - a list of lists. So, you have a list with three items,
# which are themselves lists containing three items each.

In [17]:
my_matrix = [[1,2,3],[4,5,6],[7,8,9]]

In [22]:
# if we pass this to numpy array, we get an array, 
#but the dimensionality has been taken into consideration
#when displaying the output. Numpy is smart enough to 
# know that this is a two dimensional array or matrix (3 columns and rows).

In [21]:
np.array(my_matrix)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [24]:
# The number of nested square brackets shows the dimension.
# You can go to 3, 4 dimensions etc. Financial data rarely goes beyond
# 2, sometimes 3 dimensions.

In [25]:
#Next, a few built-in methods to generate quick arrays with numpy.

In [26]:
# You can cast the result of the Python built-in function range() to a list;
# that gives back a list of integers.

In [27]:
list(range (0,5))

[0, 1, 2, 3, 4]

In [31]:
# there's a numpy version of this which creates an array version 
# of that list. It can take a starting, ending point and a step size.

In [30]:
np.arange(0,5)

array([0, 1, 2, 3, 4])

In [33]:
np.arange(1,11,2) # start at 1, stop before 11, step 2: all the odd numbers.
# For all the even numbers, you'd start at zero, i.e. np.arange(0,11,2)

array([1, 3, 5, 7, 9])

In [34]:
#generating arrays of zeros and ones - 
#useful for some financial models. Note the dot, which
# indicates that this is a floating point number

In [35]:
np.zeros(3)

array([0., 0., 0.])

In [36]:
# Remember there are two major types of number in Python, integers
# and floats - which we can check with type().
# Later, especially when working with Pandas, an integer you enter
# may be converted to a float, so as not to lose any data.
# That way, if you have e.g. 0.5, it won't accidentally be rounded
# to zero or to one.

In [37]:
type(1)

int

In [38]:
type(1.0)

float

In [41]:
# To create the same array in its two-dimensional version, you
# still use np.zeros(), but you add another pair of parentheses and pass it
# a tuple of dimensions. The first number - index zero - stands for rows,
# the second - index one - for columns, e.g.

In [40]:
np.zeros((3,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [42]:
# The 'ones' function works similarly.

In [43]:
np.ones(4)

array([1., 1., 1., 1.])

In [44]:
np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [48]:
# To return EVENLY spaced numbers over a specified interval,
# you can use np.linspace ('linearly spaced').
# Press shift-tab to see what parameters it takes in the brackets.
# It takes a start, a stop and a number of points: it asks how many
# numbers you want from start to stop (both included), whereas arange
# creates numbers at every specified step.

In [46]:
np.linspace(0,10,3)

array([ 0.,  5., 10.])

In [47]:
np.linspace(0,10,30)

array([ 0.        ,  0.34482759,  0.68965517,  1.03448276,  1.37931034,
        1.72413793,  2.06896552,  2.4137931 ,  2.75862069,  3.10344828,
        3.44827586,  3.79310345,  4.13793103,  4.48275862,  4.82758621,
        5.17241379,  5.51724138,  5.86206897,  6.20689655,  6.55172414,
        6.89655172,  7.24137931,  7.5862069 ,  7.93103448,  8.27586207,
        8.62068966,  8.96551724,  9.31034483,  9.65517241, 10.        ])

In [49]:
# Remember that the count starts at 0 (the start point is one of the numbers),
# so for nice even cutoffs you need one extra point, e.g. np.linspace(0,10,11).

In [50]:
# np.eye() creates an identity matrix.
# It uses only 0 and 1 to create a square matrix with a diagonal of 1s.

In [51]:
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [52]:
# let's talk about numpy's random library/module. Lots of ways to 
# create random number arrays. Eg. in financial data to randomly 
#model something like 'a Monte Carlo simulation'. Once you type
# np.random. , you can type tab to see 
#'all the methods and distributions you can use' for generating
#random numbers. Again, shift-tab tells you what they do. See the 
#explanation for np.random.rand, which will give uniformly distributed
# values between 0 and 1, meaning that they all have the same odds
# of being picked.

In [53]:
np.random.rand(1)

array([0.2649433])

In [54]:
np.random.rand(1)

array([0.70566985])

In [55]:
np.random.rand(5,5)

array([[0.80641503, 0.20449906, 0.82805069, 0.40391744, 0.58730617],
       [0.27655744, 0.29133967, 0.84713323, 0.9423696 , 0.78916631],
       [0.3610296 , 0.6280044 , 0.33713986, 0.84225899, 0.45251137],
       [0.90896411, 0.20000538, 0.21542797, 0.2602654 , 0.22437359],
       [0.1931093 , 0.46846694, 0.37798568, 0.40965002, 0.77246754]])

In [59]:
# Next, sampling from the standard normal distribution (also called
# a Gaussian distribution). Unlike rand, it is not uniform.
# Shift-tab to the rescue: it follows a bell curve - the closer
# a number is to zero (the centre of the bell curve), the more
# likely it is to be picked.
# "Because the mean is centred at zero with a variance of 1,
# you can also get negative numbers off this."


In [60]:
np.random.randn(5)

array([-0.58982173, -0.01306525,  0.66722117,  0.13268309,  0.17559528])

In [62]:
np.random.randn(5,4)

array([[ 0.52854258,  0.41380198, -0.63689191,  0.44757184],
       [-0.42895312, -1.09666776, -0.21869345, -0.45592556],
       [-1.02800383, -1.05992185,  0.91469086,  0.15074316],
       [-0.25627691, -1.67498977,  0.45547928,  1.24875402],
       [-0.10513116,  1.83792079,  0.84792812, -2.2232169 ]])

In [65]:
# simply to get a random integer use np.random.randint(). 
#See probability options.

In [64]:
np.random.randint(1,100)

83

In [66]:
 np.random.randint(1,100)

65

In [68]:
np.random.randint(1,100,10) # 1 inclusive, 100 exclusive

array([28, 94, 12, 28,  7, 47, 86, 42, 40, 11])

In [69]:
#some other useful array attributes and methods

In [70]:
# arr: the consecutive integers 0..24 as a one-dimensional array
arr = np.arange(25)
# ranarr: 10 random integers from 0 (inclusive) to 50 (exclusive).
# No seed is set in the cells shown here, so the values will differ
# on every fresh run of the notebook.
ranarr = np.random.randint(0,50,10)

In [71]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [72]:
ranarr

array([25, 30, 14,  8,  5, 39, 26, 20, 45, 23])

In [74]:
# a really common method to use is the reshape method - returns 
# an array containing the exact same data but in a new shape.
# right now, our array is a simple one-dimensional array from 0 to 24.
# You can call reshape on this and reshape it to an array 
# of a different size eg. a 5 by 5 matrix.

In [75]:
arr.reshape(5,5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [76]:
# Note that if you try 5 by 4, it'll return an error
# because it 'cannot reshape array of size 25 into shape (5,4)'.

In [77]:
# Deliberately attempt an invalid reshape to demonstrate the error:
# a (5,4) shape has 20 slots, which cannot hold the 25 elements of arr.
# The exception is caught and printed so that the rest of the notebook
# still runs under Restart & Run All.
try:
    arr.reshape(5,4)
except ValueError as err:
    print("ValueError:", err)

ValueError: cannot reshape array of size 25 into shape (5,4)

In [78]:
# A quick check: the new dimensions must multiply to the number of elements
# (here 25). This is why 5 by 5 works (5*5 = 25) but 5 by 4 does not (5*4 = 20).

In [79]:
# the shape attribute is just the shape of the array. Looking at
# our original array, we can ask for its attribute.

In [80]:
arr.shape

(25,)

In [82]:
# 25, and nothing. Essentially, this indicates that this 
#is a one-dimensional array. So, it has 25, on one axis.
# if we were to reshape this with arr.reshape(5,5).shape ,
# we'd get the dimensions (5,5):

In [83]:
arr.reshape(5,5).shape

(5, 5)

In [87]:
# We could do (25,1), which would just mean that instead of one single row
# with 25 columns, we'd have 25 rows with one column each. Note the hash
# inserted before .shape to display the array itself, not just its shape.
# "This is the difference between (25,1) and just (25,)."

In [86]:
arr.reshape(25,1)#.shape

array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15],
       [16],
       [17],
       [18],
       [19],
       [20],
       [21],
       [22],
       [23],
       [24]])

In [88]:
# If at any time you want to know the data type that
# your array is holding, just ask for the arr.dtype attribute. Here,
# 64-bit integers (his returned dtype('int32'); the default integer
# size can depend on the platform and NumPy version).

In [89]:
arr.dtype

dtype('int64')

In [90]:
#Finally, four useful key methods, doing these off the random array.


In [92]:
ranarr

array([25, 30, 14,  8,  5, 39, 26, 20, 45, 23])

In [93]:
# to grab the max - or, highest - number of an array

In [95]:
ranarr.max()

45

In [96]:
#For its index location, call argmax

In [97]:
ranarr.argmax()

8

In [98]:
# for the minimum value, min

In [99]:
ranarr.min()

5

In [101]:
ranarr.argmin() #for its index location

4

In [None]:
#That's really it for the basics of NumPy arrays.