In [1]:
## copied from the Harvard materials (sorry...)

# The %... is an iPython thing, and is not part of the Python language.
# In this case we're just telling the plotting library to draw things on
# the notebook, instead of on a separate window.
%matplotlib inline 
#this line above prepares IPython notebook for working with matplotlib

# See all the "as ..." constructs? They're just aliasing the package names.
# That way we can call methods like plt.plot() instead of matplotlib.pyplot.plot().

import numpy as np # imports a fast numerical programming library
import scipy as sp #imports stats functions, amongst other things
import matplotlib as mpl # this actually imports matplotlib
import matplotlib.cm as cm #allows us easy access to colormaps
import matplotlib.pyplot as plt #sets up plotting under plt
import pandas as pd #lets us handle data as dataframes
# Set up how pandas displays tables in this notebook.
# NOTE(review): the per-option notes below are read off the option names - confirm against the pandas docs.
pd.set_option('display.width', 500)  # presumably the display width, raised to 500
pd.set_option('display.max_columns', 100)  # presumably the column cap for displayed tables, set to 100
pd.set_option('display.notebook_repr_html', True)  # presumably enables HTML table rendering in the notebook
import seaborn as sns #sets up styles and gives us more plotting options

## Starting with Numpy

Not something I've understood before so here goes!

I'm now going to jump here: https://rohitmidha23.github.io/Numpy-Explained/?utm_campaign=News&utm_medium=Community&utm_source=DataCamp.com

Newer tools like pandas are built around Numpy arrays

In [2]:
# Fix the global RNG state so the "random" draws below are reproducible:
# re-running this cell always gives back exactly the same sequence of numbers.
np.random.seed(0)

# Integers are drawn from the half-open range [0, 10).
x1 = np.random.randint(0, 10, size=(6,))    # one-dimensional array, 6 elements
x2 = np.random.randint(0, 10, size=(3, 3))  # two-dimensional array, 3 rows x 3 columns

In [3]:
# x1 is the one-dimensional random integer array created in the previous cell.
print(x1)

[5 0 3 3 7 9]


In [4]:
# x2 is the 3x3 random integer array created two cells above.
print(x2)

[[3 5 2]
 [4 7 6]
 [8 8 1]]


#### Using np.array()

In [5]:
# np.array() builds an ndarray from an array-like object (here a plain list).
x1 = np.array([1, 2, 3, 4])

# Show the contents first, then the Python type of the result.
print(x1, type(x1), sep="\n")

[1 2 3 4]
<class 'numpy.ndarray'>


In [6]:
# A list of equal-length lists becomes a 2-D array: one inner list per row.
x2 = np.array([[1, 2, 3],
               [4, 5, 6]])

# Contents first, then the Python type of the result.
print(x2, type(x2), sep="\n")

[[1 2 3]
 [4 5 6]]
<class 'numpy.ndarray'>


#### Using np.ndarray()

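I think I roughly understand what this does (it is the low-level constructor for n-dimensional arrays). However, I'm not fully clear on the purpose of the `buffer` argument; as far as I can tell, it is an existing object (here another array) whose memory supplies the data for the new array.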

Found this: https://jakevdp.github.io/blog/2014/05/05/introduction-to-the-python-buffer-protocol/ but haven't dug into it in much detail.

In [7]:
# Low-level constructor: a 3-element integer array whose data comes from `buffer`.
x = np.ndarray(shape=(3,), dtype=int, buffer=np.array([1, 2, 3]))
print(x)

# np.append hands back a new array with the value added at the end, so the
# result has to be assigned back to x.
x = np.append(x, 5)
print(x, type(x), sep="\n")

[1 2 3]
[1 2 3 5]
<class 'numpy.ndarray'>


In [8]:
# Same constructor, this time producing a 2x2 array of floats.
x = np.ndarray(
    shape=(2, 2),
    dtype=float,
    buffer=np.array([[1.4, 2.5], [1.3, 2.4]]),
)
print(x)

[[1.4 2.5]
 [1.3 2.4]]


#### Attributes of ndarray

Each array has the attributes ndim (the number of dimensions), shape (the size of each dimension) and size (the total number of elements in the array):

In [9]:
# Two rows of three values each.
x2 = np.array([[1, 2, 3],
               [4, 5, 6]])

print('x2.ndim = ', x2.ndim)    # number of dimensions
print('x2.shape = ', x2.shape)  # size of each dimension
print('x2.size = ', x2.size)    # total number of elements

x2.ndim =  2
x2.shape =  (2, 3)
x2.size =  6


Data type is also useful: [array].dtype

In [10]:
# dtype reports the element type of x2 (the array defined in the previous cell).
# NOTE(review): the int32 shown below is what this run produced; NumPy's default integer
# width is presumably platform-dependent, so another machine may report int64 - confirm.
print('x2.dtype = ',x2.dtype)

x2.dtype =  int32


#### Array Indexing: Accessing Single Elements

Square brackets enable us to access the desired index (similarly to Python lists). Index starts from zero (OBVIOUSLY).

In [11]:
x1 = np.array([1, 2, 3, 4])

print("x1 = ", x1)
print("x1[0] = ", x1[0])    # indices start at zero, as with arrays in C/C++
print("x1[-1] = ", x1[-1])  # negative indices count from the end, as with lists

x1 =  [1 2 3 4]
x1[0] =  1
x1[-1] =  4


In a multi-dimensional array, items can be accessed using a comma-separated tuple of indices (a tuple is like a list that can't be changed, written with parentheses rather than square brackets):

In [12]:
x2 = np.array([[1, 2, 3],
               [4, 5, 6],
               [7, 8, 9]])
print("x2 = ")
print(x2)
print("x2[0] = ", x2[0])  # a single index selects the entire 1st row

# 1st element of the 1st row: chained indexing and the index-tuple form agree
print("x2[0][0]= ", x2[0][0])
print("x2[0,0] = ", x2[0, 0])

# 2nd element of the 3rd row, written both ways again
print("x2[2][1]= ", x2[2][1])
print("x2[2,1]= ", x2[2, 1])

##### NOICE

x2 = 
[[1 2 3]
 [4 5 6]
 [7 8 9]]
x2[0] =  [1 2 3]
x2[0][0]=  1
x2[0,0] =  1
x2[2][1]=  8
x2[2,1]=  8


We can also change specific values with index notation

In [13]:
# Assigning through an index changes the existing array in place
# (x2 is the 3x3 array from the previous cell).
x2[0, 0] = 12
print(x2)

[[12  2  3]
 [ 4  5  6]
 [ 7  8  9]]


Unlike Python lists, NumPy arrays have a fixed type. This means, for example, if you attempt to insert a floating-point value to an integer array, the value will be silently truncated.

In [14]:
## example: an integer array keeps its dtype when a float is assigned into it

x1 = np.ndarray(shape=(5,), dtype=int, buffer=np.array([1, 2, 3, 4, 5]))
print(x1)

x1[2] = 5.7  # stored as 5 - the fractional part is dropped
print("x1 after changing : ", x1)

[1 2 3 4 5]
x1 after changing :  [1 2 5 4 5]


#### Array Slicing and Subsetting : Accessing Subarrays

Just as we can use square brackets to access individual array elements, we can also use them to access subarrays with the slice notation, marked by the colon (:) character. The NumPy slicing syntax follows that of the standard Python list; to access a slice of an array x, use :

In [None]:
# General slice syntax: x[start:stop:step].
# start, stop and step are placeholders, so they are given concrete values here;
# left undefined, this cell raises NameError and breaks "Restart & Run All".
x = np.arange(10)            # the integers 0..9
start, stop, step = 1, 5, 2  # begin at index 1, stop before index 5, take every 2nd item
x[start:stop:step]           # -> array([1, 3])

In [15]:
x = np.ndarray(shape=(10,), dtype=int, buffer=np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
print(x)

print("x[:] = ", x[:])          # no bounds given: the whole array
print("x[:5] = ", x[:5])        # up to index 5; the stop index is not included
print("x[5:] = ", x[5:])        # from index 5 to the end
print("x[1:5] = ", x[1:5])      # indices 1, 2, 3 and 4
print("x[1:5:2] = ", x[1:5:2])  # step of 2: every other element in that range
print("x[::-1] = ", x[::-1])    # negative step: walks the array backwards

[0 1 2 3 4 5 6 7 8 9]
x[:] =  [0 1 2 3 4 5 6 7 8 9]
x[:5] =  [0 1 2 3 4]
x[5:] =  [5 6 7 8 9]
x[1:5] =  [1 2 3 4]
x[1:5:2] =  [1 3]
x[::-1] =  [9 8 7 6 5 4 3 2 1 0]


In [16]:
# The same slice notation applies to multidimensional arrays, one slice per axis.

# Seed carried over from the tutorial. No random numbers are drawn in this cell,
# so it has no effect on anything printed below.
np.random.seed(0)

x2 = np.array([[1, 2, 3],
               [4, 5, 6],
               [7, 8, 9]])
print(x2)

# first two rows, first three columns
print("x2[:2, :3] = ")
print(x2[:2, :3])

# all rows, every other column
print("x2[:3, ::2] = ")
print(x2[:3, ::2])

# both axes reversed
print("x2[::-1, ::-1] = ")
print(x2[::-1, ::-1])

# first column of x2
print("x2[:, 0] = ", x2[:, 0])

[[1 2 3]
 [4 5 6]
 [7 8 9]]
x2[:2, :3] = 
[[1 2 3]
 [4 5 6]]
x2[:3, ::2] = 
[[1 3]
 [4 6]
 [7 9]]
x2[::-1, ::-1] = 
[[9 8 7]
 [6 5 4]
 [3 2 1]]
x2[:, 0] =  [1 4 7]


#### Joining Arrays:

Using concatenate:

In [18]:
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])

# concatenate joins along an axis that already exists, so two 1-D arrays give
# one longer 1-D array. To get a 2-D result (x and y as separate rows), use
# np.stack((x, y)) instead.
z = np.concatenate((x, y))
print(z)

[1 2 3 4 5 6]


In [21]:
x2 = np.array([[1, 2, 3],
               [2, 3, 4]])
y2 = np.array([[3, 4, 5],
               [4, 5, 6]])

# axis=0 (the default) appends the rows of y2 underneath the rows of x2.
# Passing axis=1 instead would join them side by side, extending each row.
z2 = np.concatenate((x2, y2), axis=0)
print(z2)

[[1 2 3]
 [2 3 4]
 [3 4 5]
 [4 5 6]]


#### Arithmetic Operations on 2D arrays

+	np.add
-	np.subtract
*	np.multiply

In [23]:
x2 = np.array([[1, 2, 3],
               [2, 3, 4]])
y2 = np.array([[3, 4, 5],
               [4, 5, 6]])

# Element-wise sum: the function form of x2 + y2
print("x2 + y2 = ")
print(np.add(x2, y2))

# Element-wise difference: the function form of x2 - y2
print("x2 - y2 = ")
print(np.subtract(x2, y2))

x2 + y2 = 
[[ 4  6  8]
 [ 6  8 10]]
x2 - y2 = 
[[-2 -2 -2]
 [-2 -2 -2]]


In [24]:
# x2 and y2 are reused from the previous cell.
print(x2)
print(y2)
# np.multiply works element by element (see the output below); it is not a matrix product.
print("x2 * y2 = "); print(np.multiply(x2,y2))

[[1 2 3]
 [2 3 4]]
[[3 4 5]
 [4 5 6]]
x2 * y2 = 
[[ 3  8 15]
 [ 8 15 24]]


#### Covariance

In [25]:
# Two variables (one per row), three observations each; the second row
# decreases as the first one increases.
x2 = np.array([[0, 1, 2],
               [2, 1, 0]])
print(x2)

# 2x2 covariance matrix of the two rows
print(np.cov(x2))

[[0 1 2]
 [2 1 0]]
[[ 1. -1.]
 [-1.  1.]]


Copied:

Note that again, C[0,1] and C[1,0], which show the covariance between x[0] and x[1], are negative.

However, C[0,0] and C[1,1], which show the covariance of x[0] with itself and of x[1] with itself (i.e. their variances), are both 1.

In [26]:
# Another example, showing how x and y are combined into one input
x = np.array([-2.1, -1, 4.3])
y = np.array([3, 1.1, 0.12])

# Stack the two 1-D arrays as the rows of a single 2-D array.
X = np.stack([x, y], axis=0)
print(np.cov(X))

# To check: the variance of each variable on its own should match the
# diagonal of the matrix printed above.
print(np.cov(x))
print(np.cov(y))

[[11.71       -4.286     ]
 [-4.286       2.14413333]]
11.709999999999999
2.1441333333333334


#### Correlation

In [27]:
# Same data as the covariance example above.
x = np.array([-2.1, -1, 4.3])
y = np.array([3, 1.1, 0.12])

# Rows of X are the variables; corrcoef returns their 2x2 correlation matrix.
X = np.stack([x, y], axis=0)
print(np.corrcoef(X))

[[ 1.         -0.85535781]
 [-0.85535781  1.        ]]


The correlation matrix is the normalised version of the covariance matrix: each covariance is divided by the product of the two variables' standard deviations.