# Introduction to NumPy

Lots of these notes are from Chapter 2 of VanderPlas -- you should definitely go through his chapter carefully.  I will use the book notes and these in class.

The NumPy documentation: https://docs.scipy.org/doc/numpy/

In [None]:
# import the NumPy module and display the version number
import numpy as np
np.__version__

In [None]:
# Built-in documentation
# np.<TAB> in IPython
np?

In [None]:
# see the things in the np namespace
dir(np)

In [None]:
# Helper that prints an array's attributes and, optionally, its data.
#  The show_data parameter has a default value, so the data dump is off
#  unless requested -- handy for large arrays that you wouldn't want to
#  print explicitly.
def show(the_array, show_data = 0):
    """Print a labelled summary of an array's attributes.

    Parameters
    ----------
    the_array
        Object exposing the ``ndim``, ``size``, ``shape``, ``dtype``,
        ``itemsize`` and ``data`` attributes (e.g. a NumPy array).
    show_data
        When truthy, the array itself is printed after the summary.
    """
    # (label, attribute name) pairs; labels are padded so the colons align.
    summary = (
        ("  Dimension", "ndim"),
        ("       Size", "size"),
        ("      Shape", "shape"),
        ("  Data Type", "dtype"),
        ("  Item Size", "itemsize"),
        ("Data Buffer", "data"),
    )
    for label, attribute in summary:
        print("{}: {}".format(label, getattr(the_array, attribute)))
    if show_data:
        print(the_array)

## NumPy Arrays
### Creating Arrays and Accessing Elements

In [None]:
# Create a NumPy array from a list
a = np.array([1.5, 0.7, 22.4])
show(a, 1)

In [None]:
# The second element
a[1]

In [None]:
# Multi-dimensional array built from a list of lists.  Note that NumPy
#   arrays are homogeneous -- all elements share one data type (dtype),
#   which is an integer type here since every value is an integer.
b = np.array([
    [1, 2, 3, 4, 5],
    [6, 7, 8, 9, 10],
    [11, 12, 13, 14, 15],
])
show(b, 1)

In [None]:
# the (i, j)th element
i = 2
j = 3
b[i, j]

In [None]:
# Note that if even one element is a float, every element is upcast to
# float, since NumPy arrays are homogeneous.
b = np.array([
    [1.0, 2, 3, 4, 5],
    [6, 7, 8, 9, 10],
    [11, 12, 13, 14, 15],
])
show(b, 1)
# Python lists, by contrast, are heterogeneous (in data types), so there
# is no upcasting: a similar definition would give a list of lists
# holding 1 float and the rest integers.

In [None]:
# zeros() creates an array and initializes it with zeros. ones() works similarly.
c = np.zeros((3, 8), dtype=int)
show(c, 1)

In [None]:
# full() for arbitrary values
c = np.full((3, 3, 3), 82.6)
show(c, 1)

In [None]:
# Create a NumPy array and initialize it with integers from 0 - 14 using
#   the arange() function.
a = np.arange(15)
show(a, 1)

In [None]:
# Create the same array, but then reshape it to a 3x5 matrix (a 
#   2-dimensional array, technically)
a = np.arange(15).reshape(3,5)
show(a, 1)

In [None]:
# Create a 20-element array of floats from 0 - 19 and reshape it to a 5x4 array.
# Note that we create an anonymous array and send that to
# the show() function.  The structure will be garbage-collected
# after the function call since it is anonymous.
show(np.arange(20.0).reshape(5,4), 1)

In [None]:
# Use a comprehension - From VanderPlas
# Nested lists result in multi-dimensional arrays
# Creating an anonymous array (see above)
show(np.array([range(i, i + 3) for i in [2, 4, 6]]), 1)

In [None]:
# Show the details of what's going on in the above
[range(i, i + 3) for i in [2, 4, 6]]

In [None]:
[list(range(i, i + 3)) for i in [2, 4, 6]]

In [None]:
# Create a 3x3 array of normally distributed random values
# with mean 96 and standard deviation 14
c = np.random.normal(96, 14, (3, 3))
show(c,1)

In [None]:
# Some more samples from VanderPlas
np.random.seed(0)  # seed for reproducibility

x1 = np.random.randint(10, size=6)  # One-dimensional array
x2 = np.random.randint(10, size=(3, 4))  # Two-dimensional array
x3 = np.random.randint(10, size=(3, 4, 5))  # Three-dimensional array

In [None]:
# Iterating through a Python list of NumPy arrays.
al = [x1, x2, x3]
for a in al:
    show(a, 1)
    print("")

In [None]:
# Element (1, 2, 3) from x3
x3[1, 2, 3]

In [None]:
# flatten a multi-dimensional array
show(x3.flatten(), 1)

In [None]:
# NumPy arrays are mutable.
x2[0,0] = 12
x2

In [None]:
# Note the behavior when we try to assign a different data type ...
x2[1, 1] = 7.325
show(x2,1)

### Reshaping and np.newaxis

In [None]:
x = np.array([1, 2, 3])
x

In [None]:
# What is the difference between x and y?
y = x.reshape((1,3))
y

In [None]:
# What is the difference between y and z?
z = x.reshape((3,1))
z

In [None]:
x.shape, y.shape,z.shape

In [None]:
x[np.newaxis, :]

In [None]:
x[:,np.newaxis]

In [None]:
a = np.arange(27).reshape((3,3,3))
a

In [None]:
a = np.arange(81).reshape((3,3,3,3))
a

 ### Slices - Views and copies
 
 Slice: [i:j:k] - start:stop:stride

In [None]:
# slice example - upper-left 2x3 sub-matrix
# :2 - rows 0, 1
# :3 - columns 0, 1, 2
x2[:2, :3]

In [None]:
# lower right 2x3
x2[-2:, -3:]

In [None]:
# middle 1x2
x2[1:-1, 1:-1]

In [None]:
# note that slices are (by default) views of the array, not copies.
x = x2[:2, :3]
x, x2

In [None]:
x[0, 0] = 477
x, x2

In [None]:
# If you want a copy, rather than a view, use the copy() function.
x = x2[:2, :3].copy()
x[0, 0] = 976
x, x2

## Array Concatenation and Splitting - VP 02.02

## Universal Functions and the Slowness of Loops - VP 02.03

## Axes and Aggregate Functions

In [None]:
a = np.random.normal(5, 1, (5, 3))
show(a, 1)

In [None]:
# Overall sum -- all elements of the array
a.sum()

In [None]:
# Sum along an axis
a.sum(axis=0)

In [None]:
# Sum along the other axis
a.sum(axis=1)

In [None]:
# the average of the 3rd column
a.mean(axis=0)[2]

In [None]:
# or
ax = 0
a.sum(axis=ax)[2]/a.shape[ax]

In [None]:
# average of the 4th row
a.mean(axis=1)[3]

In [None]:
# or
ax = 1
a.sum(axis=ax)[3]/a.shape[ax]

In [None]:
b = np.random.randint(1, 6, (3, 4, 6))
show(b, 1)

In [None]:
# Mean of the "planes"
b.mean(axis=0)

In [None]:
# Mean of the ?
b.mean(axis=1)

In [None]:
# Mean of the ?
b.mean(axis=2)

## Example with A Dataset from a CSV file

In [None]:
# erv.csv data - a (100x15 matrix of floats)
# The path is written with forward slashes so it resolves on Windows,
# macOS and Linux alike.  The previous '..\data\erv.csv' form only worked
# on Windows and relied on '\d' and '\e' being unrecognised escape
# sequences, which newer Python versions warn about.
erv = np.genfromtxt('../data/erv.csv', delimiter=',')
# Attributes only -- show_data is left off so the whole matrix is not printed.
show(erv)

In [None]:
# show a sample - upper left 5x5 (first five columns of the first 5 rows)
for r in erv[:5, :5]:
    print(r)

In [None]:
# Use the aggregate sum to find the column averages in one statement.
erv.mean(axis=0)

In [None]:
# row averages
erv.mean(axis=1)

In [None]:
# Load matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Show a histogram of the jth column (j between 0 and 14)
j = 9
plt.hist(erv[:,j])
plt.show()

In [None]:
# Draw a histogram for each of the 15 columns in one figure, laid out on
# a 3x5 grid.  Subplot positions are 1-based while column indices are
# 0-based, hence the "- 1" when selecting the column.
n_rows, n_cols = 3, 5
plt.figure(figsize=(20, 8))
for position in range(1, n_rows * n_cols + 1):
    plt.subplot(n_rows, n_cols, position)
    plt.hist(erv[:, position - 1])
plt.show()

In [None]:
# Same 3x5 grid of histograms, but with a distinct named colour for each
# of the 15 columns (the list position matches the column index).
c = [
    'orange', 'green', 'red', 'beige', 'brown',
    'dimgray', 'firebrick', 'darkkhaki', 'indigo', 'darksalmon',
    'forestgreen', 'fuchsia', 'darkcyan', 'darkviolet', 'darkgoldenrod',
]
plt.figure(figsize=(20, 8))
for j, colour in enumerate(c):
    plt.subplot(3, 5, j + 1)
    plt.hist(erv[:, j], color=colour)
plt.show()

In [None]:
# Scatter plot of column col1 vs column col2
col1 = 0
col2 = 14
plt.scatter(erv[:, col1], erv[:, col2]);

In [None]:
plt.scatter?