# Tools for Scientific Python

## numpy

Numerical computing, N-dimensional arrays, matrix algebra in Python. Most of the stack is built around NumPy.

http://www.numpy.org/

In [None]:
import numpy

# 3x3 matrix of random samples
A = numpy.random.rand(3, 3)

# Show the matrix, then its transpose
for matrix in (A, A.T):
    print(matrix)

In [None]:
# Second random 3x3 matrix, then the matrix product of A and B
B = numpy.random.rand(3, 3)
product = A.dot(B)
print(product)

In [None]:
# Stack A on top of B (row-wise), giving a 6x3 array
stacked = numpy.concatenate((A, B), axis=0)
print(stacked)

# scipy

General-purpose scientific toolbox, including signal processing, statistics, optimization, linear algebra, etc.

http://docs.scipy.org/doc/scipy/reference/

In [None]:
import scipy.stats

# Evaluate the density of a Gaussian RV (mean 0, standard deviation 1) at x = 2.5
scipy.stats.norm.pdf(2.5, loc=0, scale=1)

In [None]:
import scipy.integrate


def func(x):
    """Integrand: x^2 + 1."""
    return x**2 + 1


# numerically integrate x^2 + 1 between 0 and 1 (exact value: 4/3);
# quad returns a (value, estimated absolute error) pair.
# The limits previously passed were (1, 2), which did not match this description.
scipy.integrate.quad(func, 0, 1)

In [None]:
import scipy.optimize


def f(x):
    """Objective function: (x + 1)^2 + 2x."""
    return (x + 1)**2 + 2*x


# Search for the minimum of the objective
scipy.optimize.minimize_scalar(f)

## matplotlib

Basic 2D plots in Python

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import numpy as np


def f(t):
    """Return the damped cosine exp(-t) * cos(2*pi*t)."""
    decay = np.exp(-t)
    oscillation = np.cos(2*np.pi*t)
    return decay * oscillation


# Coarse grid (step 0.5) for the markers, fine grid (step 0.1) for the curves
t1 = np.arange(0.0, 5.0, 0.5)
t2 = np.arange(0.0, 5.0, 0.1)

# Damped signal: blue circles on the coarse grid, black line on the fine grid
plt.plot(t1, f(t1), 'bo', t2, f(t2), 'k')
# Undamped cosine as a dashed red line
plt.plot(t2, np.cos(2*np.pi*t2), 'r--')

## pandas

Data Analysis library providing:

http://pandas.pydata.org/

* data structures like `Series` and `DataFrame`
* easy selection, transformation, filtering and visualization of data
* I/O capabilities with various data formats

In [None]:
import numpy as np
import pandas as pd

# 10 rows x 4 columns of absolute values of standard-normal samples
samples = np.random.randn(10, 4)
df = pd.DataFrame(np.abs(samples), columns=['A', 'B', 'C', 'D'])
df

In [None]:
# Bar chart with one group of bars per row of df
df.plot.bar()

## scikit-learn

Machine learning and statistical learning in Python (look at statsmodels for statistical modeling and econometrics).

http://scikit-learn.org/stable/ 

https://github.com/statsmodels/statsmodels/

In [None]:
from sklearn import datasets
from sklearn import svm

# Load digits dataset
digits = datasets.load_digits()

# Create a support vector machine classifier and fit it to every sample
# except the last one, which is held out for the prediction below
clf = svm.SVC(gamma=0.001, C=100.)
clf.fit(digits.data[:-1], digits.target[:-1])

# Classify the held-out digit. predict() expects a 2-D array of shape
# (n_samples, n_features), so slice with [-1:] to keep the sample as a
# single row, then take the one prediction out of the returned array.
prediction = clf.predict(digits.data[-1:])[0]
print("True digit: " + str(digits.target[-1]))
print("Prediction: " + str(int(prediction)))

# sympy

Symbolic math

http://www.sympy.org/en/index.html

In [None]:
import sympy

# Turn on pretty printing of expressions
sympy.init_printing()

# Symbolic variable x
x = sympy.symbols('x')

# Unevaluated integral of cos(x) * exp(x) with respect to x
integrand = sympy.cos(x) * sympy.exp(x)
a = sympy.Integral(integrand, x)

# Show the integral next to its evaluated form
evaluated = a.doit()
sympy.Eq(a, evaluated)

# pillow

Python imaging library, focusing on image transformations

https://python-pillow.github.io/

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from PIL import Image

# Open the image file, then display its pixel data as a NumPy array
im = Image.open("img/scipy.png")
pixels = np.asarray(im)
plt.imshow(pixels)

In [None]:
# Rotate the image by 45 degrees and display the result
rotated = im.rotate(45)
plt.imshow(np.asarray(rotated))

# scikit-image

Another image processing library, focusing more on computer vision.

http://scikit-image.org/

In [None]:
from skimage import data, io, filters

# Load the sample "coins" image from skimage.data
# (any NumPy array could be used instead)
image = data.coins() 
# Display the image
io.imshow(image)

In [None]:
# Find the edges by applying a Sobel filter to the image
edges = filters.sobel(image)

# Display the filtered result
io.imshow(edges)

# bokeh

Interactive plotting library for web-based plots

http://bokeh.pydata.org/en/latest/

In [None]:
from bokeh.plotting import figure, output_notebook, show

# send plot output to the notebook
output_notebook()

# create a new plot with a title and axis labels
p = figure(title="simple line example", x_axis_label='x', y_axis_label='y')

# add a line renderer with a legend entry and line thickness;
# `legend_label` replaces the older `legend` keyword, which current
# Bokeh releases no longer accept
p.line([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], legend_label="Temp.", line_width=2)

# show the results
show(p)

# numba

Just-In-Time compiler to improve the speed and performance of your Python programs.

http://numba.pydata.org/

In [None]:
from numba import jit
from numpy import arange


# The jit decorator asks Numba to compile this function; the argument
# types are inferred by Numba when the function is called.
@jit
def sum2d(arr):
    """Add up every element of a 2-D array using explicit loops."""
    n_rows, n_cols = arr.shape
    total = 0.0
    for row in range(n_rows):
        for col in range(n_cols):
            total += arr[row, col]
    return total


# 3x3 array holding the integers 0..8
a = arange(9).reshape(3, 3)
print(sum2d(a))

# cython

Compile Python programs to C

http://cython.org/

In [None]:
# Load the Cython IPython extension, which provides the %%cython cell magic
%load_ext Cython

In [None]:
def pyfib(n):
    """Advance the Fibonacci pair (0, 1) n times and return its second entry.

    Pure-Python implementation: pyfib(0) == 1, pyfib(1) == 1, pyfib(2) == 2, ...
    """
    prev, curr = 0, 1
    for _ in range(n):
        prev, curr = curr, prev + curr
    return curr

In [None]:
%%cython -a

# Cython version of the iterative Fibonacci loop: n, a and b are declared
# as C ints rather than Python objects.
# NOTE(review): C ints have a fixed width, unlike Python ints. For large n
# (e.g. n=100) the true Fibonacci value exceeds that range, so the returned
# value presumably will not match the pure-Python result — confirm whether
# only the timing matters here.
def cfib(int n):
    cdef int a = 0
    cdef int b = 1
    for i in range(n):
        a, b = b, a + b        
    return b

In [None]:
# Time the Cython-compiled implementation
%timeit cfib(100)

In [None]:
# Time the pure-Python implementation
%timeit pyfib(100)

# geopandas

http://geopandas.org/

* Geographic (GIS) operations in Python
* An alternative to PostGIS

# astropy

http://www.astropy.org/

Variety of astronomy tools for observers and theorists

# File Formats

* **CSV**
  * [built in](https://docs.python.org/3/library/csv.html)
* **HDF5**
  * http://www.pytables.org/
  * http://www.h5py.org/
* **NetCDF**
  * http://unidata.github.io/netcdf4-python/
* **JSON**
  * [built in](https://docs.python.org/3/library/json.html)
* **XML**
  * http://lxml.de/
* **GIS** (e.g. GeoJSON)
  * http://toblerity.org/fiona/

## Some others

* mpi4py, PyCUDA for HPC
* Theano, tensorflow, pycaffe for Deep Learning
* Blaze, Dask and PySpark for storing, describing, querying and processing data

# Installing Packages

* Most packages are available as part of the Anaconda distribution

* Package managers help you install others

* You can also install packages from source

## Conda

http://conda.pydata.org/

Included with Anaconda

Doesn't have everything, but installs binary packages

    conda search package_name
    conda install package_name
    conda uninstall package_name

## Pip

http://pip-installer.org

Standard Python installer

Included with Anaconda

    pip search package_name
    pip install package_name
    pip uninstall package_name

## Jupyter Notebook

* Combine code, data, visualizations and explanatory text into rich documents.
* You're looking at it!