In [None]:
import numpy as np
import scipy.linalg as la
import sympy as sym
sym.init_printing(use_unicode=False, wrap_line=False, no_global=True)


%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
#mpl.rcParams['savefig.dpi'] = 80
mpl.rcParams['figure.dpi'] = 80
# from IPython.display import set_matplotlib_formats
# set_matplotlib_formats('png', 'pdf')
%config InlineBackend.figure_format = 'retina'
#https://www.dataquest.io/blog/jupyter-notebook-tips-tricks-shortcuts/



import seaborn as sns
sns.set()
#sns.set_style(style= "whitegrid")
#plt.style.available
plt.style.use('fivethirtyeight')



## 1. Sympy Differential Function

http://docs.sympy.org/latest/tutorial/calculus.html

http://docs.sympy.org/latest/special_topics/finite_diff_derivatives.html

In [None]:
x = sym.Symbol("x")

In [None]:
3*x**2 +1

In [None]:
sym.diff(3*x**2 +1, x)

In [None]:
# evaluate at x = 2.0
sym.diff(3*x**2 +1, x).subs(x,2.)

## 2. Scipy Derivative Function

https://docs.scipy.org/doc/scipy/reference/generated/scipy.misc.derivative.html

`scipy.misc.derivative(func, x0, dx=1.0, n=1, args=(), order=3)`



Find the n-th derivative of a function at a point.

Given a function, use a **central difference** formula with spacing dx to compute the n-th derivative at x0.

**Parameters:**	


    func : function

        Input function.

    x0 : float

        The point at which n-th derivative is found.

    dx : float, optional

        Spacing.

    n : int, optional

        Order of the derivative. Default is 1.

    args : tuple, optional

        Arguments

    order : int, optional

        Number of points to use, must be odd.
        
    Notes:

        Decreasing the step size too small can result in round-off error.

In [None]:
from scipy.misc import derivative

In [None]:
def f(x):
    """Evaluate the quadratic test function 3*x**2 + 1 at ``x``.

    Works for anything supporting ``**``, ``*`` and ``+`` (Python numbers,
    NumPy arrays, SymPy expressions).
    """
    scaled_square = 3 * x**2
    return scaled_square + 1

# Uses the defaults dx=1.0 and order=3 (a 3-point central difference). For a
# quadratic such as f the central difference is exact, so the coarse default
# spacing still returns the true slope here.
derivative(f, 2.0) # evaluate at x = 2.0

## 3. Sympy Integration Function


http://docs.sympy.org/latest/modules/integrals/integrals.html

In [None]:
#Function
def f(x):
    """Integrand used throughout the integration examples: 1 + cos(x)**2 + x.

    ``x`` may be a scalar or a NumPy array; the result has the same shape.
    """
    cos_squared = np.cos(x)**2
    return (1 + cos_squared) + x

In [None]:
x_array = np.linspace(0,2)
plt.plot(x_array, f(x_array))
plt.ylim((0,3.5));

In [None]:
sym.integrate(1+sym.cos(x)**2+x, x)

In [None]:
sym.integrate(1+sym.cos(x)**2+x, (x,0,2)).evalf()

## 3. Scipy Integration Function

https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.quad.html#scipy.integrate.quad

### General integration (quad)


The function quad is provided to integrate a function of one variable between two points.

In [None]:
from scipy.integrate import quad

In [None]:
quad(f,0,2)

**Returns:**

    y : float

        The integral of func from a to b.

    abserr : float

        An estimate of the absolute error in the result.

In [None]:
def f1(x):
    """Damped oscillation exp(-x) * sin(3x), evaluated with NumPy.

    ``x`` may be a scalar or a NumPy array.
    """
    decay = np.exp(-x)
    oscillation = np.sin(3.0 * x)
    return decay * oscillation

quad(f1,0., 2*np.pi)

## Scipy Integrate

https://docs.scipy.org/doc/scipy/reference/tutorial/integrate.html

https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.quad.html#scipy.integrate.quad


## scipy.integrate.quad


`scipy.integrate.quad(func, a, b, args=(), full_output=0, epsabs=1.49e-08, epsrel=1.49e-08, limit=50, points=None, weight=None, wvar=None, wopts=None, maxp1=50, limlst=50)`



Compute a definite integral.

Integrate func from a to b (possibly infinite interval) using a technique from the Fortran library QUADPACK.



### 3.1 Newton-Cotes Methods: the Trapezoid Rule


#### 3.1.1 numpy.trapz

https://docs.scipy.org/doc/numpy/reference/generated/numpy.trapz.html


`numpy.trapz(y, x=None, dx=1.0, axis=-1)`



Integrate along the given axis using the composite trapezoidal rule.

Integrate y (x) along given axis.

**Parameters:**

    y : array_like

    Input array to integrate.

    x : array_like, optional

    The sample points corresponding to the y values. If x is None, the sample points are assumed to be evenly spaced dx apart. The default is None.

    dx : scalar, optional

    The spacing between sample points when x is None. The default is 1.

    axis : int, optional

    The axis along which to integrate.

**Returns:**

    trapz : float

    Definite integral as approximated by trapezoidal rule.
    
    
    
![](https://upload.wikimedia.org/wikipedia/commons/thumb/d/d1/Integration_num_trapezes_notation.svg/330px-Integration_num_trapezes_notation.svg.png)





In [None]:
x_int_array = np.linspace(0,2, num = 5)

x_int_array 

In [None]:
y_int_array = f(x_int_array)
y_int_array

In [None]:
x_array = np.linspace(0,2, num = 50)
plt.plot(x_int_array,y_int_array,'ro')#  , x_array, f(x_array),'b-')
plt.ylim((0,3.5));

In [None]:
np.trapz(y_int_array, x=x_int_array)

#### If we use different nodes, or forget to specify `x` properly

In [None]:

plt.plot(x_int_array*2,y_int_array,'ro')#  , x_array, f(x_array),'b-')
plt.ylim((0,3.5));

In [None]:
np.trapz(y_int_array, x=x_int_array*2)


In [None]:
np.trapz(y_int_array)


#### If we use different nodes, or forget to specify `x` properly

In [None]:

plt.plot(x_int_array*4,y_int_array,'ro')#  , x_array, f(x_array),'b-')
plt.ylim((0,3.5));

In [None]:
np.trapz(y_int_array, x=x_int_array*4)


In [None]:
np.trapz(y_int_array, dx=2)



#### 3.1.2 scipy.integrate.cumtrapz

https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.cumtrapz.html#scipy.integrate.cumtrapz

`scipy.integrate.cumtrapz(y, x=None, dx=1.0, axis=-1, initial=None)`


**Cumulatively** integrate y(x) using the composite trapezoidal rule.

**Parameters:**


    y : array_like

        Values to integrate.

    x : array_like, optional

        The coordinate to integrate along. If None (default), use spacing dx between consecutive elements in y.

    dx : float, optional

        Spacing between elements of y. Only used if x is None.

    axis : int, optional

        Specifies the axis to cumulate. Default is -1 (last axis).

    initial : scalar, optional

        If given, uses this value as the first value in the returned result. Typically this value should be 0. Default is None, which means no value at x[0] is returned and res has one element less than y along the axis of integration.

**Returns:**

    res : ndarray

        The result of cumulative integration of y along axis. If initial is None, the shape is such that the axis of integration has one less value than y. If initial is given, the shape is equal to that of y.

In [None]:
# `cumtrapz` was renamed to `cumulative_trapezoid` (available since SciPy 1.6)
# and the old name was removed in SciPy 1.14. Prefer the new function but keep
# the notebook's `cumtrapz` name so the cells below run on old and new SciPy.
try:
    from scipy.integrate import cumulative_trapezoid as cumtrapz
except ImportError:  # older SciPy that only ships the legacy name
    from scipy.integrate import cumtrapz

In [None]:
cumtrapz(y_int_array, x=x_int_array, initial=0) # cumulative integration of y along axis

#### If we use different nodes, or forget to specify `x` properly

In [None]:
cumtrapz(y_int_array) # cumulative integration of y along axis

In [None]:
cumtrapz(y_int_array, initial=0) # cumulative integration of y along axis

In [None]:
cumtrapz(y_int_array, x=x_int_array*2, initial=0) # cumulative integration of y along axis

### 3.2 Newton-Cotes Methods: Simpson’s Rule

`scipy.integrate.simps(y, x=None, dx=1, axis=-1, even='avg')`


Integrate y(x) using samples along the given axis and the composite Simpson’s rule. If x is None, spacing of dx is assumed.

If there are an even number of samples, N, then there are an odd number of intervals (N-1), but Simpson’s rule requires an even number of intervals. The parameter ‘even’ controls how this is handled.

**Parameters:**	

    y : array_like

        Array to be integrated.

    x : array_like, optional

        If given, the points at which y is sampled.

    dx : int, optional

        Spacing of integration points along axis of y. Only used when x is None. Default is 1.

    axis : int, optional

        Axis along which to integrate. Default is the last axis.

    even : str {‘avg’, ‘first’, ‘last’}, optional

        ‘avg’ : Average two results:1) use the first N-2 intervals with
        a trapezoidal rule on the last interval and 2) use the last N-2 intervals with a trapezoidal rule on the first interval.

        ‘first’ : Use Simpson’s rule for the first N-2 intervals with
        a trapezoidal rule on the last interval.

        ‘last’ : Use Simpson’s rule for the last N-2 intervals with a
        trapezoidal rule on the first interval.
        
        
        
![](https://upload.wikimedia.org/wikipedia/en/6/67/Simpsonsrule2.gif)        


An animation showing how Simpson's rule approximation improves with more strips.

In [None]:
# `simps` was renamed to `simpson` (available since SciPy 1.6) and the old name
# was removed in SciPy 1.14. Prefer the new function but keep the notebook's
# `simps` name so the cells below run on old and new SciPy.
# Note: with `simpson` on SciPy >= 1.14 the sample points must be passed as the
# keyword argument `x=...`.
try:
    from scipy.integrate import simpson as simps
except ImportError:  # older SciPy that only ships the legacy name
    from scipy.integrate import simps

In [None]:

sym.integrate(1+sym.cos(x)**2+x, (x,0,2)).evalf()

In [None]:
# many nodes
x1 = np.linspace(0,2.,num=50)
x1

In [None]:
y1 = f(x1)
y1

In [None]:
plt.plot(x1,y1,'ro')#  , x_array, f(x_array),'b-')
plt.ylim((0,3.5));

In [None]:
# Composite Simpson estimate on the dense grid (50 nodes).
# The nodes are passed by keyword because recent SciPy releases accept `x`
# as a keyword-only argument; the keyword form works on every version.
I1 = simps(y1, x=x1)
I1

In [None]:
# fewer nodes
x2 = np.linspace(0,2.,num=5)
x2

In [None]:
y2 = f(x2)
y2

In [None]:
plt.plot(x2,y2,'ro')#  , x_array, f(x_array),'b-')
plt.ylim((0,3.5));

In [None]:
# Composite Simpson estimate on the coarse grid (5 nodes, i.e. 4 intervals).
# The nodes are passed by keyword because recent SciPy releases accept `x`
# as a keyword-only argument; the keyword form works on every version.
I2 = simps(y2, x=x2)
I2

### 3.3 Gauss-Legendre Methods

https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.quadrature.html#scipy.integrate.quadrature

quadrature    -- Integrate with given tolerance using Gaussian quadrature.


`scipy.integrate.quadrature(func, a, b, args=(), tol=1.49e-08, rtol=1.49e-08, maxiter=50, vec_func=True, miniter=1)`


Compute a definite integral using fixed-tolerance Gaussian quadrature.

Integrate func from a to b using Gaussian quadrature with absolute tolerance tol.

**Parameters:**	


    func : function

        A Python function or method to integrate.

    a : float

        Lower limit of integration.

    b : float

        Upper limit of integration.

    args : tuple, optional

        Extra arguments to pass to function.

    tol, rtol : float, optional

        Iteration stops when error between last two iterates is less than tol OR the relative change is less than rtol.

    maxiter : int, optional

        Maximum order of Gaussian quadrature.

    vec_func : bool, optional

        True or False if func handles arrays as arguments (is a “vector” function). Default is True.

    miniter : int, optional

        Minimum order of Gaussian quadrature.

**Returns:**

    val : float

        Gaussian quadrature approximation (within tolerance) to integral.

    err : float

        Difference between last two estimates of the integral.
        
        
https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.fixed_quad.html#scipy.integrate.fixed_quad



![](https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/Comparison_Gaussquad_trapezoidal.svg/880px-Comparison_Gaussquad_trapezoidal.svg.png)

In [None]:
from scipy.integrate import quadrature

In [None]:
quadrature(f, 0,2.)

## 4. Python PDF parser and analyzer

In [None]:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install --user pdfminer.six 
#pdfminer
#https://github.com/pdfminer/pdfminer.six

#### PDFMiner


Python PDF parser and analyzer


https://cdn.rawgit.com/pdfminer/pdfminer.six/eddf861f/docs/programming.html

https://cdn.rawgit.com/pdfminer/pdfminer.six/eddf861f/docs/index.html



In [None]:
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from io import StringIO

def extract_pdf_content(pdf):
    """Extract the text of every page of a PDF file as a single string.

    Parameters
    ----------
    pdf : str or path-like
        Path of the PDF file; it is opened in binary mode.

    Returns
    -------
    str
        The text written by pdfminer's ``TextConverter`` for all pages,
        in page order.

    Raises
    ------
    OSError
        If the file cannot be opened.
    Exception
        Any error raised by pdfminer while parsing is propagated; the
        converter and the text buffer are released in every case.
    """
    rsrcmgr = PDFResourceManager()  # Stores resources shared between pages.
    laparams = LAParams()  # Default layout-analysis parameters.

    # The in-memory buffer receives the extracted text; the `with` block closes
    # it even when parsing fails.
    with StringIO() as outfp:
        # Device that renders each interpreted page as plain text into outfp.
        device = TextConverter(rsrcmgr=rsrcmgr, outfp=outfp, codec='utf-8', laparams=laparams)
        try:
            interpreter = PDFPageInterpreter(rsrcmgr, device)
            with open(pdf, 'rb') as fp:
                # An empty page set and maxpages=0 apply no page filter or
                # limit, so every page contained in the document is processed.
                pages = PDFPage.get_pages(
                    fp,
                    set(),
                    maxpages=0,
                    password="",
                    caching=True,
                    check_extractable=True,
                )
                for page in pages:
                    interpreter.process_page(page)
            # Read the accumulated text before the device and buffer are closed.
            return outfp.getvalue()
        finally:
            device.close()

In [None]:
pdf = "Econ_457_Lecture_20.pdf"

In [None]:
content = extract_pdf_content(pdf)

In [None]:
content