Import dependencies:

In [None]:
import numpy as np
import numexpr as ne


import os

import importlib
import poppy
importlib.reload(poppy)
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.rcParams['image.origin'] = 'lower'
matplotlib.rcParams['image.interpolation'] = 'nearest'
matplotlib.rcParams['font.size'] = 9

import astropy.io
import astropy.units as u

import scipy.interpolate
import scipy.signal
import scipy.optimize

import logging
# Only show warnings and errors. logging.WARNING is the documented level name
# (logging.WARN is an alias for it). The inline matplotlib backend is already
# enabled next to the matplotlib imports above, so it is not repeated here.
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.WARNING)


In [None]:
#import accelerate # for earlier releases

Print this notebook's current git commit:

In [None]:
%%bash
# Print the hash of the commit currently checked out (HEAD).
git rev-parse HEAD


In [None]:
# Print NumPy's build configuration for the record.
np.__config__.show()

In [None]:
# Core/thread counts seen by NumExpr. show_config() prints its report and
# contributes None as the last element of the displayed tuple.
ne.ncores,ne.nthreads,ne.show_config()

In [None]:
%%bash
# CPU model name(s), de-duplicated, followed by the thread/core/socket/CPU
# counts reported by lscpu. grep reads /proc/cpuinfo directly, and `grep -E`
# replaces the obsolescent `egrep`.
grep 'model name' /proc/cpuinfo | uniq
lscpu | grep -E '^Thread|^Core|^Socket|^CPU\('



In [None]:
%%bash
# Record the GPU/driver status reported by nvidia-smi.
nvidia-smi



# 1. circular pupil

In [None]:
# Acceleration flags as poppy.accel_math exposes them at import time.
from poppy.accel_math import _USE_CUDA, _USE_NUMEXPR, _FFTW_AVAILABLE

print(_USE_CUDA, _USE_NUMEXPR, _FFTW_AVAILABLE)
version_text = str(poppy.__version__)
print("current POPPY version: " + version_text)


In [None]:
# Shorthand for the plane type given to the final 'focus' plane.
intermediate = poppy.poppy_core.PlaneType.intermediate

# Passed as `wavelength` to calcPSF; presumably metres (770 nm) -- TODO confirm.
wavelen = 770e-9

# Primary mirror: diameter, focal ratio and the resulting focal length.
D_prim = 2.37 * u.m
fr_pri = 7.8
fl_pri = D_prim * fr_pri

# Relay optics: M2's focal length is the primary's scaled by D_relay / D_prim;
# M3 uses its own focal ratio on the relay beam diameter.
D_relay = 20 * u.mm
fl_m2 = fl_pri * D_relay / D_prim
fr_m3 = 20.
fl_m3 = fr_m3 * D_relay

def optsys(npix=1024,ratio=0.25):
    """Build the circular-pupil Fresnel system used for the benchmarks.

    The system is a circular aperture of diameter ``D_prim`` followed by three
    quadratic lenses (Primary, M2, M3) and a final intermediate plane named
    'focus'. Focal lengths and diameters come from the module-level
    ``D_prim``, ``fl_pri``, ``fl_m2`` and ``fl_m3``.

    Parameters
    ----------
    npix : int
        Forwarded to ``poppy.FresnelOpticalSystem`` as ``npix``.
    ratio : float
        Forwarded to ``poppy.FresnelOpticalSystem`` as ``beam_ratio``.

    Returns
    -------
    poppy.FresnelOpticalSystem
        The assembled system, ready for ``calcPSF``.
    """
    # A distinct local name: the original rebound `optsys` inside its own body.
    system = poppy.FresnelOpticalSystem(pupil_diameter=D_prim,
                                        npix=npix, beam_ratio=ratio)

    primary = poppy.QuadraticLens(fl_pri, name='Primary')
    m2 = poppy.QuadraticLens(fl_m2, name='M2')
    m3 = poppy.QuadraticLens(fl_m3, name='M3')

    system.add_optic(poppy.CircularAperture(radius=D_prim.value/2))
    system.add_optic(primary)
    system.add_optic(m2, distance=fl_pri + fl_m2)
    system.add_optic(m3, distance=fl_m2 + fl_m3)

    # Final plane, one M3 focal length behind M3.
    system.add_optic(poppy.ScalarTransmission(planetype=intermediate,
                                              name='focus'),
                     distance=fl_m3)
    return system
    
# The number in each name is npix / ratio (e.g. 1024 / 0.25 = 4096).
optsys4096 = optsys(npix=1024, ratio=0.25)
# The profiling cell below refers to this system as `wfirst_optsys4096`.
wfirst_optsys4096 = optsys4096
psf = optsys4096.calcPSF(wavelength=wavelen,
                         display_intermediates=False,
                         return_intermediates=False)
# `sys` was never defined; the builder function is `optsys`.
wfirst_optsys1024 = optsys(npix=256, ratio=0.25)
wfirst_optsys8192 = optsys(npix=2048, ratio=0.25)




In [None]:
# Core/thread counts seen by NumExpr. show_config() prints its report and
# contributes None as the last element of the displayed tuple.
ne.ncores,ne.nthreads,ne.show_config()

In [None]:
psf_timing = %prun -r  -s  tottime wfirst_optsys4096.calcPSF(wavelength=wavelen, display_intermediates=False, return_intermediates=False)



In [None]:
# Print the profile collected by the %prun cell above.
psf_timing.print_stats()

In [None]:
import pandas as pd

# npix values to benchmark: powers of two from 2**4 (16) up to 2**10 (1024).
pixlist = pd.Series([2**exponent for exponent in range(4, 11)])



In [None]:
# One row per benchmark scenario. The benchmark loop applies the entries, in
# order, to poppy.accel_math._USE_CUDA, poppy.accel_math._USE_NUMEXPR and
# poppy.conf.use_fftw.
scenarios = [
    [False, False, False],
    [False, False, True],
    [False, True, True],
    [True, True, True],
]

In [None]:
timings={}
import importlib

for combo in scenarios:
    df = pd.DataFrame(np.zeros([pixlist.size,2]), index=pixlist, columns=["avg","std"])
    print(combo)
    poppy.accel_math._USE_CUDA  = combo[0]
    poppy.accel_math._USE_NUMEXPR  = combo[1]
    poppy.conf.use_fftw = combo[2]
    #reload libraries to make sure variables are set.
    importlib.reload(poppy.fresnel)
    importlib.reload(poppy.optics)
    importlib.reload(poppy)

    print(poppy.fresnel._USE_CUDA,poppy.fresnel._USE_CUDA,poppy.conf.use_fftw )
    for i,npix in enumerate(pixlist):
        test_optsys=optsys(npix=npix,ratio=0.25)
        time = %timeit -o test_optsys.calcPSF(wavelength=wavelen, display_intermediates=False, return_intermediates=False)
    
        df.iloc[i]["std"] = time.stdev
        df.iloc[i]["avg"] = time.average
    outname='cuda'+str(poppy.fresnel._USE_CUDA)+'NumExpr'+str(poppy.fresnel._USE_NUMEXPR)+'FFT'+str(poppy.conf.use_fftw)+'.csv'
    df.to_csv(outname,float_format="%3.3e")
    print(outname)
    psf_timing = %prun -r  -s  tottime wfirst_optsys.calcPSF(display_intermediates=False, return_intermediates=False)
    psf_timing.print_stats()



In [None]:
# Re-save the most recent benchmark table. The name is built from the live
# flags in poppy.fresnel, exactly as inside the benchmark loop. The
# module-level `_USE_CUDA`/`_USE_NUMEXPR` are import-time copies and could
# name a different scenario's file, overwriting its results.
outname='cuda'+str(poppy.fresnel._USE_CUDA)+'NumExpr'+str(poppy.fresnel._USE_NUMEXPR)+'FFT'+str(poppy.conf.use_fftw)+'.csv'
df.to_csv(outname,float_format="%3.3e")
print(outname)

In [None]:
# Run time of each accelerated scenario as a fraction of the plain NumPy run
# time, versus array size, with propagated 1-sigma error bars.

# beam_ratio used for every benchmark system (optsys(..., ratio=0.25)). The
# tick labels previously read `wfirst_optsys.beam_ratio`, an object that does
# not exist until a later section and is built with a different ratio.
bench_beam_ratio = 0.25

plt.figure(dpi=400)
ax=plt.subplot(111)

# Baseline (no CUDA, no NumExpr, no FFTW) and the three accelerated scenarios.
y=pd.read_csv('cuda'+str(False)+'NumExpr'+str(False)+'FFT'+str(False)+'.csv',index_col=0)
y_FFTW=pd.read_csv('cuda'+str(False)+'NumExpr'+str(False)+'FFT'+str(True)+'.csv',index_col=0)
y_numexpr=pd.read_csv('cuda'+str(False)+'NumExpr'+str(True)+'FFT'+str(True)+'.csv',index_col=0)
y_all=pd.read_csv('cuda'+str(True)+'NumExpr'+str(True)+'FFT'+str(True)+'.csv',index_col=0)

for accelerated, label in [(y_FFTW, "FFTW"),
                           (y_numexpr, "NumExpr+FFTW"),
                           (y_all, "Numexpr+CUDA")]:
    fraction = accelerated["avg"]/y["avg"]
    # Relative errors of numerator and denominator added in quadrature.
    fraction_err = fraction*np.sqrt((accelerated["std"]/accelerated["avg"])**2+(y["std"]/y["avg"])**2)
    ax.errorbar(pixlist, fraction, yerr=fraction_err, label=label)

# Reference line at 1.0, i.e. the NumPy run time itself.
ax.plot([0,np.max(pixlist)],[1,1],linewidth=2,alpha=.5,color="black")
ax.text(256,1.02,"NumPy",color="gray")
ax.set_xticks([pixlist[0]]+list(pixlist[3:]))
# Label ticks with the array size, npix / beam_ratio.
ax.set_xticklabels(np.int_(ax.get_xticks()/bench_beam_ratio))
ax.set_yticks(np.arange(0,1.1,.1))
ax.set_ylabel("Fractional Run Time")
plt.legend()
plt.grid()
plt.xlabel("Array Dimensions [pix]")
plt.ylim([0,1.1])
plt.savefig("benchmarks%icores.pdf"%(ne.ncores),bbox_inches="tight")

In [None]:
# Absolute NumPy-only run time per array size, as a log-scaled bar chart.
plt.figure(figsize=[4,3])
ax=plt.subplot(111)

# Array dimension for each benchmarked npix: npix / 0.25 beam ratio = npix * 4.
ticks=np.int_(pixlist*4)
y=pd.read_csv('cuda'+str(False)+'NumExpr'+str(False)+'FFT'+str(False)+'.csv',index_col=0)
y["avg"].plot.bar(yerr=y["std"],
                  label="NumPy",color="orange",linestyle="--",
                  alpha=.7,ax=ax)

ax.set_ylabel("Run Time [sec]")
plt.legend()
plt.minorticks_on()

# The on/off flag is passed positionally: newer Matplotlib releases renamed
# the `b=` keyword to `visible=` and no longer accept `b=`.
plt.grid(True, which='major', color='b', linestyle='-',alpha=.5)
plt.grid(True, which='minor', color='r', linestyle='--',alpha=.5)
plt.xlabel("Array Dimensions [pix]")
plt.yscale("log")
ax.set_xticklabels(ticks)


plt.savefig("NumPy_runtime%icores.pdf"%(ne.ncores),bbox_inches="tight")

In [None]:
# Display the values used as x tick labels in the bar chart above.
ticks

In [None]:
# Inspect the tick labels of the most recent axes. The original bare
# `np.array()` call had no argument and raised TypeError; the label texts are
# collected instead. NOTE(review): the intended array content is a guess.
x = ax.get_xticklabels()[0]
np.array([label.get_text() for label in ax.get_xticklabels()])

##  83e5e2d3be3240c2867a02f8e89c7b1802cb3a4e


environment accelerate 83e5e2d3be3240c2867a02f8e89c7b1802cb3a4e:

`ne.ncores,ne.nthreads,ne.show_config()`

```
mkl_info:
    libraries = ['mkl_rt', 'pthread']
    library_dirs = ['/home/edouglas/miniconda3/envs/anaconda/lib']
    define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
    include_dirs = ['/home/edouglas/miniconda3/envs/anaconda/include']
Out[29]:
(8, 8, None)
```

### standard install with np.fft
```
1.58 s ± 36.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
32.2 s ± 710 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
2min 8s ± 3.18 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
```
### standard install with pyfftw:
```
1.21 s ± 32.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
24 s ± 249 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1min 34s ± 791 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

```
### NumExpr but no cuda:


```
959 ms ± 18.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
20.5 s ± 726 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
1min 22s ± 465 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
```

### NumExpr +  cuda, 2x faster:
```
497 ms ± 15.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
9.23 s ± 51.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
40 s ± 387 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
```

# Old:

commit:4d86854bc2e709d7415ac8e9075098c7f9b4dc16

```%timeit  wfirst_optsys.calcPSF(wavelength=wavelen, display_intermediates=False, return_intermediates=False)```

```
5.8 s ± 58.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [16]:
```

commit:8049b9257a2239944f3ca5358462580d4a47804b
```
5.66 s ± 28.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [8]:
```
getphasor percall: 0.353




In [None]:
# Run times transcribed by hand from the %timeit results recorded above
# (seconds; mean and standard deviation), plotted relative to plain NumPy.
plt.figure(figsize=[4,4])
x=np.array([1024,4096,8192])

# Plain NumPy FFT. The last error value is 3.18 s, matching the recorded
# "2min 8s ± 3.18 s" (it was previously transcribed as 3.28).
y=np.array([1.58,32.2,128])
y_err=np.array([0.036,0.710,3.18])

y_fftw=np.array([1.21,24,93])
y_fftw_err=np.array([0.032,0.249,0.791])

y_numexpr=np.array([0.959,20.5,82])
y_numexpr_err=np.array([0.0183,0.726,0.465])

y_numexpr_cuda=np.array([0.497,9.23 ,40])
y_numexpr_cuda_err=np.array([0.0158,0.0516,0.387])

# The *_err arrays are kept for reference; no error bars are drawn.
plt.errorbar(x,y/y,fmt='.-',label="numpy")
plt.errorbar(x,y_fftw/y,label="FFTW",fmt='.-')
plt.errorbar(x,y_numexpr/y,fmt='.-',label="FFTW+NumExpr",)
plt.errorbar(x,y_numexpr_cuda/y,label="cuda+NumExpr",fmt='.-')
plt.legend()
plt.ylabel("numpy normalized time")
plt.xlabel("array dimension")
plt.title("3 surface optical system")

In [None]:
%%prun
poppy.conf.use_cuda=False
psf = wfirst_optsys.calcPSF(wavelength=wavelen, display_intermediates=False, return_intermediates=False)


In [None]:
# Show the PSF computed in the previous cell.
poppy.display_psf(psf)


In [None]:
plt.figure(figsize=(12,8))
psf = wfirst_optsys.calcPSF(wavelength=wavelen, display_intermediates=True, return_intermediates=True)

In [None]:
#exit_wf = waves[-1]
plt.imshow(np.log10(wf.intensity), cmap='gray', vmin=-6, vmax=0)
plt.colorbar()

# 2. WFIRST pupil

In [None]:
# Use an existing WEBBPSF_PATH if one is set; only fall back to the author's
# local data directory when the variable is missing.
os.environ.setdefault('WEBBPSF_PATH', os.path.expanduser('~/STScI/WFIRST/webbpsf-data/'))

In [None]:
wavelen = 770e-9

# Pupil and mask FITS files, all resolved against the WebbPSF data directory.
data_root = os.environ['WEBBPSF_PATH']
Tel_fname = os.path.join(data_root, "AFTA_CGI_C5_Pupil_onax_256px_flip.fits")
SP_fname = os.path.join(data_root, "CGI/optics/CHARSPC_SP_256pix.fits.gz")
FPM_fname = os.path.join(data_root, "CGI/optics/CHARSPC_FPM_25WA90_2x65deg_-_FP1res4_evensamp_D072_F770.fits.gz")
LS_fname = os.path.join(data_root, "CGI/optics/SPC_LS_30D88_256pix.fits.gz")

# Telescope and relay geometry (same values as in the circular-pupil section).
D_prim = 2.37 * u.m
D_relay = 20 * u.mm
fr_pri = 7.8
fl_pri = D_prim * fr_pri
fl_m2 = fl_pri * D_relay / D_prim
fr_m3 = 20.
fl_m3 = fr_m3 * D_relay

wfirst_optsys = poppy.FresnelOpticalSystem(pupil_diameter=D_prim, beam_ratio=1, npix=512)

# Transmission masks. SP and FPM are loaded here but not added to this system.
telap = poppy.FITSOpticalElement(transmission=Tel_fname)
SP = poppy.FITSOpticalElement(transmission=SP_fname)
FPM = poppy.FITSOpticalElement(transmission=FPM_fname)

m1 = poppy.QuadraticLens(fl_pri, name='Primary')
m2 = poppy.QuadraticLens(fl_m2, name='M2')
m3 = poppy.QuadraticLens(fl_m3, name='M3')
m4 = poppy.QuadraticLens(fl_m3, name='M4')
m5 = poppy.QuadraticLens(fl_m3, name='M5')
m6 = poppy.QuadraticLens(fl_m3, name='M6')

# Telescope aperture and primary are added without an explicit distance;
# every later optic is placed at the given distance from the previous one.
wfirst_optsys.add_optic(telap)
wfirst_optsys.add_optic(m1)
for mirror, separation in ((m2, fl_pri + fl_m2),
                           (m3, fl_m2 + fl_m3),
                           (m4, 2*fl_m3),
                           (m5, 2*fl_m3)):
    wfirst_optsys.add_optic(mirror, distance=separation)

focus_plane = poppy.ScalarTransmission(planetype=poppy.poppy_core.PlaneType.intermediate, name='focus')
wfirst_optsys.add_optic(focus_plane, distance=fl_m3)

In [None]:
# No figure is opened here: with display_intermediates=False nothing is drawn,
# so the former plt.figure(...) call only produced an empty figure.
psf, waves = wfirst_optsys.calcPSF(wavelength=wavelen, display_intermediates=False, return_intermediates=True)

In [None]:
# Log-scaled intensity of the last wavefront returned by the propagation.
exit_wf = waves[-1]
log_intensity = np.log10(exit_wf.intensity)
plt.imshow(log_intensity, cmap='gnuplot', vmin=-13, vmax=0)
plt.colorbar()

In [None]:
SP_fname = os.path.join(
    os.environ['WEBBPSF_PATH'],
    "CGI/optics/CHARSPC_SP_256pix.fits.gz",
)

SP = poppy.FITSOpticalElement(transmission=SP_fname)
SP.display()

# Set the SP pixel scale to match the beam at the position of the SP.
# NOTE(review): the value is close to D_relay / 256 px (7.8125e-05 m); it was
# presumably read off a propagated wavefront -- confirm where it came from.
metres_per_pixel = u.m / u.pixel
SP.pixelscale = 7.812515028023353e-05 * metres_per_pixel



# 3. WFIRST with shaped pupil (without Lyot)

In [None]:
# WFIRST relay with the shaped pupil (SP) inserted between M4 and M5.
# Relies on Tel_fname, FPM_fname and the rescaled SP from the cells above.
wavelen = 770e-9

D_prim = 2.37 * u.m
D_relay = 20 * u.mm
fr_pri = 7.8
fl_pri = D_prim * fr_pri
fl_m2 = fl_pri * D_relay / D_prim
fr_m3 = 20.
fl_m3 = fr_m3 * D_relay

# beam_ratio is written as a float literal: `1/10` only equals 0.1 under true
# division and would silently become 0 under Python 2 integer division.
wfirst_optsys = poppy.FresnelOpticalSystem(pupil_diameter=D_prim, beam_ratio=0.1, npix=512)

telap = poppy.FITSOpticalElement(transmission=Tel_fname)
FPM = poppy.FITSOpticalElement(transmission=FPM_fname)  # loaded but not added to the system
m1 = poppy.QuadraticLens(fl_pri, name='Primary')
m2 = poppy.QuadraticLens(fl_m2, name='M2')
m3 = poppy.QuadraticLens(fl_m3, name='M3')
m4 = poppy.QuadraticLens(fl_m3, name='M4')
m5 = poppy.QuadraticLens(fl_m3, name='M5')
m6 = poppy.QuadraticLens(fl_m3, name='M6')

# Telescope aperture and primary are added without an explicit distance;
# every later optic is placed at the given distance from the previous one.
wfirst_optsys.add_optic(telap)
wfirst_optsys.add_optic(m1)
wfirst_optsys.add_optic(m2, distance=fl_pri + fl_m2)
wfirst_optsys.add_optic(m3, distance=fl_m2 + fl_m3)
wfirst_optsys.add_optic(m4, distance=2*fl_m3)
# SP sits one focal length after M4 and one focal length before M5.
wfirst_optsys.add_optic(SP, distance=fl_m3)
wfirst_optsys.add_optic(m5, distance=fl_m3)

wfirst_optsys.add_optic(poppy.ScalarTransmission(planetype=poppy.poppy_core.PlaneType.intermediate, name='focus'),
                        distance=fl_m3)

In [None]:
plt.figure(figsize=(12,8))
%timeit psf, waves = wfirst_optsys.calcPSF(wavelength=wavelen, display_intermediates=False, return_intermediates=True)

#### no acceleration:
```
CPU times: user 1min 14s, sys: 20.4 s, total: 1min 34s
Wall time: 1min 34s
CPU times: user 37.2 s, sys: 10.1 s, total: 47.3 s
Wall time: 23.8 s
```

In [None]:
 # Run the propagation, keeping the PSF and the list of intermediate wavefronts.
 psf, waves = wfirst_optsys.calcPSF(wavelength=wavelen, display_intermediates=False, return_intermediates=True)
exit_wf = waves[-1]
# Central 50x50-pixel cut-out of the log-scaled intensity. Slice bounds must
# be integers, so the centre uses floor division (`n/2` is a float on Python 3
# and makes the slice raise TypeError).
centre = exit_wf.n // 2
plt.imshow(np.log10(exit_wf.intensity[centre-25:centre+25, centre-25:centre+25]),
           cmap='gray', vmin=-10, vmax=0)
plt.colorbar()


In [None]:

# Left panel: summed intensity of the wavefronts at indices -3 and -4.
# Right panel: the wavefront at index -3 on its own.
plt.figure(figsize=[14,7])
summed_intensity = waves[-3].intensity+waves[-4].intensity
plt.subplot(121)
plt.imshow(summed_intensity)
plt.subplot(122)
plt.imshow(waves[-3].intensity)

In [None]:
# Display the list of intermediate wavefronts returned by calcPSF.
waves

In [None]:
# Reference PSF of the shaped pupil alone, computed with poppy.OpticalSystem
# (pupil -> detector) rather than the FresnelOpticalSystem used above.
osys = poppy.OpticalSystem(pupil_diameter=2.4)
SP_original = poppy.FITSOpticalElement(transmission=SP_fname)


osys.add_pupil(SP_original)    # freshly loaded shaped-pupil mask, without the pixelscale override
osys.add_detector(pixelscale=0.010, fov_arcsec=2.0)  # image plane coordinates in arcseconds

# wavelen is 770e-9, i.e. expressed in metres (not microns).
psf = osys.calc_psf(wavelength=wavelen)
# display_psf matches the spelling used earlier in this notebook; the title
# names what is plotted (a shaped-pupil PSF is not an Airy function).
poppy.display_psf(psf, title='Shaped pupil PSF',vmin=1e-10,imagecrop=5)
SP

In [None]:
# Side-by-side comparison: cropped Fresnel exit wavefront (left) and the
# reference PSF from the previous cell (middle). The third panel is left empty.
plt.figure(figsize=(10,3))
plt.subplot(131)
# Central 300x300-pixel cut-out. Floor division keeps the slice bounds
# integral (`n/2` is a float on Python 3 and makes the slice raise TypeError).
centre = exit_wf.n // 2
plt.imshow(np.log10(exit_wf.intensity[centre-150:centre+150, centre-150:centre+150]), cmap='gray', vmin=-10, vmax=0)
plt.colorbar()
plt.subplot(132)
plt.imshow(np.log10(psf[0].data), cmap='gray', vmin=-10, vmax=0)
plt.colorbar()

plt.subplot(133)

plt.savefig("SP_%i+%ix.png"%(exit_wf.n,exit_wf.oversample))


In [None]:
# Display the exit wavefront's `n` (the value used for the crops and the file name above).
exit_wf.n