# Gym environment for a High Contrast Imaging instrument's AO control system.

## Attributes:
This is a simplified HCI system environment. The dynamics of the environment are based on a Markov Decision Process. The relevant attributes are the "state", "action", and "reward".  
The state in this case is the WFS recorded phase projected on the DM actuator mode basis. For example, with a 25x25 actuator DM the state is a 25x25 matrix of actuator values.  
The action is the set of additional actuator levels above the flat level calculated by inverting the DM influence matrix. It is a matrix with the same dimensions as the state.  
The reward is a single scalar value, which in this case corresponds to the achieved contrast within the dark hole. Specifically, it is the negative log of the contrast. A high reward implies high contrast at the dark hole.

## Description:
1. The optical system is defined in a class that inherits from OpenAI's gym environment class.
2. This class contains 3 main methods - \_\_init\_\_(...), step(...), reset(...).
3. The \_\_init\_\_(...) method sets up the optical system with the following components in order: a turbulence generator, a demagnifier, a DM, a WFS, a coronagraph, a quasi-static NCPA aberration, and a detector.
4. The step(action) function does the following:
* The DM is updated with the actuator values specified in the "action".
* The wavefront is propagated through the entire optical path, i.e. the WFS optical path as well as the science optical path.
* The WFS measurement is read out. The state variable for WFS measurement is updated.
* The instantaneous focal plane image is calculated.
* A reward is calculated using the metric function on the focal plane image, and is used to update the reward variable.
* The timestep is incremented by 1, and the turbulence generator is evolved accordingly.
5. A reinforcement learning algorithm works in this environment to maximize expected future reward.

### Step 1: Installing hcipy on colab.

In [0]:
# Remove any previously installed hcipy. -y answers pip's confirmation prompt
# so the cell does not stall waiting for input.
!pip uninstall -y hcipy
# Start from a clean checkout of the hcipy sources.
!rm -rf hcipy
!git clone https://github.com/ehpor/hcipy.git
# && (instead of ;) keeps the following command from running in the wrong
# directory if the cd fails.
!cd hcipy && git pull
!cd hcipy && python setup.py install
# Pin TensorFlow to 2.0.0.
!pip install tensorflow==2.0.0

### Step 2: Importing necessary libraries.

In [0]:
# Necessary imports
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from hcipy import *
import matplotlib.pyplot as plt
from astropy.io import fits
import os, glob
import time
import tensorflow as tf

### Step 3: Defining the HCI testbench class.

In [0]:
from HCI_TestBench import HCI_TestBench

### Step 4: Define testbench parameters.

In [0]:
# --- Bench pupil and science focal plane ---------------------------------
wavelength = 532e-9
N = 512          # sample count handed to both pupil grids
D = 10.5e-3      # bench pupil diameter
aperture = circular_aperture(D)
pupil_grid = make_pupil_grid(N, D)
# The focal grid is scaled by the diffraction unit wavelength / D.
science_focal_grid = make_focal_grid(8, 20, wavelength / D)

# --- Telescope-side pupil -------------------------------------------------
Dtel = 1
tel_aperture = circular_aperture(Dtel)
tel_pupil_grid = make_pupil_grid(N, Dtel)

# --- Deformable mirror ----------------------------------------------------
# The mirror is 12 actuators across and 10% wider than the bench pupil.
dm_actuators_across = 12
dm_extent = D * 1.1
actuator_grid = make_pupil_grid(dm_actuators_across, dm_extent)
xinetics_basis = make_xinetics_influence_functions(
    pupil_grid, dm_actuators_across, dm_extent / dm_actuators_across
)
dm = DeformableMirror(xinetics_basis)
# One actuator value per influence function, all initialised to zero.
num_modes = len(dm.influence_functions)
dm.actuators = np.zeros(num_modes)

# Atmosphere parameters (all four are passed positionally to
# InfiniteAtmosphericLayer below).
velocity = 10 #m/s
L0 = 40 # outer scale
r0 = 0.4 # Fried parameter
height = 0 # layer height

# Make atmosphere
# The global NumPy RNG is seeded right before the layer is built so the
# setup is repeatable from run to run.
np.random.seed(19900305)
layers = []
# Cn^2 is derived from r0 with 500e-9 as the second argument -- presumably the
# reference wavelength at which r0 is quoted; confirm against hcipy.
layer = InfiniteAtmosphericLayer(tel_pupil_grid, Cn_squared_from_fried_parameter(r0, 500e-9), L0, velocity, height, stencil_length=2, use_interpolation=True)
layers.append(layer)
# Single-layer atmosphere. NOTE(review): the positional False is presumably
# the scintillation flag -- confirm against MultiLayerAtmosphere's signature.
atmosphere = MultiLayerAtmosphere(layers, False)

## Create a demagnifier
# Scales the telescope-sized beam (Dtel) down to the bench pupil size (D).
demag = Magnifier(D / Dtel)

# Make the initial wavefront: the telescope aperture evaluated on the
# telescope pupil grid, with its total power set explicitly, then passed
# through the demagnifier to give the bench-side wavefront `wf`.
wf_tel = Wavefront(tel_aperture(tel_pupil_grid), wavelength)
wf_tel.total_power = 100000
wf = demag.forward(wf_tel)

## Propagator from the bench pupil plane to the science focal plane
prop = FraunhoferPropagator(pupil_grid, science_focal_grid)

## APP coronagraph, assembled from amplitude and phase maps read from FITS
app_amp = fits.getdata('Square_20_80_20_25_0_2_amp_resampled_512.fits').ravel()
app_phase = fits.getdata('Square_20_80_20_25_0_2_phase_resampled_512.fits').ravel()
app = Apodizer(app_amp * np.exp(1j * app_phase))

## Science detector
science_camera = NoiselessDetector()

## Reference image for metrics: `wf` sent straight to the focal plane,
## without passing through the DM or the coronagraph
diff_lim_img = prop(wf).power

## One lambda/D in focal-grid units, and a focal grid built with unit spacing
l_D = wavelength / D
plot_grid = make_focal_grid(8, 20, 1)

## Noiseless coronagraphic image: wf -> DM -> APP -> focal plane
wfdm = dm.forward(wf)
wfapp = app.forward(wfdm)
imapp = prop(wfapp).power

## Dark-zone indices: focal-plane points with 2 <= x <= 8 and -3 <= y <= 3,
## both measured in lambda/D
focal_x = imapp.grid.x
focal_y = imapp.grid.y
inside_x = (focal_x >= 2 * l_D) & (focal_x <= 8 * l_D)
inside_y = (focal_y >= -3 * l_D) & (focal_y <= 3 * l_D)
dz_ind = np.where(inside_x & inside_y)

## Create an NCP aberration
num_coeffs = 9
plaw_index = -1
np.random.seed(7)
# Draw coefficients uniformly from [-1, 1) and scale the k-th one by
# (k + 1) ** plaw_index.
coeffs = ((np.random.rand(num_coeffs) - 0.5) * 2) * (np.arange(num_coeffs, dtype=float) + 1) ** plaw_index
# The drawn coefficients are discarded and replaced by zeros, so ncp_phase
# below is all zeros and `ncp` multiplies the field by 1 everywhere.
# The draw above is still executed, so it advances the seeded global RNG.
coeffs = np.zeros(coeffs.shape)
# NOTE(review): the trailing 2 is presumably the starting Zernike mode
# (skipping piston) -- confirm against hcipy's make_zernike_basis.
zernike_basis = make_zernike_basis(num_coeffs, D, pupil_grid, 2)
ncp_phase = np.dot(zernike_basis.transformation_matrix, coeffs)
ncp = Apodizer(np.exp(1j * ncp_phase))

# Create an estimate of the NCP aberration for the forward model
# Both estimates are zero-phase / zero-coefficient placeholders with the same
# shapes as app_phase and coeffs respectively.
ncp_field_est = np.exp(1j * np.zeros(app_phase.shape))
estimated_coeffs = np.zeros(coeffs.shape)

In [4]:
# Assemble the test bench from the components built above.
# NOTE(review): the fifth positional argument is None -- presumably the WFS
# slot, given the component order listed in the notebook description; confirm
# against HCI_TestBench.__init__.
tb = HCI_TestBench(wf_tel, atmosphere, demag, dm, None, ncp, app, prop, science_camera, dz_ind)



### Step 5: Train the DDPG agent

In [0]:
from agent import DDPG
from collections import deque

In [0]:
# Start with supervised learning trained model that mimics the identity function
from tensorflow.keras.models import load_model

# Only the weights of this model are used later (they are copied into the
# actor networks). compile=False presumably skips restoring the training
# configuration -- confirm against the Keras load_model documentation.
trained_model = load_model('identity_function.h5', compile=False)

In [0]:
# Hyperparameters for the DDPG agent and the training loop.
# All values except NOISE_SCALER and NOISE_DECAY are forwarded positionally
# to the DDPG constructor; their exact meaning is defined by agent.DDPG.

# Learning rates for the actor and critic networks.
ACTOR_LR = 5e-5
CRITIC_LR = 1e-3

RANDOM_SEED = 42

# Presumably the mean and standard deviation of the agent's exploration
# noise -- confirm in agent.DDPG.
MU = 0.0
SIGMA = 1.0

# Presumably replay-buffer capacity and minibatch size -- confirm in agent.DDPG.
BUFFER_SIZE = 1e4
BATCH_SIZE = 8

# Presumably discount factor and soft-update rate -- confirm in agent.DDPG.
GAMMA = 0.0
TAU = 1e-2

N_TIME_STEPS = 1
N_LEARN_UPDATES = 1

# NOISE_SCALER is handed to agent.act() on every step of the training loop;
# NOISE_DECAY is not referenced anywhere in the visible code.
NOISE_SCALER = 0
NOISE_DECAY = 0.99

# Use the first GPU when TensorFlow reports one, otherwise the CPU.
DEVICE = "/GPU:0" if tf.test.is_gpu_available() else "/device:CPU:0"

In [0]:
# State and action share one shape: a 12 x 12 map (matching the 12-across
# actuator layout of the DM) with a trailing axis of length 1 -- presumably a
# channel axis for the networks; confirm in agent.DDPG.
state_size = action_size = (12, 12, 1)

In [0]:
# Build the DDPG agent. Every argument is positional, so the order below must
# match the DDPG constructor's parameter order.
agent = DDPG(
    state_size,
    action_size,
    ACTOR_LR,
    CRITIC_LR,
    RANDOM_SEED,
    MU,
    SIGMA,
    BUFFER_SIZE,
    BATCH_SIZE,
    GAMMA,
    TAU,
    N_TIME_STEPS,
    N_LEARN_UPDATES,
    DEVICE,
)

In [0]:
# Warm-start both actor networks (local first, then target) with the weights
# of the pre-trained model.
pretrained_weights = trained_model.get_weights()
for actor in (agent.actor_local, agent.actor_target):
    actor.model.set_weights(pretrained_weights)

In [0]:
def ddpg(n_episodes=1000, print_every=100, max_t=10, solved_score=8.0):
    """Run the DDPG training loop on the test bench and return the scores.

    Relies on the module-level ``tb`` (environment), ``agent`` (DDPG agent)
    and ``NOISE_SCALER`` (passed to ``agent.act`` on every step).

    Args:
        n_episodes: Maximum number of episodes to run.
        print_every: Length of the rolling window used for the average
            score, and the interval (in episodes) at which the progress
            line is kept on screen instead of being overwritten.
        max_t: Maximum number of environment steps per episode. The progress
            line reports the step count as milliseconds.
        solved_score: Rolling-average score at which training stops early.

    Returns:
        list: One entry per completed episode, each the episode's summed
        reward divided by the number of steps taken.

    Raises:
        ValueError: If ``max_t`` is smaller than 1.
    """
    if max_t < 1:
        # Every episode divides its score by the step count, so at least one
        # step has to be taken.
        raise ValueError("max_t must be at least 1")

    scores_deque = deque(maxlen=print_every)
    scores = []

    for i_episode in range(1, n_episodes + 1):
        state = tb.reset()
        agent.reset()
        score = 0
        t = 0

        while t < max_t:
            t += 1
            action = agent.act(state, NOISE_SCALER)
            next_state, reward, done, _ = tb.step(action)
            agent.step(t, state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break

        episode_score = score / t
        scores_deque.append(episode_score)
        scores.append(episode_score)
        # Computed once per episode and reused by every message below.
        mean_score = np.mean(scores_deque)

        print('\rEpisode {}\t Time in ms:{}\t Average Score: {:.2f}'.format(i_episode, t, mean_score), end="")
        # Checkpoint both networks after every episode.
        agent.actor_local.model.save('checkpoint_actor.h5')
        agent.critic_local.model.save('checkpoint_critic.h5')
        if i_episode % print_every == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, mean_score))

        if mean_score >= solved_score:
            # Report the number of episodes that preceded the averaging
            # window; clamp at zero because the window may not be full yet.
            episodes_before_window = max(i_episode - print_every, 0)
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(episodes_before_window, mean_score))
            # No extra save needed: the checkpoint written just above already
            # holds the networks in their final state.
            break

    return scores

# Train with the default settings and keep the per-episode scores.
scores = ddpg()

# Plot the score of each episode against its 1-based episode number.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
episode_numbers = np.arange(1, len(scores) + 1)
ax.plot(episode_numbers, scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

Episode 84	 Time in ms:10	 Average Score: 2.20