In [None]:
# %load BE_chem_solve.C
/*****************************************************************************
 *                                                                           *
 * Copyright 2011 Daniel R. Reynolds                                         *
 *                                                                           *
 * This software is released under the terms of the "Enzo Public License"    *
 * in the accompanying LICENSE file.                                         *
 *                                                                           *
 *****************************************************************************/
/***********************************************************************
/
/  Generic rate equation solver
/
/  written by: Daniel Reynolds
/  date:       October 2011
/
/  PURPOSE: This routine solves the coupled equations,
/               du/dt = f(u),
/           using an implicit backward Euler method.  The Newton iteration
/           stops once every component of the update satisfies
/               |xnew - xold| <= atol + rtol*|xnew|
/
/ Solver API: 
/ int BE_chem_solve(int (*f)(double *, double *, int, int, void *), 
/                   int (*J)(double *, double *, int, int, void *), 
/                   double *u, double dt, double *rtol, 
/                   double *atol, int nstrip, int nchem,
/                   double *scaling, void *sdata,
/                   double *u0, double *s, double *gu, double *Ju)
/
/ output: integer flag denoting success (0) or failure (1)
/
/ inputs:
/
/   int *f -- function pointer that has the form
/             int f(double *u, double *fu, int nstrip, int nchem, void *sdata)
/       Here, the set of unknowns *u is defined over a strip of length
/       nstrip that contains nchem species per cell, and outputs an array
/       *fu of the same size/shape as *u that gives the ODE RHS
/       corresponding to  du/dt = f(u).  The integer return value should
/       denote success (0) or failure (1). 
/
/   int *J -- function pointer that has the form
/             int J(double *u, double *Ju, int nstrip, int nchem, void *sdata)
/       Here the Jacobian Ju should be a 1D array of length
/       nchem*nchem*nstrip.  Here, for spatial location k, with Jacobian
/       matrix row i and column j, the entries should be ordered as i
/       (fastest) then j (middle) then k (slowest), i.e. the Jacobian 
/       matrix for each cell is stored in a contiguous block, in 
/       column-major (Fortran) ordering.
/
/   double *u -- initial conditions, stored in the form u[nstrip*nchem], 
/       with the nchem variables in a given cell stored contiguously.
/
/   double dt -- desired time step size
/
/   double *rtol -- relative tolerance in each equation, of same size 
/       and ordering as u.
/
/   double *atol -- absolute tolerance in each equation, of the same 
/       size and ordering as u.
/
/   int nstrip, int nchem -- inputs denoting the size of the spatial
/       strip and the number of species per cell. 
/
************************************************************************/

#include <stdio.h>
#include <math.h>

// Callback type for the user-supplied ODE right-hand side.  The solver calls
// it as f(u, fu, nstrip, nchem, sdata) and treats a nonzero return as failure.
typedef int(*rhs_f)(double *, double *, int, int, void *);
// Callback type for the user-supplied Jacobian.  The solver calls it as
// J(u, Ju, nstrip, nchem, sdata) and treats a nonzero return as failure.
typedef int(*jac_f)(double *, double *, int, int, void *);

// function prototypes
// BE_Resid_Fun: backward Euler residual g(u) = u - u0 - dt*f(u), written to gu.
int BE_Resid_Fun(rhs_f, double *u, double *u0, double *gu, double dt, 
                 int nstrip, int nchem, double *scaling, double *inv_scaling, void *sdata);
// BE_Resid_Jac: Jacobian of that residual, I - dt*Jf(u), written to Ju.
int BE_Resid_Jac(jac_f, double *u, double *Ju, double dt, 
                 int nstrip, int nchem, double *scaling, double *inv_scaling, void *sdata);
// Gauss_Elim: dense solve of A x = b for one n-by-n column-major matrix.
int Gauss_Elim(double *A, double *x, double *b, int n);


// Diagnostic line describing one component that failed the convergence test.
static void BE_report_component(double dt, int isweep, int ix, int i,
                                double si, double atoli, double rtoli, double ui)
{
  fprintf(stderr, "dt %0.5g, Sweep %d, Unsolved[%d]: nchem: %d change: % 0.8g sum tol: % 0.5g atol: % 0.5g rtol: % 0.5g value: % 0.5g\n",
          dt, isweep, ix, i, si, atoli + rtoli * fabs(ui), atoli, rtoli, ui);
}


// Newton iteration on the backward Euler residual, operating entirely in the
// normalized variables prepared by BE_chem_solve.
//
// Returns 0 when every component of the last Newton update satisfied
//     |s_i| <= atol_i + rtol_i*|u_i|,
// and 1 when a residual/Jacobian/linear-solve call failed, a non-finite
// value appeared in u, or the sweep limit was reached without convergence.
static int BE_newton_sweeps(rhs_f f, jac_f J, double *u, double dt,
                            double *rtol, double *atol, int nstrip, int nchem,
                            double *scaling, double *inv_scaling, void *sdata,
                            double *u0, double *s, double *gu, double *Ju)
{
  const int sweeps = 10;    // maximum number of Newton iterations
  const double lam = 1.0;   // Newton step length (full step)
  int unsolved = 1;

  for (int isweep=0; isweep<sweeps; isweep++) {

    // compute nonlinear residual and Jacobian at the current iterate
    if (BE_Resid_Fun(f, u, u0, gu, dt, nstrip, nchem, scaling, inv_scaling, sdata) != 0)
      return 1;
    if (BE_Resid_Jac(J, u, Ju, dt, nstrip, nchem, scaling, inv_scaling, sdata) != 0)
      return 1;

    // Newton update for each cell in strip, accumulate convergence check
    unsolved = 0;
    for (int ix=0; ix<nstrip; ix++) {
      const int ioff = ix*nchem;

      // solve for Newton update in this cell
      if (Gauss_Elim(&(Ju[ix*nchem*nchem]), s, &(gu[ioff]), nchem) != 0) {
        fprintf(stderr, "There was an unsolved case in Gauss_Elim! \n");
        return 1;
      }

      // update solution in this cell
      for (int i=0; i<nchem; i++)  u[ioff+i] -= lam*s[i];

      // check the update in this cell, component by component
      for (int i=0; i<nchem; i++) {
        const double ui = u[ioff+i];

        if ( fabs(s[i]) > (atol[ioff+i] + rtol[ioff+i] * fabs(ui)) ) {
          if (dt < 1.0)
            BE_report_component(dt, isweep, ix, i, s[i], atol[ioff+i], rtol[ioff+i], ui);
          unsolved = 1;
        }

        // (ui - ui) is 0 for every finite value and NaN for NaN or +/-Inf.
        // An infinite value must be rejected explicitly: with |s| = Inf the
        // tolerance is Inf as well, so the test above would report success.
        if ( (ui - ui) != 0.0 ) {
          printf("BE_chem_solve ERROR: NaN/Inf in iteration %i (cell %i, species %i); dt = %0.5g, atol = %0.5g\n",
                 isweep,ix,i, dt, atol[ioff+i]);
          if (dt < 1.0)
            BE_report_component(dt, isweep, ix, i, s[i], atol[ioff+i], rtol[ioff+i], ui);
          return 1;
        }
      } // i loop

    } // ix loop

    // check for convergence
    if (!unsolved)  break;

  } // end newton iterations

  return unsolved ? 1 : 0;
}


// solver function
//
// Advances du/dt = f(u) by one backward Euler step of size dt.
//
//   f, J     -- user RHS and Jacobian callbacks (see header comment)
//   u        -- in: state at the start of the step; out: the last Newton
//               iterate, in the caller's (unscaled) variables
//   dt       -- time step size
//   rtol     -- relative tolerances, same size and ordering as u
//   atol     -- absolute tolerances, same size and ordering as u; scaled in
//               place during the solve and scaled back before returning
//   nstrip, nchem -- number of cells and number of species per cell
//   scaling  -- per-entry normalization factors, same size and ordering as u
//   sdata    -- opaque pointer forwarded unchanged to f and J
//   u0, gu   -- caller-provided work arrays of length nstrip*nchem
//   s        -- caller-provided work array of length nchem
//   Ju       -- caller-provided work array of length nstrip*nchem*nchem
//
// Returns 0 on convergence and 1 on any failure.
int BE_chem_solve(rhs_f f, jac_f J,
		  double *u, double dt, double *rtol, 
                  double *atol, int nstrip, int nchem, 
		  double *scaling, void *sdata,
          double *u0, double *s, double *gu, double *Ju) {

  const int ntot = nstrip*nchem;
  int i;

  // create an array to store 1/scaling
  double *inv_scaling = new double[ntot];
  for (i=0; i<ntot; i++)  inv_scaling[i] = 1.0 / scaling[i];

  // rescale input and absolute tolerances to normalized variables
  for (i=0; i<ntot; i++)  u[i] *= inv_scaling[i];
  for (i=0; i<ntot; i++)  atol[i] *= inv_scaling[i];

  // initialize the caller-provided work arrays
  for (i=0; i<ntot; i++)        u0[i] = u[i];
  for (i=0; i<nchem; i++)        s[i] = 0.0;
  for (i=0; i<ntot; i++)        gu[i] = 0.0;
  for (i=0; i<ntot*nchem; i++)  Ju[i] = 0.0;

  // perform Newton iterations
  const int status = BE_newton_sweeps(f, J, u, dt, rtol, atol, nstrip, nchem,
                                      scaling, inv_scaling, sdata, u0, s, gu, Ju);

  // rescale back to input variables on every exit path
  for (i=0; i<ntot; i++)  u[i] *= scaling[i];
  for (i=0; i<ntot; i++)  atol[i] *= scaling[i];

  delete[] inv_scaling;
  return status;
}


// nonlinear residual calculation function, forms nonlinear residual defined 
// by backwards Euler discretization, using user-provided RHS function f.
//
//   u       -- current iterate in scaled variables; temporarily converted to
//              input variables for the call to f, and converted back before
//              this function returns (on success and on failure)
//   u0      -- state at the start of the step, in scaled variables
//   gu      -- output: g(u) = u - u0 - dt*f(u), in scaled variables
//   scaling, inv_scaling -- per-entry scale factors and their reciprocals
//   sdata   -- opaque pointer forwarded unchanged to f
//
// Returns 0 on success, 1 if f reported failure.
int BE_Resid_Fun(rhs_f f, double *u, double *u0, double *gu, double dt, 
                 int nstrip, int nchem, double *scaling, double*inv_scaling, void *sdata) 
{
  const int ntot = nstrip*nchem;
  int i;

  // rescale back to input variables for the user callback
  for (i=0; i<ntot; i++)  u[i] *= scaling[i];

  // call user-supplied RHS function at current guess
  const int ier = f(u, gu, nstrip, nchem, sdata);

  // Return u to scaled variables before looking at the result: the caller
  // multiplies u by scaling on its failure path, so leaving u unscaled here
  // would scale it twice.
  for (i=0; i<ntot; i++)  u[i] *= inv_scaling[i];

  if (ier != 0)
    return 1;

  // rescale rhs to normalized variables
  for (i=0; i<ntot; i++)  gu[i] *= inv_scaling[i];

  // update RHS function to additionally include remaining terms for residual,
  //   g(u) = u - u0 - dt*f(u)
  for (i=0; i<ntot; i++)  gu[i] = u[i] - u0[i] - dt*gu[i];

  return 0;
}


// nonlinear residual Jacobian function, forms Jacobian defined by backwards
//  Euler discretization, using user-provided Jacobian function J.
//
//   u       -- current iterate in scaled variables; temporarily converted to
//              input variables for the call to J, and converted back before
//              this function returns (on success and on failure)
//   Ju      -- output: I - dt*Jf(u) for each cell, one contiguous
//              nchem*nchem block per cell, expressed in scaled variables
//   scaling, inv_scaling -- per-entry scale factors and their reciprocals
//   sdata   -- opaque pointer forwarded unchanged to J
//
// Returns 0 on success, 1 if J reported failure.
int BE_Resid_Jac(jac_f J, double *u, double *Ju, double dt, 
		 int nstrip, int nchem, double *scaling, double*inv_scaling, void *sdata)
{
  const int ntot = nstrip*nchem;
  int ix, ivar, jvar, i;

  // rescale back to input variables for the user callback
  for (i=0; i<ntot; i++)  u[i] *= scaling[i];

  // call user-supplied Jacobian function at current guess
  const int ier = J(u, Ju, nstrip, nchem, sdata);

  // Return u to scaled variables before looking at the result: the caller
  // multiplies u by scaling on its failure path, so leaving u unscaled here
  // would scale it twice.
  for (i=0; i<ntot; i++)  u[i] *= inv_scaling[i];

  if (ier != 0)
    return 1;

  // rescale Jacobian rows to use normalization
  for (ix=0; ix<nstrip; ix++)
    for (jvar=0; jvar<nchem; jvar++) 
      for (ivar=0; ivar<nchem; ivar++) 
        Ju[(ix*nchem+jvar)*nchem+ivar] *= inv_scaling[ix*nchem+ivar];

  // rescale Jacobian columns to account for normalization
  for (ix=0; ix<nstrip; ix++)
    for (ivar=0; ivar<nchem; ivar++) 
      for (jvar=0; jvar<nchem; jvar++) 
        Ju[(ix*nchem+jvar)*nchem+ivar] *= scaling[ix*nchem+jvar];

  // update Jacobian to additionally include remaining terms,
  //   J = I - dt*Jf(u)
  for (i=0; i<ntot*nchem; i++)   Ju[i] = -dt*Ju[i];
  for (ix=0; ix<nstrip; ix++)
    for (ivar=0; ivar<nchem; ivar++)
      Ju[ix*nchem*nchem + ivar*nchem + ivar] += 1.0;
  
  return 0;
}



// Gaussian Elimination with partial pivoting, followed by backwards 
// substitution, to solve a linear system Ax=b, where A is an n*n matrix, 
// stored in column-major (Fortran) ordering, and where x and b are vectors 
// of length n.
//
//   A -- in: the matrix; overwritten with the elimination factors
//   x -- out: the solution (contents unspecified when 1 is returned)
//   b -- in: the right-hand side; not modified
//   n -- system dimension
//
// Returns 0 on success, or 1 if a pivot is exactly zero (singular matrix),
// in which case no division by zero is performed.
#define idx(i,j,n) ( (j)*(n) + (i) )
int Gauss_Elim(double *A, double *x, double *b, int n)
{
  // local variables
  int i, j, k, p;
  double dtmp;

  // copy rhs into solution
  for (i=0; i<n; i++)  x[i] = b[i];

  // forward elimination stage:
  for (k=0; k<n-1; k++) {
    // search for pivot row (largest magnitude entry in column k)
    p = k;
    for (i=k+1; i<n; i++)
      if (fabs(A[idx(i,k,n)]) > fabs(A[idx(p,k,n)]))  p = i;

    // the largest candidate being zero means the whole column is zero
    if (A[idx(p,k,n)] == 0.0)  return 1;

    // perform row swap
    if (p != k) {
      for (j=k; j<n; j++)  {
        dtmp = A[idx(k,j,n)];
        A[idx(k,j,n)] = A[idx(p,j,n)];
        A[idx(p,j,n)] = dtmp;
      }
      dtmp = x[k];
      x[k] = x[p];
      x[p] = dtmp;
    }

    // elimination of submatrix (column-major ordering)
    for (i=k+1; i<n; i++) 
      A[idx(i,k,n)] /= A[idx(k,k,n)];
    for (j=k+1; j<n; j++)
      for (i=k+1; i<n; i++) 
        A[idx(i,j,n)] -= A[idx(i,k,n)]*A[idx(k,j,n)];
    for (i=k+1; i<n; i++) 
      x[i] -= A[idx(i,k,n)]*x[k];
  } // k loop

  // the last diagonal entry is never chosen as a pivot above; check it here
  if (n > 0 && A[idx(n-1,n-1,n)] == 0.0)  return 1;

  // backwards substitution stage:
  for (i=n-1; i>=0; i--) {
    for (j=i+1; j<n; j++)
      x[i] -= A[idx(i,j,n)]*x[j];
    x[i] /= A[idx(i,i,n)];
  }

  return 0;
}


In [None]:
# %load sunny_solver_run.pyx

cimport numpy as np
import numpy as np
import time
from libc.stdlib cimport malloc, free

# alloca() hands out memory from the calling function's stack frame; that
# memory is released automatically when the calling function returns and must
# never be passed to free().
cdef extern from "alloca.h":
    void *alloca(int)

# NSPECIES here is N in the .C.template file
DEF NSPECIES = 7
# Compile-time cell limit used to size the arrays in sunny_data below;
# run_sunny asserts that it equals MAX_NCELLS from sunny_solver.h.
DEF MAX_NCELLS=1024

# Declarations mirrored from sunny_solver.h.  Cython does not parse the header
# itself, so everything used from it must be restated here.
cdef extern from "sunny_solver.h":
    # The header's own MAX_NCELLS / NSPECIES macros, exposed under private
    # names so run_sunny can compare them with the DEF constants above.
    cdef int _MAX_NCELLS  "MAX_NCELLS"
    cdef int _NSPECIES "NSPECIES"
    # Solver state passed to the RHS/Jacobian callbacks through `void *sdata`.
    # NOTE(review): field meanings below are inferred from names only --
    # confirm against sunny_solver.h.
    ctypedef struct sunny_data:
        # presumably the temperature binning of the rate tables
        double dbin
        double idbin
        double bounds[2]
        int nbins

        # presumably the redshift binning
        double d_zbin
        double id_zbin
        double z_bounds[2]
        int n_zbins

        # current_z is assigned by run_sunny from its `z` argument
        double current_z
        double zdef
        double dz

        # per-cell arrays of length MAX_NCELLS; run_sunny reads Ts[i] for
        # each cell when recording intermediate output
        double Ts[MAX_NCELLS]
        double Tdef[MAX_NCELLS]
        double dT[MAX_NCELLS]
        double logTs[MAX_NCELLS]
        double dTs_ge[MAX_NCELLS]
        # For each reaction kNN: r_kNN holds 1024 entries, while rs_kNN and
        # drs_kNN hold one entry per cell.
        double r_k01[1024]
        double rs_k01[MAX_NCELLS]
        double drs_k01[MAX_NCELLS]
        double r_k02[1024]
        double rs_k02[MAX_NCELLS]
        double drs_k02[MAX_NCELLS]
        double r_k07[1024]
        double rs_k07[MAX_NCELLS]
        double drs_k07[MAX_NCELLS]
        double r_k08[1024]
        double rs_k08[MAX_NCELLS]
        double drs_k08[MAX_NCELLS]
        double r_k09[1024]
        double rs_k09[MAX_NCELLS]
        double drs_k09[MAX_NCELLS]
        double r_k10[1024]
        double rs_k10[MAX_NCELLS]
        double drs_k10[MAX_NCELLS]
        double r_k11[1024]
        double rs_k11[MAX_NCELLS]
        double drs_k11[MAX_NCELLS]
        double r_k12[1024]
        double rs_k12[MAX_NCELLS]
        double drs_k12[MAX_NCELLS]
        double r_k13[1024]
        double rs_k13[MAX_NCELLS]
        double drs_k13[MAX_NCELLS]
        double r_k14[1024]
        double rs_k14[MAX_NCELLS]
        double drs_k14[MAX_NCELLS]
        double r_k15[1024]
        double rs_k15[MAX_NCELLS]
        double drs_k15[MAX_NCELLS]
        double r_k16[1024]
        double rs_k16[MAX_NCELLS]
        double drs_k16[MAX_NCELLS]
        double r_k17[1024]
        double rs_k17[MAX_NCELLS]
        double drs_k17[MAX_NCELLS]
        double r_k18[1024]
        double rs_k18[MAX_NCELLS]
        double drs_k18[MAX_NCELLS]
        double r_k19[1024]
        double rs_k19[MAX_NCELLS]
        double drs_k19[MAX_NCELLS]
        double r_k21[1024]
        double rs_k21[MAX_NCELLS]
        double drs_k21[MAX_NCELLS]
        double r_k22[1024]
        double rs_k22[MAX_NCELLS]
        double drs_k22[MAX_NCELLS]
        double r_k23[1024]
        double rs_k23[MAX_NCELLS]
        double drs_k23[MAX_NCELLS]
        int bin_id[MAX_NCELLS]
        int ncells

    # Callback signatures expected by BE_chem_solve: (u, out, nstrip, nchem, sdata)
    ctypedef int(*rhs_f)(double *, double *, int, int, void *)
    ctypedef int(*jac_f)(double *, double *, int, int, void *)

    # Entry point called with (0, NULL) by main_run_sunny
    int sunny_main(int argc, char **argv)
    # Returns the sunny_data instance used by run_sunny, which passes NULL
    # for both arguments
    sunny_data *sunny_setup_data(int *NumberOfFields,
            char ***FieldNames)
    void sunny_read_rate_tables(sunny_data*)
    void sunny_read_cooling_tables(sunny_data*)
    # Declared but only referenced from a commented-out line in run_sunny
    double dengo_evolve_sunny (double dtf, double &dt, double z,
                                         double *input, double *rtol,
                                         double *atol, int dims,
                                         sunny_data *data)
    # Single backward Euler step; run_sunny treats return 0 as success and
    # return 1 as failure.  u0, s, gu and Ju are caller-provided work arrays.
    int BE_chem_solve(rhs_f f, jac_f J,
		    double *u, double dt, double *rtol, 
                    double *atol, int nstrip, int nchem, 
		    double *scaling, void *sdata, double *u0, double *s,
            double *gu, double *Ju
           )
    # The two functions below are the rhs_f / jac_f callbacks that run_sunny
    # passes to BE_chem_solve
    int calculate_jacobian_sunny(double *input, double *Joutput,
            int nstrip, int nchem, void *sdata)
    int calculate_rhs_sunny(double *input, double *rhs, int nstrip,
                      int nchem, void *sdata)
    # Called by run_sunny on the state vector before every solve attempt
    int ensure_electron_consistency(double *input, int nstrip, int nchem)

def main_run_sunny():
    """Call the C entry point ``sunny_main`` with no arguments and print the
    wall-clock time it took."""
    started = time.time()
    sunny_main(0, NULL)
    elapsed = time.time() - started
    print "Total elapsed time: %0.3e" % elapsed

def run_sunny(ics, double tf, int niter = 10000,
                        int intermediate = 1, z = -1.0):
    """Integrate the 7-species network from ``ics`` towards time ``tf``.

    Parameters
    ----------
    ics : mapping
        Must provide 1-D float64 arrays under the keys "H2_1", "H2_2", "H_1",
        "H_2", "H_m0", "de" and "ge".  The length of ``ics["ge"]`` sets the
        number of cells.
    tf : double
        Final time.  The first step attempted is ``tf / 1e5``.
    niter : int
        Maximum number of calls to ``BE_chem_solve`` (successful or not).
    intermediate : int
        When equal to 1, the state after every attempt is recorded and
        returned in the second dictionary.
    z : float
        Stored in ``data.current_z`` before the integration starts.

    Returns
    -------
    (rv, rv_t) : tuple of dict
        ``rv`` maps each species name to its final values, converted back with
        the same factor the inputs were divided by.  ``rv_t`` holds the
        recorded history when ``intermediate == 1`` and is empty otherwise.

    Raises
    ------
    MemoryError
        If a solver work buffer cannot be allocated.
    """
    assert(_MAX_NCELLS == MAX_NCELLS)
    assert(_NSPECIES == NSPECIES)
    cdef np.ndarray[np.float64_t, ndim=1] H2_1_arr = ics["H2_1"]
    # All of the intermediate variables get declared, but not necessarily assigned
    cdef np.ndarray[np.float64_t, ndim=2] H2_1_int
    cdef np.ndarray[np.float64_t, ndim=1] H2_2_arr = ics["H2_2"]
    cdef np.ndarray[np.float64_t, ndim=2] H2_2_int
    cdef np.ndarray[np.float64_t, ndim=1] H_1_arr = ics["H_1"]
    cdef np.ndarray[np.float64_t, ndim=2] H_1_int
    cdef np.ndarray[np.float64_t, ndim=1] H_2_arr = ics["H_2"]
    cdef np.ndarray[np.float64_t, ndim=2] H_2_int
    cdef np.ndarray[np.float64_t, ndim=1] H_m0_arr = ics["H_m0"]
    cdef np.ndarray[np.float64_t, ndim=2] H_m0_int
    cdef np.ndarray[np.float64_t, ndim=1] de_arr = ics["de"]
    cdef np.ndarray[np.float64_t, ndim=2] de_int
    cdef np.ndarray[np.float64_t, ndim=1] ge_arr = ics["ge"]
    cdef np.ndarray[np.float64_t, ndim=2] ge_int
    cdef np.ndarray[np.uint8_t, ndim=1] result_int
    cdef np.ndarray[np.float64_t, ndim=2] temp_int
    cdef np.ndarray[np.float64_t, ndim=1] t_int
    cdef np.ndarray[np.float64_t, ndim=1] dt_int
    cdef np.ndarray[np.float64_t, ndim=2] Ju_int
    
    
    cdef int i, j, k, iter
    cdef int dims = ge_arr.shape[0]
    cdef int NTOT = NSPECIES * dims
    # NOTE(review): the per-cell arrays in sunny_data hold MAX_NCELLS entries
    # and data.Ts[i] is read below for every i < dims, so dims presumably must
    # not exceed MAX_NCELLS -- confirm and enforce if so.
    # Stack-allocated state vectors; released automatically on return.
    cdef double *input = <double *> alloca(NTOT * sizeof(double))
    cdef double *prev = <double *> alloca(NTOT * sizeof(double))
    cdef double *atol = <double *> alloca(NTOT * sizeof(double))
    cdef double *rtol = <double *> alloca(NTOT * sizeof(double))
    cdef double *scale = <double *> alloca(NTOT * sizeof(double))
    cdef double v
    cdef double *total_density = <double *> alloca(dims * sizeof(double))
    
    
    # History buffers: one column per solver attempt.
    if intermediate == 1:
        H2_1_int = np.zeros((dims, niter), "float64")
        H2_2_int = np.zeros((dims, niter), "float64")
        H_1_int = np.zeros((dims, niter), "float64")
        H_2_int = np.zeros((dims, niter), "float64")
        H_m0_int = np.zeros((dims, niter), "float64")
        de_int = np.zeros((dims, niter), "float64")
        ge_int = np.zeros((dims, niter), "float64")
        temp_int = np.zeros((dims, niter), "float64")
        result_int = np.zeros(niter, "uint8")
        t_int = np.zeros(niter, "float64")
        dt_int = np.zeros(niter, "float64")
        
        Ju_int = np.zeros( (dims * NSPECIES * NSPECIES, niter), "float64" )



    # Pack the state vector with the NSPECIES values of each cell stored
    # contiguously.  Each species is divided by a fixed factor on the way in
    # and multiplied by the same factor on the way out.
    j = 0
    for i in range(dims):
        input[j] = prev[j] = H2_1_arr[i] / 2.0
        atol[j] = input[j] * 1e-06
        rtol[j] = 1e-06
        scale[j] = input[j] 
        print "H2_1", scale[j], H2_1_arr[i]
        j += 1
        input[j] = prev[j] = H2_2_arr[i] / 2.0
        atol[j] = input[j] * 1e-06
        rtol[j] = 1e-06
        scale[j] = input[j] 
        print "H2_2", scale[j], H2_2_arr[i]
        j += 1
        input[j] = prev[j] = H_1_arr[i] / 1.00794
        atol[j] = input[j] * 1e-06
        rtol[j] = 1e-06
        scale[j] = input[j] 
        print "H_1", scale[j], H_1_arr[i]
        j += 1
        input[j] = prev[j] = H_2_arr[i] / 1.00794
        atol[j] = input[j] * 1e-06
        rtol[j] = 1e-06
        scale[j] = input[j] 
        print "H_2", scale[j], H_2_arr[i]
        j += 1
        input[j] = prev[j] = H_m0_arr[i] / 1.00794
        atol[j] = input[j] * 1e-06
        rtol[j] = 1e-06
        scale[j] = input[j] 
        print "H_m0", scale[j], H_m0_arr[i]
        j += 1
        input[j] = prev[j] = de_arr[i] / 1.0
        atol[j] = input[j] * 1e-06
        rtol[j] = 1e-06
        scale[j] = input[j] 
        print "de", scale[j], de_arr[i]
        j += 1
        input[j] = prev[j] = ge_arr[i] / 1.0
        atol[j] = input[j] * 1e-06
        rtol[j] = 1e-06
        scale[j] = input[j] 
        print "ge", scale[j], ge_arr[i]
        j += 1


    
    
    cdef sunny_data *data = sunny_setup_data(NULL, NULL)
    cdef rhs_f f = calculate_rhs_sunny
    cdef jac_f jf = calculate_jacobian_sunny
    

    cdef double floor_val = 1e-50
    cdef double dt = tf / 1e5
    cdef double ttot = 0.0
    cdef int status
    # Allocate some temporary data
    # Now we manually evolve
    #ttot = dengo_evolve_sunny(tf, dt, input, rtol, atol, dims, data)
    data.current_z = z
    # Heap work buffers handed to BE_chem_solve on every call.
    cdef double *u0 = <double *> malloc(sizeof(double) * dims * NSPECIES)
    cdef double *s = <double *> malloc(sizeof(double) * NSPECIES)
    cdef double *gu = <double *> malloc(sizeof(double) * dims * NSPECIES)
    cdef double *Ju = <double *> malloc(sizeof(double) * dims * NSPECIES * NSPECIES)

    if u0 == NULL or s == NULL or gu == NULL or Ju == NULL:
        # free(NULL) is a no-op, so releasing all four is safe
        free(u0)
        free(s)
        free(gu)
        free(Ju)
        raise MemoryError("could not allocate solver work buffers")

    # Keeps the summary line below well defined when niter == 0.
    iter = -1
    try:
        ensure_electron_consistency(input, dims, NSPECIES)
        for iter in range(niter):
            ensure_electron_consistency(input, dims, NSPECIES)
            status = BE_chem_solve(f, jf, input, dt, rtol, atol, dims, NSPECIES, scale,
                                   <void *> data, u0, s, gu, Ju)

#            floor_values( input , NTOT, floor_val )     
#            floor_values( prev  , NTOT, floor_val )             

            # Record the outcome of this attempt, whether or not it succeeded.
            if intermediate == 1:
                j = 0
                for i in range(dims):
                    H2_1_int[i, iter] = input[j]
                    j += 1
                    H2_2_int[i, iter] = input[j]
                    j += 1
                    H_1_int[i, iter] = input[j]
                    j += 1
                    H_2_int[i, iter] = input[j]
                    j += 1
                    H_m0_int[i, iter] = input[j]
                    j += 1
                    de_int[i, iter] = input[j]
                    j += 1
                    ge_int[i, iter] = input[j]
                    j += 1                
                    temp_int[i, iter] = data.Ts[i]

                for k in range(dims * NSPECIES * NSPECIES):
                    Ju_int[k, iter] = Ju[k]

                if status == 0:
                    result_int[iter] = 1
                elif status == 1:
                    result_int[iter] = 0
                t_int[iter] = ttot
                dt_int[iter] = dt
            if status == 0:
                # Accept the step, then grow dt by 10% without overshooting tf.
                if iter % 1000 == 0:
                    print "Successful iteration[% 5i]: (%0.3e) %0.3e / %0.3e" % (
                        iter, dt, ttot, tf)
                copy_array(input, prev, NTOT)
                # Reset the scaling array to match the new values
                copy_array(input, scale, NTOT)
                ttot += dt
                dt *= 1.1
                if tf - ttot < dt:
                    dt = tf - ttot
            elif status == 1:
                # Reject the step: halve dt and roll back to the last accepted state.
                dt /= 2.0
                copy_array(prev, input, NTOT)
                # Reset the scaling array to match the new values
                copy_array(input, scale, NTOT)
                if dt < 1e-30 * tf:
                    print "dt too small (%0.3e / %0.3e) so breaking" % (dt, tf)
                    break
                continue
            if ttot >= tf: break
    finally:
        # Release the work buffers even if an exception escapes the loop.
        free(u0)
        free(s)
        free(gu)
        free(Ju)

    print "End in %s iterations: %0.5e / %0.5e (%0.5e)" % (iter + 1, ttot, tf, tf - ttot)

    rv, rv_t = {}, {}
    H2_1_arr = rv["H2_1"] = np.zeros(dims, "float64")
    H2_2_arr = rv["H2_2"] = np.zeros(dims, "float64")
    H_1_arr = rv["H_1"] = np.zeros(dims, "float64")
    H_2_arr = rv["H_2"] = np.zeros(dims, "float64")
    H_m0_arr = rv["H_m0"] = np.zeros(dims, "float64")
    de_arr = rv["de"] = np.zeros(dims, "float64")
    ge_arr = rv["ge"] = np.zeros(dims, "float64")
    # Same condition as the one that allocated and filled the history buffers;
    # any other value of `intermediate` leaves them unassigned.
    if intermediate == 1:
        rv_t["H2_1"] = H2_1_int[:niter]
        rv_t["H2_2"] = H2_2_int[:niter]
        rv_t["H_1"] = H_1_int[:niter]
        rv_t["H_2"] = H_2_int[:niter]
        rv_t["H_m0"] = H_m0_int[:niter]
        rv_t["de"] = de_int[:niter]
        rv_t["ge"] = ge_int[:niter]
        rv_t["successful"] = result_int.astype("bool")
        rv_t['T'] = temp_int
        rv_t['t'] = t_int
        rv_t['dt'] = dt_int
    
        rv_t['Ju'] = Ju_int

    # Unpack the final state, undoing the per-species factors applied above.
    j = 0
    for i in range(dims):
        H2_1_arr[i] = input[j] * 2.0
        j += 1
        H2_2_arr[i] = input[j] * 2.0
        j += 1
        H_1_arr[i] = input[j] * 1.00794
        j += 1
        H_2_arr[i] = input[j] * 1.00794
        j += 1
        H_m0_arr[i] = input[j] * 1.00794
        j += 1
        de_arr[i] = input[j] * 1.0
        j += 1
        ge_arr[i] = input[j] * 1.0
        j += 1
    return rv, rv_t

cdef copy_array(double *input, double *output, int dims):
    # Copy the first `dims` doubles of `input` into `output`, element by
    # element.  Nothing is copied when `dims` is zero or negative.
    cdef int pos = 0
    while pos < dims:
        output[pos] = input[pos]
        pos += 1

cdef floor_values(double *input, int dims, double floor):
    # Raise, in place, every one of the first `dims` entries of `input` that
    # compares below `floor` up to `floor`; all other entries are untouched.
    cdef int pos = 0
    while pos < dims:
        if input[pos] < floor:
            input[pos] = floor
        pos += 1