## Connect your Google Drive to the Google Colab session - only needs to be done once

In [None]:
# Mount Google Drive into the Colab filesystem under /content/drive.
from google.colab import drive
drive.mount('/content/drive')
import os
# Listing the tutorial folder raises if the folder is missing; the returned
# list itself is not stored anywhere.
os.listdir('/content/drive/MyDrive/SINDy_tutorial')
# Tutorial folder (with trailing slash): later cells chdir into it and prepend
# it to the data file name.
path = '/content/drive/MyDrive/SINDy_tutorial/'

## Import libraries - only needs to be done once

In [None]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
# Change the working directory to the tutorial folder before the local import
# below — presumably SINDy_tools.py lives in that folder; confirm.
# Relies on `os` and `path` defined in the Drive-mount cell.
os.chdir(path)
from SINDy_tools import calc_gradients, sequentially_thresholded_LSQ

## Load data - only needs to be done once

In [None]:
# Read the simulation fields from the HDF5 file.  The context manager closes
# the file even if one of the reads raises (the previous open()/close() pair
# leaked the handle in that case).
with h5py.File(path+'active_nematic.h5', 'r') as f:
    # Scalars: grid spacing and time step as stored in the file.
    dx = f['dx'][()]
    dt = f['dt'][()]
    # Keep every 4th entry along axis 0 of each dataset, then move that axis
    # to the end.
    # NOTE(review): if axis 0 is time and `dt` is the spacing between stored
    # frames, the spacing of the strided data is 4*dt — confirm before using
    # `dt` for time derivatives.
    Qf = np.moveaxis(f['qtensor'][::4], 0, -1)
    uf = np.moveaxis(f['velocity'][::4], 0, -1)
    Pf = np.moveaxis(f['pressure'][::4], 0, -1)

# Expand the two stored components (a = Qf[0], b = Qf[1]) into the full
# symmetric, traceless 2x2 tensor [[a, b], [b, -a]].
Qf = np.array([[Qf[0],Qf[1]],[Qf[1],-Qf[0]]])

## Create copy of data for processing - needs to be done each time noise is added

In [None]:
# Work on fresh copies so the arrays loaded from disk (Qf, uf, Pf) stay
# untouched and this cell can be re-run.  To study noisy data, add noise here:
#   Q = np.array(Qf) + np.random.normal(scale=1e-5, loc=0.0, size=Qf.shape)
Q = np.array(Qf)
u = np.array(uf)
P = np.array(Pf)

# Overwrite the first row of Q from its second row so the tensor is again
# symmetric and traceless (this matters once independent noise has been added
# to every component).
Q[0, 1] = Q[1, 0]
Q[0, 0] = -Q[1, 1]

## Set parameters for SINDy fit

In [None]:
'''All data is arranged as [...,x,y,t], where "..." corresponds to vector terms'''
# Index of the leading (component) axis that is fitted, and the number of
# randomly chosen points used for the regression.
tensorTerm, numSamples = 0, 100_000

# Sizes of the last three axes of Q and the total number of points they span.
lx, ly, lt = Q.shape[-3:]
numDataPoints = lx * ly * lt

# Shuffle all flat indices in place and keep the leading numSamples of them:
# a random subset without repetition (all points if fewer are available).
sample = np.arange(numDataPoints)
np.random.shuffle(sample)
sample = sample[:numSamples]

## Write names of library of terms for SINDy fit (order matters)

In [None]:
# Name of the term placed on the left-hand side of the fit.
lhs_term = '∇²u'

# Names of the candidate right-hand-side terms.  The order here must match the
# order in which the corresponding columns are appended to `rhs`, because the
# names are later selected with the same boolean mask as the coefficients.
#
# Terms deliberately left out of the library:
#   '∇²u'    - already used as the left-hand side
#   '(∇.u)u' - the system is incompressible, so this term is always zero
rhs_term_names = np.array([
    '∂ₜu',
    'u',
    '∇.Q',
    'Q.u',
    '(u.u)u',
    '∇(Q:Q)',
    '∇P',
])

## Calculate derivatives of data, calculate terms, and sample terms

In [None]:
# Spatial derivatives of the fields via calc_gradients from SINDy_tools.
# The einsum subscripts further down treat axis 0 of grad_Q (and of grad_P) as
# the derivative direction — TODO confirm against SINDy_tools.
lap_u = calc_gradients(u, lap_order=[1], dh=(dx,dx))
grad_Q = calc_gradients(Q, grad_order=[1], dh=(dx,dx))
grad_P = calc_gradients(P, grad_order=[1], dh=(dx,dx))
# Finite-difference derivative of u along its last axis with spacing dt.
# NOTE(review): the fields were loaded with a stride of 4 along the axis that
# ends up last (`[::4]` in the load cell); if `dt` is the spacing between
# stored frames, the spacing here should be 4*dt — confirm.
dt_u = np.gradient(u, dt, axis=-1)


def _sample_component(field):
    """Return component `tensorTerm` of `field` at the flat indices in `sample`."""
    # ravel() flattens in C order like flatten(); the fancy indexing that
    # follows always returns a new array, so no extra copy is needed.
    return field[tensorTerm].ravel()[sample]


# LHS
lhs = _sample_component(lap_u)

# RHS library: one entry per candidate term, in the same order as the names in
# rhs_term_names.
rhs = [
    # ∂ₜu
    _sample_component(dt_u),
    # u
    _sample_component(u),
    # ∇.Q : trace over the first two axes of grad_Q
    _sample_component(np.einsum('aa...', grad_Q)),
    # Q.u : contraction of Q's second index with u
    _sample_component(np.einsum('ia...,a...->i...', Q, u)),
    # (u.u)u : |u|^2 broadcast over the components of u
    _sample_component(np.einsum('a...,a...->...', u, u)[None,...] *u),
    # ∇(Q:Q) : 2 * (grad_Q)_{iab} Q_{ba}
    _sample_component(2 *np.einsum('iab...,ba...->i...', grad_Q, Q)),
    # ∇P
    _sample_component(grad_P),
]

# Stack the terms and transpose so that each column holds one library term.
rhs = np.array(rhs).T

## Perform a sequentially thresholded least squares fit

In [None]:
# Fit lhs against the columns of rhs with the helper from SINDy_tools.
# Later indexing (coeff[k], R2[k]) suggests one entry per fit — TODO confirm
# the exact return layout against SINDy_tools.
coeff, R2 = sequentially_thresholded_LSQ(rhs, lhs)
# Mask of the coefficients that are non-zero; used by the next cell to select
# the matching values and term names.
I = coeff != 0
# Plot the R^2 values against their index.
plt.plot(R2, '-o')
plt.xlabel('Term number', fontsize=18)
plt.ylabel('$R^2$', fontsize=18)

In [None]:
# Choose which fit to report; entry fitTerms-1 of R2 / coeff / I is used
# (presumably the fit that keeps `fitTerms` terms — confirm against SINDy_tools).
fitTerms = 4
fit_row = fitTerms - 1
active_terms = I[fit_row]

# R^2 of the chosen fit, then its non-zero coefficients and their term names.
print('R^2=%0.8f' % R2[fit_row])
print(coeff[fit_row][active_terms])
print(rhs_term_names[active_terms])