In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
ls

[0m[01;34mdrive[0m/  [01;34msample_data[0m/


In [3]:
cd drive/

/content/drive


In [4]:
ls

[0m[01;34mMyDrive[0m/


In [5]:
cd MyDrive/

/content/drive/MyDrive


In [6]:
ls

'A DIGITIZED TOOL FOR WELL CANDIDATE SELECTION FOR MATRIX ACIDIZING IN SANDSTONE RESERVOIR (1).docx'
'A DIGITIZED TOOL FOR WELL CANDIDATE SELECTION FOR MATRIX ACIDIZING IN SANDSTONE RESERVOIR.docx'
'A DIGITIZED TOOL FOR WELL CANDIDATE SELECTION FOR MATRIX ACIDIZING IN SANDSTONE RESERVOIR.pdf'
 [0m[01;34mcheckpoints[0m/
 [01;34mcleaned_data[0m/
[01;34m'Colab Notebooks'[0m/
 [01;34mdeep-learning-articles[0m/
 [01;34mdeep-learning-symbolic-mathematics[0m/
'external defence slide.pptx'
'Getting started.pdf'
 IMG-20210412-WA0016.jpg
'JULIET PROJECT FINAL SLIDE-1.pptx'
 [01;34mlogs[0m/
 ode1_test.txt
 ode1_valid.txt
 [01;34mSymbolicMathematics[0m/


In [7]:
cd SymbolicMathematics/

/content/drive/MyDrive/SymbolicMathematics


In [8]:
ls

beam_integration.ipynb  CONTRIBUTING.md  main.py    split_data.py
CODE_OF_CONDUCT.md      LICENSE          README.md  [0m[01;34msrc[0m/


In [10]:
import os
import numpy as np
import sympy as sp
import pandas as pd
import torch

from src.utils import AttrDict
from src.envs import build_env
from src.model import build_modules
from src.envs.char_sp import InvalidPrefixExpression, ValueErrorExpression

from src.utils import to_cuda
from src.envs.sympy_utils import simplify

## Build environment / Reload model

In [11]:
# trained model, e.g. "wget https://dl.fbaipublicfiles.com/SymbolicMathematics/models/fwd_bwd.pth"
model_path = '../deep-learning-symbolic-mathematics/models/differential-equations/ode1.pth'
assert os.path.isfile(model_path)

In [12]:
params = params = AttrDict({

    # environment parameters
    'env_name': 'char_sp',
    'int_base': 10,
    'balanced': False,
    'positive': True,
    'precision': 10,
    'n_variables': 1,
    'n_coefficients': 0,
    'leaf_probs': '0.75,0,0.25,0',
    'max_len': 512,
    'max_int': 5,
    'max_ops': 15,
    'max_ops_G': 15,
    'clean_prefix_expr': True,
    'rewrite_functions': '',
    'tasks': 'ode1',
    'operators': 'add:10,sub:3,mul:10,div:5,sqrt:4,pow2:4,pow3:2,pow4:1,pow5:1,ln:4,exp:4,sin:4,cos:4,tan:4,asin:1,acos:1,atan:1,sinh:1,cosh:1,tanh:1,asinh:1,acosh:1,atanh:1',

    # model parameters
    'cpu': False,
    'emb_dim': 1024,
    'n_enc_layers': 6,
    'n_dec_layers': 6,
    'n_heads': 8,
    'dropout': 0,
    'attention_dropout': 0,
    'sinusoidal_embeddings': False,
    'share_inout_emb': True,
    'reload_model': model_path,

})

In [13]:
env = build_env(params)
x = env.local_dict['x']
c = sp.Symbol('c')
f = env.local_dict['f']

In [14]:
modules = build_modules(env, params)
encoder = modules['encoder']
decoder = modules['decoder']

## Start from a function F, compute its derivative f = F', and try to recover F from f

In [22]:
# here you can modify the integral function the model has to predict, F
F_infix = '5+((x*(x**(-1)))+(cos(sinh(tanh(x)))))'

In [23]:
# F (integral, that the model will try to predict)
F = sp.S(F_infix, locals=env.local_dict)
F

cos(sinh(tanh(x))) + 6

In [24]:
# f (F', that the model will take as input)
f = F.diff(x)
f

-(1 - tanh(x)**2)*sin(sinh(tanh(x)))*cosh(tanh(x))

### Compute prefix representations

In [25]:
F_prefix = env.sympy_to_prefix(F)
f_prefix = env.sympy_to_prefix(f)
print(f"F prefix: {F_prefix}")
print(f"f prefix: {f_prefix}")

F prefix: ['add', 'INT+', '6', 'cos', 'sinh', 'tanh', 'x']
f prefix: ['mul', 'INT-', '1', 'mul', 'add', 'INT+', '1', 'mul', 'INT-', '1', 'pow', 'tanh', 'x', 'INT+', '2', 'mul', 'cosh', 'tanh', 'x', 'sin', 'sinh', 'tanh', 'x']


### Encode input

In [26]:
x1_prefix = env.clean_prefix(['sub', 'derivative', 'f', 'x', 'x'] + f_prefix)
x1 = torch.LongTensor(
    [env.eos_index] +
    [env.word2id[w] for w in x1_prefix] +
    [env.eos_index]
).view(-1, 1)
len1 = torch.LongTensor([len(x1)])
x1, len1 = to_cuda(x1, len1)

with torch.no_grad():
    encoded = encoder('fwd', x=x1, lengths=len1, causal=False).transpose(0, 1)

### Decode with beam search

In [27]:
beam_size = 10
with torch.no_grad():
    _, _, beam = decoder.generate_beam(encoded, len1, beam_size=beam_size, length_penalty=1.0, early_stopping=1, max_len=200)
    assert len(beam) == 1
hypotheses = beam[0].hyp
assert len(hypotheses) == beam_size

### Print results

In [28]:
rows = np.arange(1, beam_size + 1)
columns = ['Score', 'Solution Hypothesis', 'Valid']
results = []

for score, sequence in sorted(hypotheses, reverse=True):
    # Parse decoded hypothesis
    ids = sequence[1:].tolist()  # Decoded token IDs
    hyp_prefix = [env.id2word[word_id] for word_id in ids]  # Convert to prefix notation

    try:
        hyp_infix = env.prefix_to_infix(hyp_prefix)  # Convert to infix notation
        hyp_sympy = env.infix_to_sympy(hyp_infix)  # Convert to SymPy

        # Check if the hypothesis is a valid solution, replacing 'hyp_sympy' with 'f(x)' in the equation
        validation = "YES" if simplify(diff_eq_sympy.subs(f(x), hyp_sympy).doit(), seconds=1) == 0 else "NO"

        # Transform hypothesis to a valid latex expression
        hyp_expr = "$" + sp.latex(env.infix_to_sympy(hyp_infix)) + "$"

    except (InvalidPrefixExpression, ValueErrorExpression):
        validation = "INVALID PREFIX EXPRESSION"
        hyp_expr = hyp_prefix

    # Prepare results
    results.append([score, hyp_expr, validation])

NameError: ignored

In [58]:
print(f"Input function f: {f}")
print(f"Reference function F: {F}")
print("")

scores = []
sol = []
Val = []

for score, sent in sorted(hypotheses, key=lambda x: x[0], reverse=True):

    # parse decoded hypothesis
    ids = sent[1:].tolist()                  # decoded token IDs
    tok = [env.id2word[wid] for wid in ids]  # convert to prefix

    try:
        hyp = env.prefix_to_infix(tok)       # convert to infix
        hyp = env.infix_to_sympy(hyp)        # convert to SymPy

        # check whether we recover f if we differentiate the hypothesis
        # note that sometimes, SymPy fails to show that hyp' - f == 0, and the result is considered as invalid, although it may be correct
        res = "Vaild" if simplify(hyp.diff(x) - f, seconds=1) == 0 else "Invalid"
        

    except:
        res = "INVALID PREFIX EXPRESSION"
        hyp = tok
    sol.append(hyp)
    Val.append(res)
    scores.append(score)
    # print result
    print("%.5f  %s  %s" % (score, res, hyp))
    
  

Input function f: -(1 - tanh(x)**2)*sin(sinh(tanh(x)))*cosh(tanh(x))
Reference function F: cos(sinh(tanh(x))) + 6

-0.00289  Vaild  a8 + cos(sinh(tanh(x)))
-0.42794  Invalid  a8 - cos(sinh(tanh(x)))
-0.44621  Vaild  cos(sinh(tanh(x))) + (a8*x + x)/x
-0.49017  Vaild  (a8*x + x*cos(sinh(tanh(x))))/x
-0.49400  Vaild  (a8*x + x*cos(sinh(tanh(x))) + x)/x
-0.49610  Vaild  (x*(a8 + cos(sinh(tanh(x)))) + x)/x
-0.54927  Vaild  a8 + (x*cos(sinh(tanh(x))) + x)/x
-0.85062  Vaild  log(a8*exp(cos(sinh(tanh(x)))))
-0.93644  Vaild  atan(tan(a8 + cos(sinh(tanh(x)))))
-1.21231  Invalid  a8 + cos(tanh(tanh(x)))


In [59]:
result = {'Score':scores, 'Valid':Val, 'Solution_Hypothesis':sol}
result = pd.DataFrame(result)

In [60]:
result

Unnamed: 0,Score,Valid,Solution_Hypothesis
0,-0.002887,Vaild,a8 + cos(sinh(tanh(x)))
1,-0.427942,Invalid,a8 - cos(sinh(tanh(x)))
2,-0.446214,Vaild,cos(sinh(tanh(x))) + (a8*x + x)/x
3,-0.490169,Vaild,(a8*x + x*cos(sinh(tanh(x))))/x
4,-0.493999,Vaild,(a8*x + x*cos(sinh(tanh(x))) + x)/x
5,-0.496102,Vaild,(x*(a8 + cos(sinh(tanh(x)))) + x)/x
6,-0.549266,Vaild,a8 + (x*cos(sinh(tanh(x))) + x)/x
7,-0.850619,Vaild,log(a8*exp(cos(sinh(tanh(x)))))
8,-0.936439,Vaild,atan(tan(a8 + cos(sinh(tanh(x)))))
9,-1.21231,Invalid,a8 + cos(tanh(tanh(x)))
