# Models Behavior on Some Real Problems

In [1]:
import os
import time

import numpy
import pandas
import sympy
import torch

from src.envs import build_env
from src.envs.char_sp import InvalidPrefixExpression, ValueErrorExpression
from src.envs.sympy_utils import simplify
from src.model import build_modules
from src.utils import AttrDict
from src.utils import to_cuda

## First Order Differential Equations Problems

Use the First Order Differential Equations model (*ode1.pth*) to find the solutions of some equation used for the
computation of generative functions of various problems.

Get Trained Model:

In [2]:
model_path = '../models/differential-equations/ode1.pth'
assert os.path.isfile(model_path)

params = AttrDict({

    # Environment Parameters
    'env_name': 'char_sp',
    'int_base': 10,
    'balanced': False,
    'positive': True,
    'precision': 10,
    'n_variables': 1,
    'n_coefficients': 0,
    'leaf_probs': '0.75,0,0.25,0',
    'max_len': 512,
    'max_int': 5,
    'max_ops': 15,
    'max_ops_G': 15,
    'clean_prefix_expr': True,
    'rewrite_functions': '',
    'tasks': 'ode1',
    'operators': 'add:10,sub:3,mul:10,div:5,sqrt:4,pow2:4,pow3:2,pow4:1,pow5:1,ln:4,exp:4,sin:4,cos:4,tan:4,asin:1,'
                 'acos:1,atan:1,sinh:1,cosh:1,tanh:1,asinh:1,acosh:1,atanh:1',

    # Model Parameters
    'cpu': False,
    'emb_dim': 1024,
    'n_enc_layers': 6,
    'n_dec_layers': 6,
    'n_heads': 8,
    'dropout': 0,
    'attention_dropout': 0,
    'sinusoidal_embeddings': False,
    'share_inout_emb': True,
    'reload_model': model_path,

})

env = build_env(params)
x = env.local_dict['x']
f = env.local_dict['f']
g = env.local_dict['g']
h = env.local_dict['h']

modules = build_modules(env, params)
encoder = modules['encoder']
decoder = modules['decoder']

beam_size = 50

### Differential Equation for Generating Function of the Central Binomial Coefficients $\binom{2n}{n}$

Declare central binomial coefficients $\binom{2n}{n}$ differential equation and solution $y$:

In [3]:
diff_eq_infix = "(1-4*x)*diff(f(x),x)-2*f(x)"
y_infix = "1/sqrt(1-4*x)"

Converts **diff_eq_infix** and **y_infix** to a type that can be used inside SymPy:

In [4]:
diff_eq_sympy = sympy.sympify(diff_eq_infix, locals=env.local_dict)
diff_eq_sympy

(1 - 4*x)*Derivative(f(x), x) - 2*f(x)

In [5]:
y_sympy = sympy.sympify(y_infix, locals=env.local_dict)
y_sympy

1/sqrt(1 - 4*x)

Expand the expression of the equation to allow the model to calculate the solution more easily:

In [6]:
diff_eq_sympy = sympy.expand(diff_eq_sympy)
diff_eq_sympy

-4*x*Derivative(f(x), x) - 2*f(x) + Derivative(f(x), x)

Use the model to find the solution $y$:

In [7]:
start_time = time.time()

diff_eq_prefix = env.sympy_to_prefix(diff_eq_sympy)

x1_prefix = env.clean_prefix(diff_eq_prefix)

x1 = torch.LongTensor(
    [env.eos_index] +
    [env.word2id[w] for w in x1_prefix] +
    [env.eos_index]
).view(-1, 1)

len1 = torch.LongTensor([len(x1)])
x1, len1 = to_cuda(x1, len1)

with torch.no_grad():
    encoded = encoder('fwd', x=x1, lengths=len1, causal=False).transpose(0, 1)

with torch.no_grad():
    _, _, beam = decoder.generate_beam(encoded, len1, beam_size=beam_size, length_penalty=1.0, early_stopping=1,
                                       max_len=params.max_len)

end_time = time.time()

assert len(beam) == 1
hypotheses = beam[0].hyp
assert len(hypotheses) == beam_size

Execution time (seconds):

In [8]:
end_time - start_time

0.8388020992279053

Extract scores and solution hypotheses:

In [9]:
rows = numpy.arange(1, beam_size + 1)
columns = ['Score', 'Solution Hypothesis', 'Valid']
results = []

for score, sequence in sorted(hypotheses, reverse=True):
    # Parse decoded hypothesis
    ids = sequence[1:].tolist()  # Decoded token IDs
    hyp_prefix = [env.id2word[word_id] for word_id in ids]  # Convert to prefix notation

    try:
        hyp_infix = env.prefix_to_infix(hyp_prefix)  # Convert to infix notation
        hyp_sympy = env.infix_to_sympy(hyp_infix)  # Convert to SymPy

        # Check if the hypothesis is a valid solution, replacing 'hyp_sympy' with 'f(x)' in the equation
        validation = "YES" if simplify(diff_eq_sympy.subs(f(x), hyp_sympy).doit(), seconds=1) == 0 else "NO"

        # Transform hypothesis to a valid latex expression
        hyp_expr = "$" + sympy.latex(env.infix_to_sympy(hyp_infix)) + "$"

    except (InvalidPrefixExpression, ValueErrorExpression):
        validation = "INVALID PREFIX EXPRESSION"
        hyp_expr = hyp_prefix

    # Prepare results
    results.append([score, hyp_expr, validation])

Print results:

In [10]:
pandas.set_option('max_colwidth', None)
pandas.DataFrame(results, index=rows, columns=columns).style.set_properties(**{'text-align': 'center'})

Unnamed: 0,Score,Solution Hypothesis,Valid
1,-0.094559,$\frac{a_{8}}{\sqrt{1 - 4 x}}$,YES
2,-0.108193,$\sqrt{\frac{a_{8} x}{- 4 x^{2} + x}}$,YES
3,-0.137775,$\sqrt{\frac{a_{8}}{1 - 4 x}}$,YES
4,-0.159204,$\frac{a_{8} x}{\sqrt{x \left(- 4 x^{2} + x\right)}}$,YES
5,-0.165491,$a_{8} \sqrt{\frac{x}{- 4 x^{2} + x}}$,YES
6,-0.166589,$\frac{a_{8}}{\sqrt{\frac{1}{4} - x}}$,YES
7,-0.190779,$\frac{a_{8}}{\sqrt{\frac{1}{2} - 2 x}}$,YES
8,-0.19853,$\frac{x}{\sqrt{a_{8} x \left(- 4 x^{2} + x\right)}}$,YES
9,-0.200983,$x \sqrt{\frac{a_{8}}{x \left(- 4 x^{2} + x\right)}}$,YES
10,-0.206921,$\frac{a_{8}}{\sqrt{\frac{- 4 x^{2} + x}{x}}}$,YES


### Differential Equation for Generating Function of the Catalan Numbers $\frac{1}{n+1}\binom{2n}{n}$

Declare Catalan Numbers $\frac{1}{n+1}\binom{2n}{n}$ differential equation and solution $y$:

In [11]:
diff_eq_infix = "diff(f(x),x)*(x-4*x**2)+(1-2*x)*f(x)-1"
y_infix = "(1-sqrt(1-4*x))/(2*x)"

Converts **diff_eq_infix** and **y_infix** to a type that can be used inside SymPy:

In [12]:
diff_eq_sympy = sympy.sympify(diff_eq_infix, locals=env.local_dict)
diff_eq_sympy

(1 - 2*x)*f(x) + (-4*x**2 + x)*Derivative(f(x), x) - 1

In [13]:
y_sympy = sympy.sympify(y_infix, locals=env.local_dict)
y_sympy

(1 - sqrt(1 - 4*x))/(2*x)

Expand the expression of the equation to allow the model to calculate the solution more easily:

In [14]:
diff_eq_sympy = sympy.expand(diff_eq_sympy)
diff_eq_sympy

-4*x**2*Derivative(f(x), x) - 2*x*f(x) + x*Derivative(f(x), x) + f(x) - 1

Use the model to find the solution $y$:

In [15]:
start_time = time.time()

diff_eq_prefix = env.sympy_to_prefix(diff_eq_sympy)

x1_prefix = env.clean_prefix(diff_eq_prefix)

x1 = torch.LongTensor(
    [env.eos_index] +
    [env.word2id[w] for w in x1_prefix] +
    [env.eos_index]
).view(-1, 1)

len1 = torch.LongTensor([len(x1)])
x1, len1 = to_cuda(x1, len1)

with torch.no_grad():
    encoded = encoder('fwd', x=x1, lengths=len1, causal=False).transpose(0, 1)

with torch.no_grad():
    _, _, beam = decoder.generate_beam(encoded, len1, beam_size=beam_size, length_penalty=1.0, early_stopping=1,
                                       max_len=params.max_len)

end_time = time.time()

assert len(beam) == 1
hypotheses = beam[0].hyp
assert len(hypotheses) == beam_size

Execution time (seconds):

In [16]:
end_time - start_time

0.32935523986816406

Extract scores and solution hypotheses:

In [17]:
rows = numpy.arange(1, beam_size + 1)
columns = ['Score', 'Solution Hypothesis', 'Valid']
results = []

for score, sequence in sorted(hypotheses, reverse=True):
    # Parse decoded hypothesis
    ids = sequence[1:].tolist()  # Decoded token IDs
    hyp_prefix = [env.id2word[word_id] for word_id in ids]  # Convert to prefix notation

    try:
        hyp_infix = env.prefix_to_infix(hyp_prefix)  # Convert to infix notation
        hyp_sympy = env.infix_to_sympy(hyp_infix)  # Convert to SymPy

        # Check if the hypothesis is a valid solution, replacing 'hyp_sympy' with 'f(x)' in the equation
        validation = "YES" if simplify(diff_eq_sympy.subs(f(x), hyp_sympy).doit(), seconds=1) == 0 else "NO"

        # Transform hypothesis to a valid latex expression
        hyp_expr = "$" + sympy.latex(env.infix_to_sympy(hyp_infix)) + "$"

    except (InvalidPrefixExpression, ValueErrorExpression):
        validation = "INVALID PREFIX EXPRESSION"
        hyp_expr = hyp_prefix

    # Prepare results
    results.append([score, hyp_expr, validation])

Print results:

In [18]:
pandas.set_option('max_colwidth', None)
pandas.DataFrame(results, index=rows, columns=columns).style.set_properties(**{'text-align': 'center'})

Unnamed: 0,Score,Solution Hypothesis,Valid
1,-0.087689,$\frac{a_{8} \sqrt{1 - 4 x} + x}{x}$,NO
2,-0.134191,$\frac{a_{8} \sqrt{4 x - 1} + x}{x}$,NO
3,-0.149671,$\frac{a_{8} \sqrt{\frac{1}{4} - x} + x}{x}$,NO
4,-0.172542,$\frac{x + \sqrt{a_{8} \left(1 - 4 x\right)}}{x}$,NO
5,-0.17351,$\frac{a_{8} \sqrt{\frac{- 4 x^{2} + x}{x}} + x}{x}$,NO
6,-0.190297,$\frac{a_{8} \sqrt{1 - 4 x}}{x} + \frac{1}{x}$,NO
7,-0.191963,$\frac{a_{8} \sqrt{x - \frac{1}{4}} + x}{x}$,NO
8,-0.194101,$\frac{x + \sqrt{\frac{a_{8} \left(- 4 x^{2} + x\right)}{x}}}{x}$,NO
9,-0.194893,$\frac{x + \sqrt{a_{8} \left(4 x - 1\right)}}{x}$,NO
10,-0.197811,$\frac{a_{8} \sqrt{2 x - \frac{1}{2}} + x}{x}$,NO


### Differential Equation for Generating Function of the Average Number of Comparisons in the Quicksort

Declare Average number of comparisons in the Quicksort differential equation and solution $y$:

In [19]:
diff_eq_infix = "diff(f(x),x)*(1-x)**3-2*(1-x)**2*f(x)-2"
y_infix = "2/(1-x)**2 * ln(1/(1-x))"

Converts **diff_eq_infix** and **y_infix** to a type that can be used inside SymPy:

In [20]:
diff_eq_sympy = sympy.sympify(diff_eq_infix, locals=env.local_dict)
diff_eq_sympy

(1 - x)**3*Derivative(f(x), x) - 2*(1 - x)**2*f(x) - 2

In [21]:
y_sympy = sympy.sympify(y_infix, locals=env.local_dict)
y_sympy

2*log(1/(1 - x))/(1 - x)**2

Expand the expression of the equation to allow the model to calculate the solution more easily:

In [22]:
diff_eq_sympy = sympy.expand(diff_eq_sympy)
diff_eq_sympy

-x**3*Derivative(f(x), x) - 2*x**2*f(x) + 3*x**2*Derivative(f(x), x) + 4*x*f(x) - 3*x*Derivative(f(x), x) - 2*f(x) + Derivative(f(x), x) - 2

Use the model to find the solution $y$:

In [23]:
start_time = time.time()

diff_eq_prefix = env.sympy_to_prefix(diff_eq_sympy)

x1_prefix = env.clean_prefix(diff_eq_prefix)

x1 = torch.LongTensor(
    [env.eos_index] +
    [env.word2id[w] for w in x1_prefix] +
    [env.eos_index]
).view(-1, 1)

len1 = torch.LongTensor([len(x1)])
x1, len1 = to_cuda(x1, len1)

with torch.no_grad():
    encoded = encoder('fwd', x=x1, lengths=len1, causal=False).transpose(0, 1)

with torch.no_grad():
    _, _, beam = decoder.generate_beam(encoded, len1, beam_size=beam_size, length_penalty=1.0, early_stopping=1,
                                       max_len=params.max_len)

end_time = time.time()

assert len(beam) == 1
hypotheses = beam[0].hyp
assert len(hypotheses) == beam_size

Execution time (seconds):

In [24]:
end_time - start_time

0.41722869873046875

Extract scores and solution hypotheses:

In [25]:
rows = numpy.arange(1, beam_size + 1)
columns = ['Score', 'Solution Hypothesis', 'Valid']
results = []

for score, sequence in sorted(hypotheses, reverse=True):
    # Parse decoded hypothesis
    ids = sequence[1:].tolist()  # Decoded token IDs
    hyp_prefix = [env.id2word[word_id] for word_id in ids]  # Convert to prefix notation

    try:
        hyp_infix = env.prefix_to_infix(hyp_prefix)  # Convert to infix notation
        hyp_sympy = env.infix_to_sympy(hyp_infix)  # Convert to SymPy

        # Check if the hypothesis is a valid solution, replacing 'hyp_sympy' with 'f(x)' in the equation
        validation = "YES" if simplify(diff_eq_sympy.subs(f(x), hyp_sympy).doit(), seconds=1) == 0 else "NO"

        # Transform hypothesis to a valid latex expression
        hyp_expr = "$" + sympy.latex(env.infix_to_sympy(hyp_infix)) + "$"

    except (InvalidPrefixExpression, ValueErrorExpression):
        validation = "INVALID PREFIX EXPRESSION"
        hyp_expr = hyp_prefix

    # Prepare results
    results.append([score, hyp_expr, validation])

Print results:

In [26]:
pandas.set_option('max_colwidth', None)
pandas.DataFrame(results, index=rows, columns=columns).style.set_properties(**{'text-align': 'center'})

Unnamed: 0,Score,Solution Hypothesis,Valid
1,-0.120965,$\frac{a_{8} + \frac{2}{1 - x}}{\left(1 - x\right)^{2}}$,NO
2,-0.136975,$\frac{a_{8} + \frac{2}{1 - x}}{\left(x - 1\right)^{2}}$,NO
3,-0.149535,$\frac{a_{8} + \frac{2 x}{x - 1}}{\left(1 - x\right)^{2}}$,NO
4,-0.161307,$\frac{a_{8} - 2 \log{\left(1 - x \right)}}{\left(1 - x\right)^{2}}$,YES
5,-0.162179,$\frac{a_{8} + 2 x}{\left(1 - x\right)^{2}}$,NO
6,-0.165757,$\frac{a_{8} + \frac{2 x}{- x^{2} + x}}{\left(1 - x\right)^{2}}$,NO
7,-0.166652,$\frac{a_{8} + \frac{2 x}{- x^{2} + x}}{\left(x - 1\right)^{2}}$,NO
8,-0.181282,$\frac{a_{8} + \frac{2 x}{1 - x}}{\left(1 - x\right)^{2}}$,NO
9,-0.185461,$\frac{a_{8} + \frac{2 x}{x - 1}}{\left(x - 1\right)^{2}}$,NO
10,-0.192135,$\frac{a_{8} + 2 \log{\left(1 - x \right)}}{\left(1 - x\right)^{2}}$,NO


### Differential Equation for Generating Function of the Average Number of Exchanges in the Quicksort

Declare Average number of exchanges in the Quicksort differential equation and solution $y$:

In [27]:
diff_eq_infix = "diff(f(x),x)*6*(1-x)**3 - 12*(1-x)**2*f(x) - (x**2*(3-x))"
y_infix = "(1/3)*(1/(1-x)**2)*ln(1/(1-x))+(x**3-3*x**2-6*t)/(18*(1-x)**2)"

Converts **diff_eq_infix** and **y_infix** to a type that can be used inside SymPy:

In [28]:
diff_eq_sympy = sympy.sympify(diff_eq_infix, locals=env.local_dict)
diff_eq_sympy

-x**2*(3 - x) + 6*(1 - x)**3*Derivative(f(x), x) - 12*(1 - x)**2*f(x)

In [29]:
y_sympy = sympy.sympify(y_infix, locals=env.local_dict)
y_sympy

(-6*t + x**3 - 3*x**2)/(18*(1 - x)**2) + log(1/(1 - x))/(3*(1 - x)**2)

Expand the expression of the equation to allow the model to calculate the solution more easily:

In [30]:
diff_eq_sympy = sympy.expand(diff_eq_sympy)
diff_eq_sympy

-6*x**3*Derivative(f(x), x) + x**3 - 12*x**2*f(x) + 18*x**2*Derivative(f(x), x) - 3*x**2 + 24*x*f(x) - 18*x*Derivative(f(x), x) - 12*f(x) + 6*Derivative(f(x), x)

Use the model to find the solution $y$:

In [31]:
start_time = time.time()

diff_eq_prefix = env.sympy_to_prefix(diff_eq_sympy)

x1_prefix = env.clean_prefix(diff_eq_prefix)

x1 = torch.LongTensor(
    [env.eos_index] +
    [env.word2id[w] for w in x1_prefix] +
    [env.eos_index]
).view(-1, 1)

len1 = torch.LongTensor([len(x1)])
x1, len1 = to_cuda(x1, len1)

with torch.no_grad():
    encoded = encoder('fwd', x=x1, lengths=len1, causal=False).transpose(0, 1)

with torch.no_grad():
    _, _, beam = decoder.generate_beam(encoded, len1, beam_size=beam_size, length_penalty=1.0, early_stopping=1,
                                       max_len=params.max_len)

end_time = time.time()

assert len(beam) == 1
hypotheses = beam[0].hyp
assert len(hypotheses) == beam_size

Execution time (seconds):

In [32]:
end_time - start_time

0.43843841552734375

Extract scores and solution hypotheses:

In [33]:
rows = numpy.arange(1, beam_size + 1)
columns = ['Score', 'Solution Hypothesis', 'Valid']
results = []

for score, sequence in sorted(hypotheses, reverse=True):
    # Parse decoded hypothesis
    ids = sequence[1:].tolist()  # Decoded token IDs
    hyp_prefix = [env.id2word[word_id] for word_id in ids]  # Convert to prefix notation

    try:
        hyp_infix = env.prefix_to_infix(hyp_prefix)  # Convert to infix notation
        hyp_sympy = env.infix_to_sympy(hyp_infix)  # Convert to SymPy

        # Check if the hypothesis is a valid solution, replacing 'hyp_sympy' with 'f(x)' in the equation
        validation = "YES" if simplify(diff_eq_sympy.subs(f(x), hyp_sympy).doit(), seconds=1) == 0 else "NO"

        # Transform hypothesis to a valid latex expression
        hyp_expr = "$" + sympy.latex(env.infix_to_sympy(hyp_infix)) + "$"

    except (InvalidPrefixExpression, ValueErrorExpression):
        validation = "INVALID PREFIX EXPRESSION"
        hyp_expr = hyp_prefix

    # Prepare results
    results.append([score, hyp_expr, validation])

Print results:

In [34]:
pandas.set_option('max_colwidth', None)
pandas.DataFrame(results, index=rows, columns=columns).style.set_properties(**{'text-align': 'center'})

Unnamed: 0,Score,Solution Hypothesis,Valid
1,-0.130017,$\frac{a_{8} x^{2}}{\left(x^{2} + x\right)^{2}} + \frac{x^{3}}{\left(6 - 6 x\right)^{2}}$,NO
2,-0.13531,$\frac{a_{8}}{\left(1 - x\right)^{2}} + \frac{x^{3}}{6 x - 6}$,NO
3,-0.142492,$\frac{a_{8}}{\left(1 - x\right)^{2}} + \frac{x^{3}}{9 \left(2 x - 2\right)^{2}}$,NO
4,-0.143277,$\frac{a_{8} x^{2}}{\left(x^{2} + x\right)^{2}} + \frac{x^{3}}{\left(6 x - 6\right)^{2}}$,NO
5,-0.144215,$\frac{x}{18} + \frac{a_{8} - x}{18 \left(1 - x\right)^{2}}$,NO
6,-0.151607,$\frac{a_{8}}{\left(x - 1\right)^{2}} + \frac{x^{3}}{9 \left(2 x - 2\right)^{2}}$,NO
7,-0.155293,$\frac{a_{8} x^{2}}{\left(x^{2} - x\right)^{2}} + \frac{x^{3}}{\left(6 - 6 x\right)^{2}}$,NO
8,-0.158034,$\frac{a_{8}}{\left(x - 1\right)^{2}} + \frac{x^{3}}{6 \left(x - 1\right)^{2}}$,NO
9,-0.160683,$\frac{a_{8} x^{2}}{\left(x^{2} + x\right)^{2}} + \frac{x^{3}}{\left(3 x - 3\right)^{2}}$,NO
10,-0.161167,$\frac{a_{8}}{\left(1 - x\right)^{2}} + \frac{x^{3}}{3 \left(2 x - 2\right)^{2}}$,NO


### Differential Equation for Generating Function of the Number of leaves in Binary Search Trees

Declare the number of leaves in binary search trees differential equation and solution $y$:

In [35]:
diff_eq_infix = "diff(f(x),x)*(1-x) -(1-x) -2*f(x)"
y_infix = "(1/3)*(1/(1-x)**2)+1/3*(x-1)"

Converts **diff_eq_infix** and **y_infix** to a type that can be used inside SymPy:

In [36]:
diff_eq_sympy = sympy.sympify(diff_eq_infix, locals=env.local_dict)
diff_eq_sympy

x + (1 - x)*Derivative(f(x), x) - 2*f(x) - 1

In [37]:
y_sympy = sympy.sympify(y_infix, locals=env.local_dict)
y_sympy

x/3 - 1/3 + 1/(3*(1 - x)**2)

Expand the expression of the equation to allow the model to calculate the solution more easily:

In [38]:
diff_eq_sympy = sympy.expand(diff_eq_sympy)
diff_eq_sympy

-x*Derivative(f(x), x) + x - 2*f(x) + Derivative(f(x), x) - 1

Use the model to find the solution $y$:

In [39]:
start_time = time.time()

diff_eq_prefix = env.sympy_to_prefix(diff_eq_sympy)

x1_prefix = env.clean_prefix(diff_eq_prefix)

x1 = torch.LongTensor(
    [env.eos_index] +
    [env.word2id[w] for w in x1_prefix] +
    [env.eos_index]
).view(-1, 1)

len1 = torch.LongTensor([len(x1)])
x1, len1 = to_cuda(x1, len1)

with torch.no_grad():
    encoded = encoder('fwd', x=x1, lengths=len1, causal=False).transpose(0, 1)

with torch.no_grad():
    _, _, beam = decoder.generate_beam(encoded, len1, beam_size=beam_size, length_penalty=1.0, early_stopping=1,
                                       max_len=params.max_len)

end_time = time.time()

assert len(beam) == 1
hypotheses = beam[0].hyp
assert len(hypotheses) == beam_size

Execution time (seconds):

In [40]:
end_time - start_time

0.36043882369995117

Extract scores and solution hypotheses:

In [41]:
rows = numpy.arange(1, beam_size + 1)
columns = ['Score', 'Solution Hypothesis', 'Valid']
results = []

for score, sequence in sorted(hypotheses, reverse=True):
    # Parse decoded hypothesis
    ids = sequence[1:].tolist()  # Decoded token IDs
    hyp_prefix = [env.id2word[word_id] for word_id in ids]  # Convert to prefix notation

    try:
        hyp_infix = env.prefix_to_infix(hyp_prefix)  # Convert to infix notation
        hyp_sympy = env.infix_to_sympy(hyp_infix)  # Convert to SymPy

        # Check if the hypothesis is a valid solution, replacing 'hyp_sympy' with 'f(x)' in the equation
        validation = "YES" if simplify(diff_eq_sympy.subs(f(x), hyp_sympy).doit(), seconds=1) == 0 else "NO"

        # Transform hypothesis to a valid latex expression
        hyp_expr = "$" + sympy.latex(env.infix_to_sympy(hyp_infix)) + "$"

    except (InvalidPrefixExpression, ValueErrorExpression):
        validation = "INVALID PREFIX EXPRESSION"
        hyp_expr = hyp_prefix

    # Prepare results
    results.append([score, hyp_expr, validation])

Print results:

In [42]:
pandas.set_option('max_colwidth', None)
pandas.DataFrame(results, index=rows, columns=columns).style.set_properties(**{'text-align': 'center'})

Unnamed: 0,Score,Solution Hypothesis,Valid
1,-0.06218,$\frac{a_{8}}{\left(1 - x\right)^{2}} + \frac{x}{3} - \frac{1}{3}$,YES
2,-0.086932,$\frac{a_{8} x^{2}}{\left(- x^{2} + x\right)^{2}} + \frac{x}{3} - \frac{1}{3}$,YES
3,-0.094559,$\frac{a_{8} x^{2}}{\left(- x^{2} + x\right)^{2}} + \frac{x}{3}$,NO
4,-0.097033,$\frac{a_{8}}{\left(1 - x\right)^{2}} + \frac{x}{3}$,NO
5,-0.106211,$\frac{a_{8}}{\left(1 - x\right)^{2}} + \frac{x}{3} - \frac{1}{2}$,NO
6,-0.112487,$\frac{a_{8}}{x \left(x - 2\right) + 1} + \frac{x}{3}$,NO
7,-0.117153,$\frac{a_{8} x^{2}}{\left(- x^{3} + x^{2}\right)^{2}} + \frac{x}{3}$,NO
8,-0.119727,$\frac{a_{8}}{x^{2} - 2 x + 1} + \frac{x}{3}$,NO
9,-0.124708,$\frac{a_{8} x^{2}}{\left(- x^{2} + x\right)^{2}} + \frac{x}{3} - \frac{1}{2}$,NO
10,-0.136849,$\frac{a_{8} x^{2}}{\left(x^{2} - x\right)^{2}} + \frac{x}{3} - \frac{1}{3}$,YES


## Differential Equation that cannot be solved by Mathematica and Matlab

Declare differential equation and solution $y$:

In [43]:
diff_eq_infix = "3*x*f(x)*cos(x)-sqrt(9*x**2*sin(x)**2+1)*diff(f(x),x)+3*f(x)*sin(x)"
y_infix = "c*exp(sinh(3*x*sin(x))**-1)"

Converts **diff_eq_infix** and **y_infix** to a type that can be used inside SymPy:

In [44]:
diff_eq_sympy = sympy.sympify(diff_eq_infix, locals=env.local_dict)
diff_eq_sympy

3*x*f(x)*cos(x) - sqrt(9*x**2*sin(x)**2 + 1)*Derivative(f(x), x) + 3*f(x)*sin(x)

In [45]:
y_sympy = sympy.sympify(y_infix, locals=env.local_dict)
y_sympy

c*exp(1/sinh(3*x*sin(x)))

Expand the expression of the equation to allow the model to calculate the solution more easily:

In [46]:
diff_eq_sympy = sympy.expand(diff_eq_sympy)
diff_eq_sympy

3*x*f(x)*cos(x) - sqrt(9*x**2*sin(x)**2 + 1)*Derivative(f(x), x) + 3*f(x)*sin(x)

Use the model to find the solution $y$:

In [47]:
start_time = time.time()

diff_eq_prefix = env.sympy_to_prefix(diff_eq_sympy)

x1_prefix = env.clean_prefix(diff_eq_prefix)

x1 = torch.LongTensor(
    [env.eos_index] +
    [env.word2id[w] for w in x1_prefix] +
    [env.eos_index]
).view(-1, 1)

len1 = torch.LongTensor([len(x1)])
x1, len1 = to_cuda(x1, len1)

with torch.no_grad():
    encoded = encoder('fwd', x=x1, lengths=len1, causal=False).transpose(0, 1)

with torch.no_grad():
    _, _, beam = decoder.generate_beam(encoded, len1, beam_size=beam_size, length_penalty=1.0, early_stopping=1,
                                       max_len=params.max_len)

end_time = time.time()

assert len(beam) == 1
hypotheses = beam[0].hyp
assert len(hypotheses) == beam_size

Execution time (seconds):

In [48]:
end_time - start_time

0.18436098098754883

Extract scores and solution hypotheses:

In [49]:
rows = numpy.arange(1, beam_size + 1)
columns = ['Score', 'Solution Hypothesis', 'Valid']
results = []

for score, sequence in sorted(hypotheses, reverse=True):
    # Parse decoded hypothesis
    ids = sequence[1:].tolist()  # Decoded token IDs
    hyp_prefix = [env.id2word[word_id] for word_id in ids]  # Convert to prefix notation

    try:
        hyp_infix = env.prefix_to_infix(hyp_prefix)  # Convert to infix notation
        hyp_sympy = env.infix_to_sympy(hyp_infix)  # Convert to SymPy

        # Check if the hypothesis is a valid solution, replacing 'hyp_sympy' with 'f(x)' in the equation
        validation = "YES" if simplify(diff_eq_sympy.subs(f(x), hyp_sympy).doit(), seconds=1) == 0 else "NO"

        # Transform hypothesis to a valid latex expression
        hyp_expr = "$" + sympy.latex(env.infix_to_sympy(hyp_infix)) + "$"

    except (InvalidPrefixExpression, ValueErrorExpression):
        validation = "INVALID PREFIX EXPRESSION"
        hyp_expr = hyp_prefix

    # Prepare results
    results.append([score, hyp_expr, validation])

Print results:

In [50]:
pandas.set_option('max_colwidth', None)
pandas.DataFrame(results, index=rows, columns=columns).style.set_properties(**{'text-align': 'center'})

Unnamed: 0,Score,Solution Hypothesis,Valid
1,-0.056235,$e^{a_{8} + \operatorname{asinh}{\left(3 x \sin{\left(x \right)} \right)}}$,YES
2,-0.069612,$a_{8} e^{\operatorname{asinh}{\left(3 x \sin{\left(x \right)} \right)}}$,YES
3,-0.36448,$4 e^{a_{8} + \operatorname{asinh}{\left(3 x \sin{\left(x \right)} \right)}}$,YES
4,-0.371019,$5 e^{a_{8} + \operatorname{asinh}{\left(3 x \sin{\left(x \right)} \right)}}$,YES
5,-0.379251,$e^{a_{8} + \operatorname{asinh}{\left(3 x \cos{\left(x \right)} \tan{\left(x \right)} \right)}}$,YES
6,-0.384521,$3 e^{a_{8} + \operatorname{asinh}{\left(3 x \sin{\left(x \right)} \right)}}$,YES
7,-0.385925,$a_{8} e^{\operatorname{asinh}{\left(3 x \sin{\left(x \right)} \right)} + 1}$,YES
8,-0.3887,$2 e^{a_{8} + \operatorname{asinh}{\left(3 x \sin{\left(x \right)} \right)}}$,YES
9,-0.388945,$a_{8} e^{\operatorname{asinh}{\left(3 x \sin{\left(x \right)} \right)} + 4}$,YES
10,-0.393191,$a_{8} e^{\operatorname{asinh}{\left(3 x \sin{\left(x \right)} \right)} + 5}$,YES


## Integrals Calculations

Use the Integral Model (*fwd_bwd_ibp.pth*) to find the solutions of some integrals.

Get Trained Model:

In [51]:
model_path = '../models/integrations/fwd_bwd_ibp.pth'
assert os.path.isfile(model_path)

params = AttrDict({

    # Environment Parameters
    'env_name': 'char_sp',
    'int_base': 10,
    'balanced': False,
    'positive': True,
    'precision': 10,
    'n_variables': 1,
    'n_coefficients': 0,
    'leaf_probs': '0.75,0,0.25,0',
    'max_len': 512,
    'max_int': 5,
    'max_ops': 15,
    'max_ops_G': 15,
    'clean_prefix_expr': True,
    'rewrite_functions': '',
    'tasks': 'prim_fwd,prim_bwd,prim_ibp',
    'operators': 'add:10,sub:3,mul:10,div:5,sqrt:4,pow2:4,pow3:2,pow4:1,pow5:1,ln:4,exp:4,sin:4,cos:4,tan:4,asin:1,'
                 'acos:1,atan:1,sinh:1,cosh:1,tanh:1,asinh:1,acosh:1,atanh:1',

    # Model Parameters
    'cpu': False,
    'emb_dim': 1024,
    'n_enc_layers': 6,
    'n_dec_layers': 6,
    'n_heads': 8,
    'dropout': 0,
    'attention_dropout': 0,
    'sinusoidal_embeddings': False,
    'share_inout_emb': True,
    'reload_model': model_path,

})

env = build_env(params)
x = env.local_dict['x']

modules = build_modules(env, params)
encoder = modules['encoder']
decoder = modules['decoder']

beam_size = 50

## Integral that cannot be solved by Mathematica and Matlab

Declare function to integrate and solution $y$:

In [52]:
function_infix = "(1-x**4)/((1+x**2+x**4)*sqrt(1+x**4))"
y_infix = "atan(x/sqrt(1+x**4))"

Converts **function_infix** and **y_infix** to a type that can be used inside SymPy:

In [53]:
function_sympy = sympy.sympify(function_infix, locals=env.local_dict)
function_sympy

(1 - x**4)/(sqrt(x**4 + 1)*(x**4 + x**2 + 1))

In [54]:
y_sympy = sympy.sympify(y_infix, locals=env.local_dict)
y_sympy

atan(x/sqrt(x**4 + 1))

Use the model to find the solution $y$:

In [55]:
start_time = time.time()

function_prefix = env.sympy_to_prefix(function_sympy)

x1_prefix = env.clean_prefix(['sub', 'derivative', 'f', 'x', 'x'] + function_prefix)

x1 = torch.LongTensor(
    [env.eos_index] +
    [env.word2id[w] for w in x1_prefix] +
    [env.eos_index]
).view(-1, 1)

len1 = torch.LongTensor([len(x1)])
x1, len1 = to_cuda(x1, len1)

with torch.no_grad():
    encoded = encoder('fwd', x=x1, lengths=len1, causal=False).transpose(0, 1)

with torch.no_grad():
    _, _, beam = decoder.generate_beam(encoded, len1, beam_size=beam_size, length_penalty=1.0, early_stopping=1,
                                       max_len=params.max_len)

end_time = time.time()

assert len(beam) == 1
hypotheses = beam[0].hyp
assert len(hypotheses) == beam_size

Execution time (seconds):

In [56]:
end_time - start_time

0.2552757263183594

Extract scores and solution hypotheses:

In [57]:
rows = numpy.arange(1, beam_size + 1)
columns = ['Score', 'Solution Hypothesis', 'Valid']
results = []

for score, sequence in sorted(hypotheses, reverse=True):
    # Parse decoded hypothesis
    ids = sequence[1:].tolist()  # Decoded token IDs
    hyp_prefix = [env.id2word[word_id] for word_id in ids]  # Convert to prefix notation

    try:
        hyp_infix = env.prefix_to_infix(hyp_prefix)  # Convert to infix notation
        hyp_sympy = env.infix_to_sympy(hyp_infix)  # Convert to SymPy

        # Check if the hypothesis is a valid solution, replacing 'hyp_sympy' with 'f(x)' in the equation
        validation = "YES" if simplify(hyp_sympy.diff(x) - function_sympy, seconds=1) == 0 else "NO"

        # Transform hypothesis to a valid latex expression
        hyp_expr = "$" + sympy.latex(env.infix_to_sympy(hyp_infix)) + "$"

    except (InvalidPrefixExpression, ValueErrorExpression):
        validation = "INVALID PREFIX EXPRESSION"
        hyp_expr = hyp_prefix

    # Prepare results
    results.append([score, hyp_expr, validation])

Print results:

In [58]:
pandas.set_option('max_colwidth', None)
pandas.DataFrame(results, index=rows, columns=columns).style.set_properties(**{'text-align': 'center'})

Unnamed: 0,Score,Solution Hypothesis,Valid
1,-0.016911,$\operatorname{atan}{\left(\frac{x}{\sqrt{x^{4} + 1}} \right)}$,YES
2,-0.120237,$\operatorname{asinh}{\left(\frac{x}{\sqrt{x^{4} + 1}} \right)}$,NO
3,-0.230831,$\operatorname{atanh}{\left(\frac{x}{\sqrt{x^{4} + 1}} \right)}$,NO
4,-0.271258,$\frac{x}{\sqrt{x^{4} + 1}}$,NO
5,-0.296699,$\operatorname{asinh}{\left(\frac{x}{\sqrt{x^{2} + 1}} \right)}$,NO
6,-0.32126,$\operatorname{asin}{\left(\frac{x}{\sqrt{x^{4} + 1}} \right)}$,NO
7,-0.323282,$\operatorname{atanh}{\left(\frac{\sqrt{x^{4} + 1}}{x} \right)}$,NO
8,-0.345199,$\operatorname{atan}{\left(x \sqrt{x^{4} + 1} \right)}$,NO
9,-0.352913,$\operatorname{atan}{\left(\frac{x^{2}}{\sqrt{x^{5} + x}} \right)}$,NO
10,-0.359088,$\operatorname{atan}{\left(\frac{\sqrt{x^{4} + 1}}{x} \right)}$,NO


## Various Tough Integrals

Declare function to integrate and solution $y$:

In [59]:
# function_infix = "(1+x) / (x**2 * (2-x))"
# function_infix = "1/(x * (log(x)**2 + 4*log(x) + 5))"
# function_infix = "(x+1)/x * log(Abs(x))"
# function_infix = "x**3*asin(x/sqrt(1+x**2))"
function_infix = "(sqrt(x**2+  1) - sqrt(x**2 - 1)) / sqrt(x**4 - 1)"

Use the model to find the solution $y$:

In [60]:
start_time = time.time()

function_prefix = env.sympy_to_prefix(function_sympy)

x1_prefix = env.clean_prefix(['sub', 'derivative', 'f', 'x', 'x'] + function_prefix)

x1 = torch.LongTensor(
    [env.eos_index] +
    [env.word2id[w] for w in x1_prefix] +
    [env.eos_index]
).view(-1, 1)

len1 = torch.LongTensor([len(x1)])
x1, len1 = to_cuda(x1, len1)

with torch.no_grad():
    encoded = encoder('fwd', x=x1, lengths=len1, causal=False).transpose(0, 1)

with torch.no_grad():
    _, _, beam = decoder.generate_beam(encoded, len1, beam_size=beam_size, length_penalty=1.0, early_stopping=1,
                                       max_len=params.max_len)

end_time = time.time()

assert len(beam) == 1
hypotheses = beam[0].hyp
assert len(hypotheses) == beam_size

Execution time (seconds):

In [61]:
end_time - start_time

0.24693799018859863

Extract scores and solution hypotheses:

In [62]:
rows = numpy.arange(1, beam_size + 1)
columns = ['Score', 'Solution Hypothesis', 'Valid']
results = []

for score, sequence in sorted(hypotheses, reverse=True):
    # Parse decoded hypothesis
    ids = sequence[1:].tolist()  # Decoded token IDs
    hyp_prefix = [env.id2word[word_id] for word_id in ids]  # Convert to prefix notation

    try:
        hyp_infix = env.prefix_to_infix(hyp_prefix)  # Convert to infix notation
        hyp_sympy = env.infix_to_sympy(hyp_infix)  # Convert to SymPy

        # Check if the hypothesis is a valid solution, replacing 'hyp_sympy' with 'f(x)' in the equation
        validation = "YES" if simplify(hyp_sympy.diff(x) - function_sympy, seconds=1) == 0 else "NO"

        # Transform hypothesis to a valid latex expression
        hyp_expr = "$" + sympy.latex(env.infix_to_sympy(hyp_infix)) + "$"

    except (InvalidPrefixExpression, ValueErrorExpression):
        validation = "INVALID PREFIX EXPRESSION"
        hyp_expr = hyp_prefix

    # Prepare results
    results.append([score, hyp_expr, validation])

Print results:

In [63]:
pandas.set_option('max_colwidth', None)
pandas.DataFrame(results, index=rows, columns=columns).style.set_properties(**{'text-align': 'center'})

Unnamed: 0,Score,Solution Hypothesis,Valid
1,-0.016911,$\operatorname{atan}{\left(\frac{x}{\sqrt{x^{4} + 1}} \right)}$,YES
2,-0.120237,$\operatorname{asinh}{\left(\frac{x}{\sqrt{x^{4} + 1}} \right)}$,NO
3,-0.230831,$\operatorname{atanh}{\left(\frac{x}{\sqrt{x^{4} + 1}} \right)}$,NO
4,-0.271258,$\frac{x}{\sqrt{x^{4} + 1}}$,NO
5,-0.296699,$\operatorname{asinh}{\left(\frac{x}{\sqrt{x^{2} + 1}} \right)}$,NO
6,-0.32126,$\operatorname{asin}{\left(\frac{x}{\sqrt{x^{4} + 1}} \right)}$,NO
7,-0.323282,$\operatorname{atanh}{\left(\frac{\sqrt{x^{4} + 1}}{x} \right)}$,NO
8,-0.345199,$\operatorname{atan}{\left(x \sqrt{x^{4} + 1} \right)}$,NO
9,-0.352913,$\operatorname{atan}{\left(\frac{x^{2}}{\sqrt{x^{5} + x}} \right)}$,NO
10,-0.359088,$\operatorname{atan}{\left(\frac{\sqrt{x^{4} + 1}}{x} \right)}$,NO
