# Playing with generator

In [1]:
#%%timeit
import glob
import os
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import torch
import numpy as np
import sys
import copy
from pathlib import Path
#from src.envs.generators import RandomRecurrence
#from src.envs.encoders import Equation, IntegerSeries, RealSeries
from sympy import *
import pickle
from collections import defaultdict, OrderedDict
import math

%reload_ext autoreload

#%cd ~/recur/notebooks
sys.path.append('/private/home/pakamienny/Research_2/recur')
import src
from src.envs.generators import RandomRecurrence
from src.envs.encoders import Equation

class params:
    float_precision = 3
    max_len = 30
    max_ops = 6
    max_degree = 6
    max_number = 1e100
    real_series = False
    max_exponent = 100
    init_scale = 10
    prob_const=1/3
    prob_n = 1/3
    max_int = 10
    prob_rand = 0.2
    dimension=3
    

In [47]:
from src.envs.generators import Generator, all_operators, Node, NodeList, operators_int, operators_real

class RandomRecurrence(Generator):
    def __init__(self, params):
        super().__init__(params)
        self.params = params
        
        self.real_series = params.real_series
        self.prob_const = params.prob_const
        self.prob_n = params.prob_n
        self.prob_rand = params.prob_rand
        self.max_int = params.max_int
        self.max_degree = params.max_degree
        self.max_ops = params.max_ops
        self.max_len = params.max_len
        self.init_scale = params.init_scale
        self.dimension = params.dimension
        
        if params.real_series:
            self.max_number = 10**(params.max_exponent+params.float_precision)
            self.operators = operators_real
        else:
            self.max_number = params.max_number
            self.operators = operators_int
        self.unaries = [o for o in self.operators.keys() if self.operators[o] == 1]
        self.binaries = [o for o in self.operators.keys() if self.operators[o] == 2]
        self.unary = len(self.unaries) > 0
        self.distrib = self.generate_dist(2 * self.max_ops)

        self.constants = [str(i) for i in range(-self.max_int,self.max_int+1) if i!=0]
        if params.real_series:
            self.constants += math_constants
        self.symbols = list(self.operators) + [f'x_{i}_{j}' for i in range(self.dimension) for j in range(self.max_degree+1)] + self.constants + ['n', '|']
        self.symbols += ['rand']

    def generate_dist(self, max_ops):
        """
        `max_ops`: maximum number of operators
        Enumerate the number of possible unary-binary trees that can be generated from empty nodes.
        D[e][n] represents the number of different binary trees with n nodes that
        can be generated from e empty nodes, using the following recursion:
            D(n, 0) = 0
            D(0, e) = 1
            D(n, e) = D(n, e - 1) + p_1 * D(n- 1, e) + D(n - 1, e + 1)
        p1 =  if binary trees, 1 if unary binary
        """
        p1 = 1 if self.unary else 0
        # enumerate possible trees
        D = []
        D.append([0] + ([1 for i in range(1, 2 * max_ops + 1)]))
        for n in range(1, 2 * max_ops + 1):  # number of operators
            s = [0]
            for e in range(1, 2 * max_ops - n + 1):  # number of empty nodes
                s.append(s[e - 1] + p1 * D[n - 1][e] + D[n - 1][e + 1])
            D.append(s)
        assert all(len(D[i]) >= len(D[i + 1]) for i in range(len(D) - 1))
        return D

    def generate_leaf(self, degree):
        if self.rng.rand() < self.prob_rand:
            return 'rand'
        else:
            draw = self.rng.rand()
            if draw < self.prob_const:
	            return self.rng.choice(self.constants)
            elif draw > self.prob_const and draw < self.prob_const + self.prob_n:
                return 'n'
            else:
                return f'x_{self.rng.randint(self.dimension)}_{self.rng.randint(degree)+1}'

    def generate_ops(self, arity):
        if arity==1:
            ops = [unary for unary in self.unaries]
        else:
            ops = [binary for binary in self.binaries]
        return self.rng.choice(ops)

    def sample_next_pos(self, nb_empty, nb_ops):
        """
        Sample the position of the next node (binary case).
        Sample a position in {0, ..., `nb_empty` - 1}.
        """
        assert nb_empty > 0
        assert nb_ops > 0
        probs = []
        if self.unary:
            for i in range(nb_empty):
                probs.append(self.distrib[nb_ops - 1][nb_empty - i])
        for i in range(nb_empty):
            probs.append(self.distrib[nb_ops - 1][nb_empty - i + 1])
        probs = [p / self.distrib[nb_ops][nb_empty] for p in probs]
        probs = np.array(probs, dtype=np.float64)
        e = self.rng.choice(len(probs), p=probs)
        arity = 1 if self.unary and e < nb_empty else 2
        e %= nb_empty
        return e, arity

    def generate_tree(self, nb_ops, degree):
        tree = Node(0, self.params)
        empty_nodes = [tree]
        next_en = 0
        nb_empty = 1
        while nb_ops > 0:
            next_pos, arity = self.sample_next_pos(nb_empty, nb_ops)
            for n in empty_nodes[next_en:next_en + next_pos]:
                n.value = self.generate_leaf(degree)
            next_en += next_pos
            op = self.generate_ops(arity)
            empty_nodes[next_en].value = op
            for _ in range(arity):
                e = Node(0, self.params)
                empty_nodes[next_en].push_child(e)
                empty_nodes.append(e)
            nb_empty += arity - 1 - next_pos
            nb_ops -= 1
            next_en += 1
        for n in empty_nodes[next_en:]:
            n.value = self.generate_leaf(degree)
        
        #tree = self.check_tree(tree, degree)
        
        return tree
    
   
    def generate(self, rng, nb_ops=None, deg=None, length=None, prediction_points=False):
        """prediction_points is a boolean which indicates whether we compute prediction points. By default we do not to save time. """
        self.rng = rng
        self.rng.seed() # TODO : fix this

        if deg is None:    deg    = self.rng.randint(1, self.max_degree + 1)
        if length is None: length = self.rng.randint(3*deg, self.max_len+1)

        if prediction_points:
            length +=  self.params.n_predictions
        
        trees = []
        if nb_ops is None: nb_ops = self.rng.randint(1, self.max_ops + 1, size=(self.dimension,))
        elif type(nb_ops)==int: nb_ops = [nb_ops]*self.dimension
            
        for i in range(self.dimension):
            trees.append(self.generate_tree(nb_ops[i],deg))
        tree = NodeList(trees)
        
        recurrence_degrees = tree.get_recurrence_degrees()
        min_recurrence_degree, max_recurrence_degree = min(recurrence_degrees), max(recurrence_degrees)

        initial_conditions = [[self.rng.uniform(-self.init_scale, self.init_scale) if self.real_series else self.rng.randint(-self.init_scale, self.init_scale+1) \
                               for _ in range(recurrence_degrees[dim])] for dim in range(self.dimension)]

        series = [initial_conditions[dim][deg] for dim in range(self.dimension) for deg in range(min_recurrence_degree)]
    
        print(tree.infix(), series)
        ##complete initial conditions by computing the real sequence
        for degree in range(min_recurrence_degree, max_recurrence_degree):
            dim_to_compute = [dim for dim in range(self.dimension)  if degree>=recurrence_degrees[dim]]
            try:
                next_values = tree.val(series,dim_to_compute=dim_to_compute)
            except Exception as e:
                #print(e, "degree: {}".format(degree), series, tree.infix())
                return None, None, None
            for dim in range(self.dimension):
                if next_values[dim] is None:
                    next_values[dim]=initial_conditions[dim][degree]
            next_values_array = np.array(next_values, dtype=np.float)
            if np.any(np.isnan(next_values_array)) or np.any(np.abs(next_values_array)>self.max_number): 
                return None, None, None
            series.extend(next_values)

        assert len(series)==max_recurrence_degree*self.dimension, "Problem with initial conditions"

        ##compute remaining points with given initial conditions
        for i in range(max_recurrence_degree, length):
            try:
                vals = tree.val(series)
            except Exception as e:
                #print(e, series, tree.infix())
                return None, None, None
            print(vals)
            vals_array = np.array(vals, dtype=np.float)
            if np.any(np.isnan(vals_array)) or np.any(np.abs(vals_array)>self.max_number): 
                return None, None, None
            series.extend(vals)
            
        if prediction_points:
            series_input = series[:-self.params.n_predictions*self.dimension+1]
            series_to_predict = series[-self.params.n_predictions*self.dimension:]
        else:
            series_input = series
            series_to_predict = None
            
        return tree, series_input, series_to_predict

    def evaluate(self, src, tgt, hyp, n_predictions=3):
        src_hyp = copy.deepcopy(src)
        src_tgt = copy.deepcopy(src)
        errors = []
        for i in range(n_predictions):
            try:
                pred = hyp.val(src_hyp, deterministic=True)
                src_hyp.extend(pred)
                true = tgt.val(src_tgt, deterministic=True)
                src_tgt.extend(true)
                errors.append(max([abs(float(p-t)/float(t+1e-100)) for p,t in zip(pred, true)]))
            except Exception as e:
                print(e)
                return -1
        return max(errors)        

    def chunks_idx(self, step, min, max):
        curr=min
        while curr<max:
            yield [i for i in range(curr, curr+step)]
            curr+=step

    def evaluate_numerical(self, tgt, hyp):
        errors = []
        
        for idx in self.chunks_idx(self.dimension, min=0, max=len(tgt)):
            try:
                pred=[hyp[i] for i in idx]
                true=[tgt[i] for i in idx]
                errors.append(max([abs(float(p-t)/float(t+1e-100)) for p,t in zip(pred, true)]))
            except IndexError or TypeError:
                return -1
        return max(errors)  

    def evaluate_without_target(self, src, hyp, n_predictions=3):
        errors = []
        targets = src[-n:]
        src = src[:-n]
        for i in range(n_predictions):
            pred = hyp.val(src)
            true = targets[i]
            src.extend(pred)
            errors.append(max([abs(float(p-t)/float(t+1e-100)) for p,t in zip(pred, true)]))
            # except:
            #     return -1
        return max(errors)
        


In [46]:
x=np.array([np.nan],dtype=np.float)
np.isnan(x)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """Entry point for launching an IPython kernel.


array([ True])

In [48]:
generator = RandomRecurrence(params)

rng = np.random.RandomState(0)
import time 

deb = time.time()
k_trials=1000
successful_trials=0
for k in range(k_trials):
    tree, series, _ = generator.generate(rng, length=15, nb_ops = 1)
    if tree is not None:
        successful_trials+=1
        #print(tree)
        #print(series)
        #for i in range(params.dimension):
        #    plt.title(tree.infix())
        #    plt.plot(series[i::params.dimension])
        #plt.yscale('symlog')
        #plt.show()
        
print("Percentage of successful generation: {}".format(successful_trials/k_trials*100))
print("Time: {}".format(time.time()-deb))

(n idiv 1) | (6)**2 | (5 sub n) []
[1, 36, 4]
[2, 36, 3]
[3, 36, 2]
[4, 36, 1]
[5, 36, 0]
[6, 36, -1]
[7, 36, -2]
[8, 36, -3]
[9, 36, -4]
[10, 36, -5]
[11, 36, -6]
[12, 36, -7]
[13, 36, -8]
[14, 36, -9]
[15, 36, -10]
(n add rand) | (x_1_1 mul -3) | (rand add x_0_1) []
[0, 6, 3]
[3, -18, 0]
[3, 54, 3]
[4, -162, 2]
[4, 486, 4]
[6, -1458, 4]
[7, 4374, 7]
[9, -13122, 6]
[10, 39366, 9]
[10, -118098, 10]
[11, 354294, 10]
[13, -1062882, 11]
[13, 3188646, 13]
[14, -9565938, 13]
[14, 28697814, 13]
(3)**2 | abs(3) | (x_2_1 sub rand) []
[9, 3, 10]
[9, 3, 10]
[9, 3, 9]
[9, 3, 10]
[9, 3, 11]
[9, 3, 10]
[9, 3, 11]
[9, 3, 10]
[9, 3, 11]
[9, 3, 12]
[9, 3, 12]
[9, 3, 11]
[9, 3, 12]
[9, 3, 12]
[9, 3, 12]
(x_2_2 mul -6) | (n sub x_1_3) | (rand sub rand) []
[-7, -10, 2]
[-7, -10, -1]
[-12, -10, -2]
[6, 14, 0]
[12, 15, 1]
[0, 16, 1]
[-6, -7, 0]
[-6, -7, 2]
[0, -7, 2]
[-12, 17, -1]
[-12, 18, 1]
[6, 19, 1]
[-6, -4, 0]
[-6, -4, 0]
[0, -4, -2]
(1 mul -5) | (x_1_2 idiv 3) | (-2 mul x_0_5) []
[-5, 3, -6]
[-5, -6

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  else: return self.children[0].val(series) // self.children[1].val(series)



[1, 0, 0]
[1, 0, 0]
[1, 0, 1]
[0, 0, 1]
[0, 0, 1]
[1, 0, 0]
[1, 0, 0]
[0, 0, 1]
[0, 0, 1]
[1, 0, 0]
[0, 0, 0]
[1, 0, 1]
(x_1_6 mod rand) | (n mod rand) | (-6 sub x_2_5) []
[-10, nan, 10]
(x_2_1 idiv x_0_3) | (n idiv x_0_3) | abs(rand) []
[-4, 6, 1]
[3, -9, 1]
[7, -2, 1]
[-1, -1, 1]
[0, 1, 1]
[0, 0, 1]
[-1, -7, 0]
[nan, nan, 1]
(8 sub rand) | (n mul n) | (-2)**2 []
[7, 1, 4]
[7, 4, 4]
[9, 9, 4]
[8, 16, 4]
[8, 25, 4]
[9, 36, 4]
[8, 49, 4]
[9, 64, 4]
[9, 81, 4]
[7, 100, 4]
[8, 121, 4]
[9, 144, 4]
[8, 169, 4]
[9, 196, 4]
[9, 225, 4]
(x_2_6 mul rand) | (n mod n) | (n)**2 []
[1, 0, 1]
[9, 0, 4]
[10, 0, 9]
[0, 0, 16]
[-5, 0, 25]
[6, 0, 36]
[-1, 0, 49]
[0, 0, 64]
[-9, 0, 81]
[-16, 0, 100]
[-25, 0, 121]
[-36, 0, 144]
[49, 0, 169]
[0, 0, 196]
[-81, 0, 225]
(rand sub -8) | abs(rand) | (6 mod n) []
[7, 0, 0]
[8, 1, 0]
[7, 1, 0]
[7, 1, 2]
[9, 1, 1]
[7, 1, 0]
[8, 0, 6]
[8, 1, 6]
[9, 0, 6]
[8, 1, 6]
[7, 0, 6]
[7, 1, 6]
[9, 1, 6]
[8, 1, 6]
[8, 1, 6]
(8 mod x_1_2) | (rand mod x_1_3) | (x_0_3)**2 [-2, 

# Checking encoder

In [None]:
class params:
    int_base = 10
    has_separator = False
    
encoder = Equation(params)
print(tree.infix())
encoded = encoder.encode(tree)
print(encoded)
decoded = encoder.decode(encoded)
print(decoded.infix())

In [None]:
class params:
    a=1
    
from src.envs.encoders import RealSeries

encoder = RealSeries(params)
x = series
print(x)
encoded = encoder.encode(x)
print(encoded)
decoded = encoder.decode(encoded)
print(decoded)