# Playing with generator

In [1]:
#%%timeit
import glob
import os
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import torch
import numpy as np
import sys
import copy
from pathlib import Path
#from src.envs.generators import RandomRecurrence
#from src.envs.encoders import Equation, IntegerSeries, RealSeries
from sympy import *
import pickle
from collections import defaultdict, OrderedDict
import math

%reload_ext autoreload

#%cd ~/recur/notebooks
sys.path.append('/private/home/pakamienny/Research_2/recur')
import src
from src.envs.generators import RandomRecurrence
from src.envs.encoders import Equation, RealSeries, IntegerSeries
from src.envs.recurrence import RecurrenceEnvironment


In [12]:
from src.envs.generators import *

class RandomRecurrence(Generator):
    def __init__(self, params):
        super().__init__(params)
        self.params = params
        
        self.real_series = params.real_series
        self.prob_const = params.prob_const
        self.prob_n = params.prob_n
        self.prob_rand = params.prob_rand
        self.max_int = params.max_int
        self.max_degree = params.max_degree
        self.max_ops = params.max_ops
        self.max_len = params.max_len
        self.init_scale = params.init_scale
        self.dimension = params.dimension
        
        if params.real_series:
            self.max_number = 10**(params.max_exponent+params.float_precision)
            self.operators = operators_real
        else:
            self.max_number = params.max_number
            self.operators = operators_int
        self.unaries = [o for o in self.operators.keys() if self.operators[o] == 1]
        self.binaries = [o for o in self.operators.keys() if self.operators[o] == 2]
        self.unary = len(self.unaries) > 0
        self.distrib = self.generate_dist(2 * self.max_ops)

        self.constants = [str(i) for i in range(-self.max_int,self.max_int+1) if i!=0]
        if params.real_series:
            self.constants += math_constants
        self.symbols = list(self.operators) + [f'x_{i}_{j}' for i in range(self.dimension) for j in range(self.max_degree+1)] + self.constants + ['n', '|']
        self.symbols += ['rand']

    def generate_dist(self, max_ops):
        """
        `max_ops`: maximum number of operators
        Enumerate the number of possible unary-binary trees that can be generated from empty nodes.
        D[e][n] represents the number of different binary trees with n nodes that
        can be generated from e empty nodes, using the following recursion:
            D(n, 0) = 0
            D(0, e) = 1
            D(n, e) = D(n, e - 1) + p_1 * D(n- 1, e) + D(n - 1, e + 1)
        p1 =  if binary trees, 1 if unary binary
        """
        p1 = 1 if self.unary else 0
        # enumerate possible trees
        D = []
        D.append([0] + ([1 for i in range(1, 2 * max_ops + 1)]))
        for n in range(1, 2 * max_ops + 1):  # number of operators
            s = [0]
            for e in range(1, 2 * max_ops - n + 1):  # number of empty nodes
                s.append(s[e - 1] + p1 * D[n - 1][e] + D[n - 1][e + 1])
            D.append(s)
        assert all(len(D[i]) >= len(D[i + 1]) for i in range(len(D) - 1))
        return D

    def generate_leaf(self, rng, degree):
        if rng.rand() < self.prob_rand:
            return 'rand'
        else:
            draw = rng.rand()
            if draw < self.prob_const:
	            return rng.choice(self.constants)
            elif draw > self.prob_const and draw < self.prob_const + self.prob_n:
                return 'n'
            else:
                return f'x_{rng.randint(self.dimension)}_{rng.randint(degree)+1}'

    def generate_ops(self, rng, arity):
        if arity==1:
            ops = [unary for unary in self.unaries]
        else:
            ops = [binary for binary in self.binaries]
        return rng.choice(ops)

    def sample_next_pos(self, rng, nb_empty, nb_ops):
        """
        Sample the position of the next node (binary case).
        Sample a position in {0, ..., `nb_empty` - 1}.
        """
        assert nb_empty > 0
        assert nb_ops > 0
        probs = []
        if self.unary:
            for i in range(nb_empty):
                probs.append(self.distrib[nb_ops - 1][nb_empty - i])
        for i in range(nb_empty):
            probs.append(self.distrib[nb_ops - 1][nb_empty - i + 1])
        probs = [p / self.distrib[nb_ops][nb_empty] for p in probs]
        probs = np.array(probs, dtype=np.float64)
        e = rng.choice(len(probs), p=probs)
        arity = 1 if self.unary and e < nb_empty else 2
        e %= nb_empty
        return e, arity

    def generate_tree(self, rng, nb_ops, degree):
        tree = Node(0, self.params)
        empty_nodes = [tree]
        next_en = 0
        nb_empty = 1
        while nb_ops > 0:
            next_pos, arity = self.sample_next_pos(rng, nb_empty, nb_ops)
            for n in empty_nodes[next_en:next_en + next_pos]:
                n.value = self.generate_leaf(rng, degree)
            next_en += next_pos
            op = self.generate_ops(rng, arity)
            empty_nodes[next_en].value = op
            for _ in range(arity):
                e = Node(0, self.params)
                empty_nodes[next_en].push_child(e)
                empty_nodes.append(e)
            nb_empty += arity - 1 - next_pos
            nb_ops -= 1
            next_en += 1
        for n in empty_nodes[next_en:]:
            n.value = self.generate_leaf(rng, degree)
        
        #tree = self.check_tree(tree, degree)
        
        return tree
    
    #def check_tree(self, node, degree):
    #    '''
    #    Remove identical leafs
    #    '''
    #    if len(node.children)==0: return node
    #    elif len(node.children)==1: 
    #        if node.children[0].children:
    #            return self.check_tree(node.children[0], degree)
    #        else: 
    #            while isinstance(node.children[0].value,int):
    #                node.children[0].value = self.generate_leaf(degree)
    #    else:
    #        node.children[0] = self.check_tree(node.children[0], degree)
    #        node.children[1] = self.check_tree(node.children[1], degree)
    #        if bool(node.children[0].children or node.children[1].children): return node
    #        while (node.children[0].value == node.children[1].value) or (isinstance(node.children[0].value,int) and isinstance(node.children[1].value,int)):
    #            node.children[1].value = self.generate_leaf(degree)
    #    return node
        
    def generate(self, rng, nb_ops=None, deg=None, length=None, prediction_points=False):
        rng = rng
        rng.seed() 

        """prediction_points is a boolean which indicates whether we compute prediction points. By default we do not to save time. """
        if deg is None:    deg    = rng.randint(1, self.max_degree + 1)
        if length is None: length = rng.randint(3*deg, self.max_len+1)

        if prediction_points:
            length +=  self.params.n_predictions
        
        trees = []
        if nb_ops is None: nb_ops = rng.randint(1, self.max_ops + 1, size=(self.dimension,))
        elif type(nb_ops)==int: nb_ops = [nb_ops]*self.dimension
            
        for i in range(self.dimension):
            trees.append(self.generate_tree(rng, nb_ops[i],deg))
        tree = NodeList(trees)
        
        recurrence_degrees = tree.get_recurrence_degrees()
        min_recurrence_degree, max_recurrence_degree = min(recurrence_degrees), max(recurrence_degrees)

        initial_conditions = [[rng.uniform(-self.init_scale, self.init_scale) if self.real_series else rng.randint(-self.init_scale, self.init_scale+1) \
                               for _ in range(recurrence_degrees[dim])] for dim in range(self.dimension)]

        series = [initial_conditions[dim][deg] for dim in range(self.dimension) for deg in range(min_recurrence_degree)]

        ##complete initial conditions by computing the real sequence
        for degree in range(min_recurrence_degree, max_recurrence_degree):
            dim_to_compute = [dim for dim in range(self.dimension)  if degree>=recurrence_degrees[dim]]
            try:
                next_values = tree.val(series,dim_to_compute=dim_to_compute)
            except Exception as e:
                #print(e, "degree: {}".format(degree), series, tree.infix())
                return None, None, None
            for dim in range(self.dimension):
                if next_values[dim] is None:
                    next_values[dim]=initial_conditions[dim][degree]
                    
            if any([abs(x)>self.max_number for x in next_values]): 
                return None, None, None
            try:
                next_values_array = np.array(next_values, dtype=np.float)
            except OverflowError as e:
                print(tree, next_values)
                return None, None, None
            
            if np.any(np.isnan(next_values_array)): 
                return None, None, None
            series.extend(next_values)

        assert len(series)==max_recurrence_degree*self.dimension, "Problem with initial conditions"

        ##compute remaining points with given initial conditions
        for i in range(max_recurrence_degree, length):
            try:
                vals = tree.val(series)
            except Exception as e:
                #print(e, series, tree.infix())
                return None, None, None
            
            if any([abs(x)>self.max_number for x in vals]): 
                return None, None, None
            try:
                vals_array = np.array(vals, dtype=np.float)
            except OverflowError as e:
                print(tree, vals)
                return None, None, None
            if np.any(np.isnan(vals_array)): 
                return None, None, None
            
            series.extend(vals)
            
        if prediction_points:
            series_input = series[:-self.params.n_predictions*self.dimension+1]
            series_to_predict = series[-self.params.n_predictions*self.dimension:]
        else:
            series_input = series
            series_to_predict = None
            
        return tree, series_input, series_to_predict

    def evaluate(self, src, tgt, hyp, n_predictions=3):
        src_hyp = copy.deepcopy(src)
        src_tgt = copy.deepcopy(src)
        errors = []
        for i in range(n_predictions):
            try:
                pred = hyp.val(src_hyp, deterministic=True)
                src_hyp.extend(pred)
                true = tgt.val(src_tgt, deterministic=True)
                src_tgt.extend(true)
                errors.append(max([abs(float(p-t)/float(t+1e-100)) for p,t in zip(pred, true)]))
            except Exception as e:
                print(e)
                return -1
        return max(errors)        

    def chunks_idx(self, step, min, max):
        curr=min
        while curr<max:
            yield [i for i in range(curr, curr+step)]
            curr+=step

    def evaluate_numerical(self, tgt, hyp):
        errors = []
        
        for idx in self.chunks_idx(self.dimension, min=0, max=len(tgt)):
            try:
                pred=[hyp[i] for i in idx]
                true=[tgt[i] for i in idx]
                errors.append(max([abs(float(p-t)/float(t+1e-100)) for p,t in zip(pred, true)]))
            except IndexError or TypeError:
                return -1
        return max(errors)  

    def evaluate_without_target(self, src, hyp, n_predictions=3):
        errors = []
        targets = src[-n:]
        src = src[:-n]
        for i in range(n_predictions):
            pred = hyp.val(src)
            true = targets[i]
            src.extend(pred)
            errors.append(max([abs(float(p-t)/float(t+1e-100)) for p,t in zip(pred, true)]))
            # except:
            #     return -1
        return max(errors)
        

In [17]:
class params:
    float_precision = 3
    max_len = 30
    max_degree = 6
    max_number = 1e100
    real_series = False
    output_numeric=True
    max_exponent = 100
    init_scale = 10
    prob_const=1/4
    prob_n = 1/4
    max_int = 10
    prob_rand = 0.1
    dimension=2
    int_base=10
    n_predictions=5
    float_tolerance=0.1
    more_tolerance="0.1"
    max_ops=4
    
generator = RandomRecurrence(params)
env = RecurrenceEnvironment(params)
rng = np.random.RandomState(0)
import time 

SPECIAL_WORDS = ["EOS", "PAD", "(", ")", "SPECIAL"]

if params.real_series:
    input_encoder = RealSeries(params)
else:
    input_encoder = IntegerSeries(params)
input_words = SPECIAL_WORDS+sorted(list(set(input_encoder.symbols)))

if params.output_numeric:
    if params.real_series:
        output_encoder = RealSeries(params)
    else:
        output_encoder = IntegerSeries(params)
else:
    output_encoder = Equation(params)

deb = time.time()
k_trials=10000
successful_trials=0
size_input=[]
size_output=[]
n_ops_stats = {i:0 for i in range((params.max_ops+1)*params.dimension)}
for k in range(k_trials):
    tree, series, series_to_predict = generator.generate(rng, length=15, prediction_points=True)
    if tree is not None:
        successful_trials+=1
        size_input.append(len(input_encoder.encode(series)))
        if not params.output_numeric:
            size_output.append(len(output_encoder.encode(tree)))
        else:
            size_output.append(len(output_encoder.encode(series_to_predict)))
        n_ops = env.code_class(None, tree)
        n_ops_stats[n_ops]+=1
        #print(tree)
        #print(series)
        #for i in range(params.dimension):
        #    plt.title(tree.infix())
        #    plt.plot(series[i::params.dimension])
        #plt.yscale('symlog')
        #plt.show()
size_input=np.mean(np.array(size_input))
size_output=np.mean(np.array(size_output))

print("Percentage of successful generation: {}".format(successful_trials/k_trials*100))
print("Time: {}".format(time.time()-deb))
print("Average sizes: input {}, output {}".format(size_input.max(), size_output.max()))
print(n_ops_stats)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


Percentage of successful generation: 54.2
Time: 12.216681003570557
Average sizes: input 96.49760147601476, output 48.80590405904059
{0: 0, 1: 0, 2: 511, 3: 886, 4: 1141, 5: 1332, 6: 862, 7: 463, 8: 225, 9: 0}


# Checking encoder

In [4]:
class params:
    int_base = 10
    has_separator = False
    
encoder = Equation(params)
print(tree.infix())
encoded = encoder.encode(tree)
print(encoded)
decoded = encoder.decode(encoded)
print(decoded.infix())

AttributeError: 'NoneType' object has no attribute 'infix'

In [None]:
class params:
    a=1
    
from src.envs.encoders import RealSeries

encoder = RealSeries(params)
x = series
print(x)
encoded = encoder.encode(x)
print(encoded)
decoded = encoder.decode(encoded)
print(decoded)