In [33]:
import sys
import re
import random

from tqdm import tqdm

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
from collections import Counter
from joypy import joyplot

sys.path.append("../")
from calculus_path_mod.term_engine import *
from calculus_path_mod.reduction_strategy import *
from calculus_path_mod.terms import num_comparison, nat_numbers, arithm_ops, combinators, pairs, logic
from calculus_path_mod.terms.pseudonym import *

from calculus_path_mod.json_serialization import load_terms
from fitter import Fitter, get_common_distributions
from calculus_utils.drawing import draw_steps_displot

# Load Terms filtered LO

In [34]:
terms_LO = load_terms("../tests_11_retests/terms_210_filtered_LO.dat")
# terms_RI = load_terms("../tests_11_retests/terms_210_filtered_RI.dat")

In [35]:
from calculus_path_mod.reduction_strategy import RandomOuterStrategy

ro_strategy = RandomOuterStrategy()


def tree_normalize_with_params(term, is_limited=True, max_count_step_term=400, steps=400, vertices_lim=7_000):
    norm_steps_list = []

    try:
        dict_redexes_indexes = ro_strategy._get_redexes_indexes(term)
    except ValueError:
        dict_redexes_indexes = dict()
    norm_params = {
        "term": term,
        "prev_term_redex": (0, -1),
        "redex_depths": list(dict_redexes_indexes.values()),
        "redex_indexes": list(dict_redexes_indexes.keys()),
        "step_time": 0.0,
    }

    norm_steps_list.append([norm_params, ])
    step_inx = 0
    is_normalizable = len(norm_params["redex_indexes"]) > 0

    while is_normalizable:
        norm_steps_list.append(list())
        iter_indexes = range(len(norm_steps_list[step_inx]))
        iter_indexes = iter_indexes if len(norm_steps_list[step_inx]) < max_count_step_term else random.sample(
            iter_indexes, max_count_step_term)
        for inx in iter_indexes:
            for redex_index in norm_steps_list[step_inx][inx]["redex_indexes"]:
                print(f"len_nsl={len(norm_steps_list)}, {inx}, {redex_index}")
                if is_limited and norm_steps_list[step_inx][inx]["term"].vertices_number > vertices_lim:
                    continue
                start_time = time.process_time()
                reduced_term = norm_steps_list[step_inx][inx]["term"]._update_bound_vars()
                reduced_term = reduced_term._beta_conversion_visual(redex_index)
                end_time = time.process_time()

                try:
                    dict_redexes_indexes = ro_strategy._get_redexes_indexes(reduced_term)
                except ValueError:
                    dict_redexes_indexes = dict()
                norm_params = {
                    "term": reduced_term,
                    "prev_term_redex": (inx, redex_index),
                    "redex_depths": list(dict_redexes_indexes.values()),
                    "redex_indexes": list(dict_redexes_indexes.keys()),
                    "step_time": start_time - end_time,
                }

                norm_steps_list[step_inx + 1].append(norm_params)

        if is_limited and step_inx == steps:
            break

        step_inx += 1
        is_normalizable = False
        for norm_params in norm_steps_list[step_inx]:
            if len(norm_params["redex_indexes"]) > 0:
                is_normalizable = True
                break
    return norm_steps_list

In [36]:
def gen_norm_tree_data(terms_list):
    steps = []
    for term in tqdm(terms_list):
        try:
            norm_data = tree_normalize_with_params(term, max_count_step_term=200, vertices_lim=3_000, steps=40)
            steps.append(norm_data)
        except Exception:
            steps.append(None)
    return steps

In [37]:
data_norm_tree = gen_norm_tree_data(terms_LO[:3])

  0%|          | 0/3 [00:00<?, ?it/s]

len_nsl=2, 0, 1
len_nsl=2, 0, 12
len_nsl=2, 0, 30
len_nsl=3, 1, 1
len_nsl=3, 1, 27
len_nsl=3, 2, 1
len_nsl=3, 2, 12
len_nsl=4, 1, 1
len_nsl=4, 3, 1
len_nsl=2, 0, 2
len_nsl=2, 0, 10
len_nsl=2, 0, 27
len_nsl=2, 0, 32
len_nsl=2, 0, 44
len_nsl=2, 0, 47
len_nsl=3, 0, 1
len_nsl=3, 0, 8
len_nsl=3, 0, 12
len_nsl=3, 0, 15
len_nsl=3, 0, 20
len_nsl=3, 0, 41
len_nsl=3, 0, 44
len_nsl=3, 1, 2
len_nsl=3, 1, 16
len_nsl=3, 1, 24
len_nsl=3, 1, 29
len_nsl=3, 1, 41
len_nsl=3, 1, 44
len_nsl=3, 2, 2
len_nsl=3, 2, 10
len_nsl=3, 2, 29
len_nsl=3, 2, 41
len_nsl=3, 2, 44
len_nsl=3, 3, 2
len_nsl=3, 3, 10
len_nsl=3, 3, 27
len_nsl=3, 3, 41
len_nsl=3, 3, 44
len_nsl=3, 4, 2
len_nsl=3, 4, 10
len_nsl=3, 4, 27
len_nsl=3, 4, 32
len_nsl=3, 4, 44
len_nsl=3, 5, 2
len_nsl=3, 5, 10
len_nsl=3, 5, 27
len_nsl=3, 5, 32
len_nsl=3, 5, 44
len_nsl=4, 0, 3
len_nsl=4, 0, 6
len_nsl=4, 0, 14
len_nsl=4, 0, 17
len_nsl=4, 0, 26
len_nsl=4, 0, 30
len_nsl=4, 0, 33
len_nsl=4, 0, 38
len_nsl=4, 1, 1
len_nsl=4, 1, 10
len_nsl=4, 1, 13
len_nsl=4, 1,

 67%|██████▋   | 2/3 [00:05<00:02,  2.90s/it]

len_nsl=22, 155, 37
len_nsl=22, 64, 1
len_nsl=22, 64, 17
len_nsl=22, 64, 29
len_nsl=22, 244, 1
len_nsl=22, 244, 17
len_nsl=22, 244, 48
len_nsl=22, 362, 7
len_nsl=22, 362, 27
len_nsl=22, 346, 1
len_nsl=22, 346, 17
len_nsl=22, 346, 40
len_nsl=22, 119, 1
len_nsl=22, 119, 9
len_nsl=22, 119, 20
len_nsl=22, 13, 7
len_nsl=22, 87, 1
len_nsl=22, 87, 6
len_nsl=22, 87, 57
len_nsl=22, 308, 15
len_nsl=22, 248, 7
len_nsl=22, 241, 1
len_nsl=22, 387, 15
len_nsl=22, 387, 27
len_nsl=22, 377, 1
len_nsl=22, 312, 24
len_nsl=22, 386, 1
len_nsl=22, 386, 17
len_nsl=22, 386, 48
len_nsl=22, 274, 1
len_nsl=22, 274, 17
len_nsl=22, 274, 57
len_nsl=22, 351, 1
len_nsl=22, 351, 26
len_nsl=22, 351, 48
len_nsl=22, 198, 1
len_nsl=22, 198, 45
len_nsl=22, 198, 57
len_nsl=22, 40, 24
len_nsl=22, 180, 1
len_nsl=22, 180, 26
len_nsl=22, 180, 57
len_nsl=22, 349, 1
len_nsl=22, 349, 45
len_nsl=22, 349, 57
len_nsl=22, 313, 1
len_nsl=22, 63, 7
len_nsl=22, 63, 18
len_nsl=22, 63, 30
len_nsl=22, 20, 1
len_nsl=22, 284, 1
len_nsl=22, 18

100%|██████████| 3/3 [00:13<00:00,  4.35s/it]

len_nsl=13, 538, 31
len_nsl=13, 538, 33
len_nsl=13, 306, 2
len_nsl=13, 306, 4
len_nsl=13, 306, 7
len_nsl=13, 306, 20
len_nsl=13, 306, 35
len_nsl=13, 296, 2
len_nsl=13, 296, 4
len_nsl=13, 296, 8
len_nsl=13, 144, 2
len_nsl=14, 309, 2
len_nsl=14, 275, 2
len_nsl=14, 275, 5
len_nsl=14, 275, 7
len_nsl=14, 549, 2
len_nsl=14, 549, 4
len_nsl=14, 549, 7
len_nsl=14, 549, 20
len_nsl=14, 303, 2
len_nsl=14, 303, 5
len_nsl=14, 303, 15
len_nsl=14, 303, 32
len_nsl=14, 400, 2
len_nsl=14, 400, 5
len_nsl=14, 223, 2
len_nsl=14, 223, 4
len_nsl=14, 223, 8
len_nsl=14, 223, 32
len_nsl=14, 360, 2
len_nsl=14, 360, 5
len_nsl=14, 360, 7
len_nsl=14, 360, 31
len_nsl=14, 436, 2
len_nsl=14, 575, 2
len_nsl=14, 575, 4
len_nsl=14, 579, 3
len_nsl=14, 373, 2
len_nsl=14, 373, 5
len_nsl=14, 373, 15
len_nsl=14, 247, 2
len_nsl=14, 247, 4
len_nsl=14, 247, 8
len_nsl=14, 531, 2
len_nsl=14, 467, 2
len_nsl=14, 467, 5
len_nsl=14, 467, 29
len_nsl=14, 231, 2
len_nsl=14, 529, 2
len_nsl=14, 529, 5
len_nsl=14, 600, 2
len_nsl=14, 600, 5
l




In [39]:
tsd_df = pd.DataFrame(columns=[
    "vertices_pre", "redexes_pre", "height_pre", "width_pre",
    "vertices_post", "redexes_post", "height_post", "width_post",
    "redex_depth", "redex_index", "step_time"
])

for term_steps_data in tqdm(data_norm_tree):
    for inx in range(1, len(term_steps_data)):
        for post_term_data in term_steps_data[inx]:
            prev_term_redex_info = post_term_data["prev_term_redex"]
            pre_term_data = term_steps_data[inx - 1][prev_term_redex_info[0]]

            term_data_dict = {
                "vertices_pre": [pre_term_data["term"].vertices_number],
                "redexes_pre": [len(pre_term_data["term"].redexes)],
                "height_pre": [pre_term_data["term"].term_height],
                "width_pre": [pre_term_data["term"].term_width],

                "vertices_post": [post_term_data["term"].vertices_number],
                "redexes_post": [len(post_term_data["term"].redexes)],
                "height_post": [post_term_data["term"].term_height],
                "width_post": [post_term_data["term"].term_width],

                "redex_depth": [pre_term_data["term"].redex_depth(prev_term_redex_info[1])],
                "redex_index": [prev_term_redex_info[1]],
                "step_time": [post_term_data["step_time"]],
            }

            term_data_dict["vertices_diff"] = [term_data_dict["vertices_pre"][0] - term_data_dict["vertices_post"][0]]
            term_data_dict["redexes_diff"] = [term_data_dict["redexes_pre"][0] - term_data_dict["redexes_post"][0]]
            term_data_dict["height_diff"] = [term_data_dict["height_pre"][0] - term_data_dict["height_post"][0]]
            term_data_dict["width_diff"] = [term_data_dict["width_pre"][0] - term_data_dict["width_post"][0]]

            tsd_df = pd.concat([tsd_df, pd.DataFrame(term_data_dict)], ignore_index=True).reset_index(drop=True)

100%|██████████| 3/3 [01:03<00:00, 21.10s/it]


In [40]:
len(tsd_df)

26396

In [41]:
tsd_df.head()

Unnamed: 0,vertices_pre,redexes_pre,height_pre,width_pre,vertices_post,redexes_post,height_post,width_post,redex_depth,redex_index,step_time,vertices_diff,redexes_diff,height_diff,width_diff
0,53,3,15,21,7,0,3,2,1,1,0.0,46.0,3.0,12.0,19.0
1,53,3,15,21,50,2,15,20,4,12,0.0,3.0,1.0,0.0,1.0
2,53,3,15,21,30,2,9,11,5,30,0.0,23.0,1.0,6.0,10.0
3,50,2,15,20,7,0,3,2,1,1,0.0,43.0,2.0,12.0,18.0
4,50,2,15,20,27,1,8,10,5,27,0.0,23.0,1.0,7.0,10.0


In [15]:
terms_LO[0]

<calculus_path_mod.term_engine.Application at 0x1186bbb6690>

In [16]:
term_0_norm_tree = tree_normalize_with_params(terms_LO[0], vertices_lim=3_000, steps=30)

len_nsl=2, 0, 1
len_nsl=2, 0, 12
len_nsl=2, 0, 30
len_nsl=3, 1, 1
len_nsl=3, 1, 27
len_nsl=3, 2, 1
len_nsl=3, 2, 12
len_nsl=4, 1, 1
len_nsl=4, 3, 1


In [17]:
len(term_0_norm_tree)

4

In [18]:
for inx, rec in enumerate(term_0_norm_tree):
    print(f"{inx}")
    for iinx, irec in enumerate(rec):
        print(iinx, irec)
    print("\n\n\n")

0
0 {'term': <calculus_path_mod.term_engine.Application object at 0x000001186BBB6690>, 'prev_term_redex': (0, -1), 'redex_depths': [1, 4, 5], 'redex_indexes': [1, 12, 30], 'step_time': 0.0}




1
0 {'term': <calculus_path_mod.term_engine.Abstraction object at 0x0000011878518890>, 'prev_term_redex': (0, 1), 'redex_depths': [], 'redex_indexes': [], 'step_time': 0.0}
1 {'term': <calculus_path_mod.term_engine.Application object at 0x0000011878645610>, 'prev_term_redex': (0, 12), 'redex_depths': [1, 5], 'redex_indexes': [1, 27], 'step_time': 0.0}
2 {'term': <calculus_path_mod.term_engine.Application object at 0x0000011878644650>, 'prev_term_redex': (0, 30), 'redex_depths': [1, 4], 'redex_indexes': [1, 12], 'step_time': 0.0}




2
0 {'term': <calculus_path_mod.term_engine.Abstraction object at 0x0000011878646310>, 'prev_term_redex': (1, 1), 'redex_depths': [], 'redex_indexes': [], 'step_time': 0.0}
1 {'term': <calculus_path_mod.term_engine.Application object at 0x0000011878646190>, 'prev_ter