In [1]:
import sys
import re
import random
import time

from tqdm import tqdm

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
from collections import Counter
from joypy import joyplot

sys.path.append("../")
from calculus_path_mod.term_engine import *
from calculus_path_mod.reduction_strategy import *
from calculus_path_mod.terms import num_comparison, nat_numbers, arithm_ops, combinators, pairs, logic
from calculus_path_mod.terms.pseudonym import *

from calculus_path_mod.json_serialization import load_terms
from fitter import Fitter, get_common_distributions
from calculus_utils.drawing import draw_steps_displot

# Load terms (LO-filtered)

In [2]:
# Load the serialized LO-filtered term collection used by the cells below.
terms_LO = load_terms("../tests_11_retests/terms_210_filtered_LO.dat")
# The RI-filtered counterpart is currently disabled.
# terms_RI = load_terms("../tests_11_retests/terms_210_filtered_RI.dat")

In [3]:
# NOTE(review): this import sits outside the notebook's import cell; consider moving it there.
from calculus_path_mod.reduction_strategy import RandomOuterStrategy

# Module-level strategy instance; tree_normalize_with_params reads it as a global
# to enumerate the redexes of a term.
# NOTE(review): no random seed is set anywhere in the visible cells, while
# tree_normalize_with_params calls random.sample — runs are not reproducible.
ro_strategy = RandomOuterStrategy()


def _redexes_or_empty(term):
    """Return the redex mapping ``ro_strategy`` reports for ``term``.

    A ``ValueError`` raised by the strategy is treated as "no redexes" and
    yields an empty dict.
    """
    try:
        return ro_strategy._get_redexes_indexes(term)
    except ValueError:
        return dict()


def _make_norm_record(term, prev_term_redex, step_time):
    """Build one tree record for ``term`` together with its current redexes."""
    dict_redexes_indexes = _redexes_or_empty(term)
    return {
        "term": term,
        "prev_term_redex": prev_term_redex,
        "redex_depths": list(dict_redexes_indexes.values()),
        "redex_indexes": list(dict_redexes_indexes.keys()),
        "step_time": step_time,
    }


def tree_normalize_with_params(term, is_limited=True, max_count_step_term=400, steps=400, vertices_lim=7_000):
    """Build the level-by-level tree of terms reachable by contracting redexes.

    Level 0 holds a single record for ``term``. Every following level holds one
    record per (parent record, redex index) pair expanded from the previous
    level, where the redex indexes are the keys returned by
    ``ro_strategy._get_redexes_indexes``.

    Parameters
    ----------
    term
        Start term. It must provide ``_update_bound_vars()``,
        ``_beta_conversion_visual(index)`` and ``vertices_number``.
    is_limited : bool
        When true, ``steps`` and ``vertices_lim`` are enforced.
    max_count_step_term : int
        If a level holds at least this many records, only a random sample of
        this size (``random.sample``) is expanded.
    steps : int
        With ``is_limited``, expansion stops right after the level with index
        ``steps`` has been expanded.
    vertices_lim : int
        With ``is_limited``, records whose term has more than this many
        vertices are not expanded.

    Returns
    -------
    list[list[dict]]
        One list of records per level. Each record has the keys ``"term"``,
        ``"prev_term_redex"`` (index of the parent record in the previous
        level, contracted redex index; ``(0, -1)`` for the root),
        ``"redex_depths"``, ``"redex_indexes"`` and ``"step_time"`` (elapsed
        ``time.process_time_ns()`` for the reduction step, ``0.0`` for the
        root). The last level may be empty.
    """
    norm_steps_list = [[_make_norm_record(term, (0, -1), 0.0)]]
    step_inx = 0
    is_normalizable = len(norm_steps_list[0][0]["redex_indexes"]) > 0

    while is_normalizable:
        current_level = norm_steps_list[step_inx]
        next_level = []
        norm_steps_list.append(next_level)

        # Cap the amount of work per level by expanding only a random subset.
        iter_indexes = range(len(current_level))
        if len(current_level) >= max_count_step_term:
            iter_indexes = random.sample(iter_indexes, max_count_step_term)

        for inx in iter_indexes:
            parent = current_level[inx]
            # The size limit depends on the parent only, so check it once
            # instead of once per redex.
            if is_limited and parent["term"].vertices_number > vertices_lim:
                continue
            for redex_index in parent["redex_indexes"]:
                start_time = time.process_time_ns()
                reduced_term = parent["term"]._update_bound_vars()
                reduced_term = reduced_term._beta_conversion_visual(redex_index)
                end_time = time.process_time_ns()

                # Elapsed time is end - start; the reversed subtraction
                # produced non-positive durations.
                next_level.append(
                    _make_norm_record(reduced_term, (inx, redex_index), end_time - start_time))

        if is_limited and step_inx == steps:
            break

        step_inx += 1
        # Continue only while at least one record of the new level has a redex.
        is_normalizable = any(
            len(norm_params["redex_indexes"]) > 0 for norm_params in norm_steps_list[step_inx])
    return norm_steps_list

In [4]:
# Number of loaded terms.
len(terms_LO)

226

In [5]:
def gen_norm_tree_data(terms_list, start_inx=223, out_dir="./collected_data"):
    """Collect per-step reduction statistics for terms and write one CSV per term.

    For every term with index ``start_inx .. len(terms_list) - 1`` the
    normalization tree is built with ``tree_normalize_with_params``
    (``max_count_step_term=200``, ``vertices_lim=3_000``, ``steps=40``). Each
    record of the tree except the root becomes one CSV row describing the
    parent ("pre") term, the resulting ("post") term, the contracted redex and
    the pre-minus-post differences.

    Parameters
    ----------
    terms_list
        Indexable collection of terms.
    start_inx : int
        Index of the first term to process (defaults to the previously
        hard-coded 223).
    out_dir : str
        Existing directory that receives ``tree_reduction_t_<index>.csv``.

    Notes
    -----
    A term whose tree construction raises an ``Exception`` is reported and
    skipped; ``KeyboardInterrupt`` is no longer swallowed. A term that yields
    no rows still gets a CSV containing the full header.
    """
    columns = [
        "vertices_pre", "redexes_pre", "height_pre", "width_pre",
        "vertices_post", "redexes_post", "height_post", "width_post",
        "redex_depth", "redex_index", "step_time",
        "vertices_diff", "redexes_diff", "height_diff", "width_diff",
    ]

    for inx_term in tqdm(range(start_inx, len(terms_list))):
        try:
            term_steps_data = tree_normalize_with_params(
                terms_list[inx_term], max_count_step_term=200, vertices_lim=3_000, steps=40)
        except Exception as err:
            # Best effort: keep processing the remaining terms, but say which one failed.
            tqdm.write(f"term {inx_term} skipped: {type(err).__name__}: {err}")
            continue

        # Gather plain dict rows and build the frame once; concatenating a
        # one-row frame per step is quadratic in the number of rows.
        records = []
        for inx in range(1, len(term_steps_data)):
            for post_term_data in term_steps_data[inx]:
                pre_inx, redex_index = post_term_data["prev_term_redex"]
                pre_term = term_steps_data[inx - 1][pre_inx]["term"]
                post_term = post_term_data["term"]

                record = {
                    "vertices_pre": pre_term.vertices_number,
                    "redexes_pre": len(pre_term.redexes),
                    "height_pre": pre_term.term_height,
                    "width_pre": pre_term.term_width,

                    "vertices_post": post_term.vertices_number,
                    "redexes_post": len(post_term.redexes),
                    "height_post": post_term.term_height,
                    "width_post": post_term.term_width,

                    "redex_depth": pre_term.redex_depth(redex_index),
                    "redex_index": redex_index,
                    "step_time": post_term_data["step_time"],
                }
                for metric in ("vertices", "redexes", "height", "width"):
                    record[f"{metric}_diff"] = record[f"{metric}_pre"] - record[f"{metric}_post"]
                records.append(record)

        tsd_df = pd.DataFrame(records, columns=columns)
        tsd_df.to_csv(f"{out_dir}/tree_reduction_t_{inx_term}.csv")

In [6]:
# Run the data collection over the loaded terms; gen_norm_tree_data writes its
# results to CSV files rather than returning them.
gen_norm_tree_data(terms_LO)

100%|██████████| 3/3 [00:23<00:00,  7.74s/it]


In [None]:
# Display the first loaded term.
terms_LO[0]

In [None]:
# Build the normalization tree of the first term with a tighter vertex limit
# and step bound than the function defaults.
term_0_norm_tree = tree_normalize_with_params(terms_LO[0], vertices_lim=3_000, steps=30)

In [None]:
# Number of levels in the tree, counting the root level.
len(term_0_norm_tree)

In [None]:
# Dump the tree level by level: the level number, then every record of that
# level prefixed with its position, then a run of blank lines as a separator.
for level_no, level_records in enumerate(term_0_norm_tree):
    print(f"{level_no}")
    for record_no, record in enumerate(level_records):
        print(record_no, record)
    print("\n\n\n")