In [1]:
import sys
import re
from tqdm import tqdm

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
from collections import Counter

sys.path.append("../")
from calculus_path_mod.term_engine import *
from calculus_path_mod.reduction_strategy import *
from calculus_path_mod.terms import num_comparison, nat_numbers, arithm_ops, combinators, pairs, logic
from calculus_path_mod.terms.pseudonym import *

from calculus_path_mod.json_serialization import load_terms

# Load Terms filtered by LO & RI

In [2]:
# Two pre-filtered term collections, read with the project's load_terms helper.
# Judging by the file names, one set was filtered under the LO strategy and the
# other under RI — presumably by an earlier experiment in tests_11_retests; confirm.
terms_LO = load_terms("../../tests_11_retests/terms_210_filtered_LO.dat")
terms_RI = load_terms("../../tests_11_retests/terms_210_filtered_RI.dat")

# Collect more terms with normalization process data by applying the RI strategy to terms_LO & terms_RI

In [3]:
def gen_norm_data(terms_list, strategy, max_vertices=3_000, max_steps=400):
    """Record the step-by-step normalization trace of every term in ``terms_list``.

    Each term is reduced one step at a time with
    ``term.one_step_normalize_visual(strategy)``, and the ``simple_str()`` form
    of every intermediate term is collected.

    Parameters
    ----------
    terms_list : iterable
        Terms exposing ``simple_str()`` and ``one_step_normalize_visual(strategy)``.
    strategy : object
        Reduction strategy forwarded unchanged to ``one_step_normalize_visual``.
    max_vertices : int, default 3_000
        Stop reducing a term once the term returned by a step reports
        ``vertices_number`` above this value.
    max_steps : int, default 400
        Stop reducing a term once more than this many steps have been taken
        inside the loop.

    Returns
    -------
    dict[str, list[str]]
        Maps the ``simple_str()`` of each input term to its trace. The first
        entry is the ``simple_str()`` of the term returned by the initial step
        call; each later entry is a successive reduct. If two input terms share
        the same string form, the later trace replaces the earlier one.
    """
    normalized_terms_dict = dict()
    for term in tqdm(terms_list):
        term_name = term.simple_str()
        trace = normalized_terms_dict[term_name] = []
        term_red_steps = 0
        (step_term, _, _), norm_term = term.one_step_normalize_visual(strategy)
        trace.append(step_term.simple_str())

        # A falsy norm_term ends the loop (presumably "no further reduction" — confirm).
        while norm_term:
            trace.append(norm_term.simple_str())
            (step_term, _, _), norm_term = norm_term.one_step_normalize_visual(strategy)
            # Without this increment the step limit below could never trigger,
            # so a non-terminating reduction would only stop on the vertex limit.
            term_red_steps += 1

            # Computation limit: abandon terms that grow too large or take too long.
            if (step_term.vertices_number > max_vertices) or (term_red_steps > max_steps):
                norm_term = None
    return normalized_terms_dict

In [4]:
# Normalization traces for the terms_LO collection, reduced with RIStrategy.
# NOTE(review): the meaning of the "OO" suffix is not evident from this notebook — confirm.
res_OO = gen_norm_data(terms_LO, RIStrategy())

100%|██████████| 226/226 [00:37<00:00,  6.05it/s] 


In [5]:
# Normalization traces for the terms_RI collection, reduced with RIStrategy.
# NOTE(review): the meaning of the "IO" suffix is not evident from this notebook — confirm.
res_IO = gen_norm_data(terms_RI, RIStrategy())

100%|██████████| 223/223 [00:00<00:00, 266.93it/s]


# Prepare the dataset

In [6]:
# Flatten both trace dictionaries into two parallel lists:
#   simple_terms[i] - string form of a term seen somewhere in a trace
#   steps_lo[i]     - how many entries follow that term in the trace where it
#                     was first encountered (0 for the last entry of a trace)
# Only the first occurrence of each term string is kept.
# NOTE(review): for traces cut short by gen_norm_data's computation limit the
# count is relative to the truncated trace, not to a normal form.
steps_lo = []
simple_terms = []
seen_terms = set()  # hash lookup instead of scanning simple_terms for every entry

for res_ in (res_OO, res_IO):
    for list_red_steps in res_.values():
        total_steps = len(list_red_steps) - 1
        for inx_, term_str in enumerate(list_red_steps):
            if term_str not in seen_terms:
                seen_terms.add(term_str)
                simple_terms.append(term_str)
                steps_lo.append(total_steps - inx_)

In [7]:
# Two-column table: "steps_num" (from steps_lo) and "simple_terms" (term strings),
# aligned row by row.
df = pd.DataFrame({"steps_num": steps_lo, "simple_terms": simple_terms})
# Written without the index column; the ./data_RI directory must already exist.
df.to_csv("./data_RI/steps_simple_term_str.csv", index=False)