In [37]:
import math
import re
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [38]:
from tqdm import tqdm


In [39]:
# Never truncate wide frames/records when they are displayed.
pd.options.display.max_columns = None

In [40]:
# Load the working table. Later cells read its `biomasa_eq`, `densidad_eq`,
# `diametro`, `altura` and `volumen` columns and write a `biomasa` column.
df = pd.read_csv("../out.csv")

In [41]:
def str2function(function, equation_type):
    """
    Converts a string representation of an equation into a lambda function with the correct variables and mathematical expressions.

    ### Parameters:
    - **function** (`str`): The string representation of the equation.
    - **equation_type** (`str`): The type of equation, which can be "vol", "bio", or "car". This determines the specific variable replacements and list of variables used.

    ### Returns:
    - **lambda_function** (`lambda`): The generated lambda function based on the input string. Its positional parameters are exactly the names in `filtered_list`, in that order.
    - **filtered_list** (`list`): The variables used by the equation, ordered as in the variable set of the equation type ("vol": d130, ht; "bio": densi, v, d130, ht; "car": b, d130, ht).

    ### Raises:
    - `ValueError`: If `equation_type` is not "vol", "bio" or "car".
    - `TypeError`: If `function` is not a string (e.g. a NaN read from a CSV).

    ### Example Usage:
    ```python
    equation_str = "[d130] * [ht] + [p]"
    lambda_func, variables = str2function(equation_str, "bio")
    # variables == ["densi", "d130", "ht"], so arguments follow that order
    result = lambda_func(0.9, 10, 5)
    ```

    ### Notes:
    - The function performs necessary replacements (like `[d130]` to `d130`) and handles specific mathematical expressions like `Exp` and `LOG`.
    - A variable counts as "used" only when it appears as a whole token, so a name such as `b` is not falsely detected inside `abs(...)`.
    - The equation text is executed with `eval`; only pass equations from a trusted source.
    """

    # Replacements and variables based on the equation type
    replacements = {
        "vol": {
            "[d130]": "d130",
            "[ht]": "ht"
        },
        "bio": {
            "Exp": "math.exp",
            "[p]": "densi",
            "[v]": "v",
            "[d130]": "d130",
            "[ht]": "ht",
            "LOG": "math.log"
        },
        "car": {
            "[b]": "b",
            "[d130]": "d130",
            "[ht]": "ht"
        }
    }

    variable_sets = {
        "vol": ["d130", "ht"],
        "bio": ["densi", "v", "d130", "ht"],
        "car": ["b", "d130", "ht"]
    }

    # Validate equation_type
    if equation_type not in replacements or equation_type not in variable_sets:
        raise ValueError(f'{equation_type} is not known. The only available options are "vol", "bio", "car".')

    # Missing equations typically arrive as float NaN; fail with a clear message
    # instead of an AttributeError from `.replace`.
    if not isinstance(function, str):
        raise TypeError(f"equation must be a string, got {type(function).__name__}")

    # Apply replacements based on the equation type
    for key, value in replacements[equation_type].items():
        function = function.replace(key, value)

    # Keep only the variables that occur as standalone identifiers. The
    # lookbehind rejects a preceding word character or dot (attribute access),
    # the lookahead rejects a following word character, so substrings of other
    # names (e.g. "b" in "abs") are not mistaken for variables.
    filtered_list = [
        var
        for var in variable_sets[equation_type]
        if re.search(rf"(?<![\w.]){re.escape(var)}(?!\w)", function)
    ]

    # SECURITY NOTE: eval runs the equation text as Python code. This is only
    # acceptable because the equations come from a curated table; do not feed
    # untrusted strings through this function.
    lambda_function = eval(f'lambda {", ".join(filtered_list)}: {function}')

    return lambda_function, filtered_list

In [6]:
# Inspect the record whose index label is 15 (label-based lookup via .loc).
df.loc[15]

id                                                              16
anio_levantamiento                                            2005
conglomerado                                                   153
sitio                                                            1
condicion                                                    Tocón
especie_id                                                  2270.0
familia                                               Cupressaceae
genero                                                   Juniperus
epiteto                                                        NaN
categoria_infra                                                NaN
infraespecie                                                   NaN
numero_arbol                                                    14
numero_tallo                                                   NaN
tallos                                                         NaN
diametro                                                      

In [7]:
# Frequency of each distinct biomass equation string in the table.
df["biomasa_eq"].value_counts()

biomasa_eq
(Exp(-2.754)*[d130]**2.574)                                30661
Exp(-2.27)*([d130]**2.39)                                  11213
(3.14159 * (([d130] / 2 / 100) ** 2) * [ht]) / 3            9328
Exp(0.9173)*([d130]**1.0730)                                6452
Exp(-1.6469)*([d130]**2.1255)                               5974
(Exp(-2.084)*[d130]**2.323)                                 5029
(1.7737)*([d130]**1.2282)                                   4199
(0.283)*((([d130]**2)*[ht])**0.807)                         3464
(Exp(-0.877)*[d130]**1.98)                                  2805
(0.0416)*([d130]**2.7154)                                   2055
(Exp(-2.187+(0.916*(LOG(([p])*(([d130])**2)*([ht]))))))     1954
(0.0841)*([d130]**2.41)                                     1803
(Exp(-2.592)*[d130]**2.585)                                 1718
(0.1354)*([d130]**2.3033)                                   1659
(0.0342)*([d130]**2.7590)                                   1356
(Exp(-3.066)*[

In [45]:
# Maps every variable name str2function can report for a "bio" equation to a
# getter that reads the matching value from a row. Getters are lazy so a column
# is only read (and densidad_eq only converted) when the equation needs it.
biomass_inputs = {
    "densi": lambda r: float(r.densidad_eq),
    "v": lambda r: r.volumen,
    "d130": lambda r: r.diametro,
    "ht": lambda r: r.altura,
}

# Evaluate each row's biomass equation and store the result in `biomasa`.
for row in tqdm(df.itertuples(), total=len(df)):
    index = row.Index
    try:
        f, var = str2function(row.biomasa_eq, "bio")
        # Build the arguments from the variable names the lambda expects, in
        # order, so any combination of variables is called correctly rather
        # than only a few hard-coded combinations.
        df.at[index, 'biomasa'] = f(*(biomass_inputs[name](row) for name in var))
    except Exception as e:
        print(f"Error calculating biomass at index {index}: {e}")
        # Stop at the first failing row so it can be inspected.
        break

100%|████████████████████████████████| 100001/100001 [00:02<00:00, 46059.49it/s]


In [42]:
# Pick the record at position 937 (positional lookup via .iloc) for a closer look.
# Note: the biomass loops in this notebook also bind the name `row`, so after
# running one of them `row` holds the last itertuples() tuple instead.
row = df.iloc[937]

In [43]:
# Show the raw biomass equation string stored for the selected record.
row.biomasa_eq

'Exp(0.9173)*([d130]**1.0730)'

In [44]:
# Display every field of the selected record.
row

id                                                938
anio_levantamiento                               2005
conglomerado                                     1049
sitio                                               2
condicion                                        Vivo
especie_id                                    13840.0
familia                                     Cactaceae
genero                                      Carnegiea
epiteto                                      gigantea
categoria_infra                                   NaN
infraespecie                                      NaN
numero_arbol                                        1
numero_tallo                                      NaN
tallos                                            0.0
diametro                                          7.5
altura                                           5.35
is_predicted                                      0.0
latitud                                       31.9438
longitud                    

In [16]:
# str2function as defined in this notebook returns (lambda, variable_names);
# this cell was written against a variant that apparently also returned the
# converted equation string. Star-unpacking accepts either shape instead of
# raising "not enough values to unpack"; `st` is None when no string is returned.
f, var, *extra = str2function(row.biomasa_eq, "bio")
st = extra[0] if extra else None

In [18]:
# Show the three values (densidad_eq as float, diametro, altura) that the
# next cell passes to the biomass lambda.
float(row.densidad_eq), row.diametro, row.altura

(0.55779, 0.0, 5.35)

In [17]:
# Evaluate the biomass lambda for this record. The recorded output is
# "ValueError: math domain error"; the inputs shown in the previous cell have
# diametro == 0.0, which presumably makes the log argument zero — confirm.
f(float(row.densidad_eq), row.diametro, row.altura)

ValueError: math domain error

In [15]:
# Manual evaluation of the exp/log biomass formula with hand-picked literal
# inputs (0.4612, 16.5, 0.3) as a sanity check that it works for non-zero values.
(math.exp(-2.187+(0.916*(math.log((0.4612)*((16.5)**2)*(0.3))))))

3.117412128603401

In [19]:
# Show the converted equation string bound to `st` by the earlier cell that
# calls str2function. NOTE(review): the str2function defined in this notebook
# returns only (lambda, variables), so confirm `st` still carries that string.
st

'(math.exp(-2.187+(0.916*(math.log((densi)*((d130)**2)*(ht))))))'

In [None]:
# Maps every variable name str2function can report for a "bio" equation to a
# getter that reads the matching value from a row. Getters are lazy so a column
# is only read (and densidad_eq only converted) when the equation needs it.
biomass_inputs = {
    "densi": lambda r: float(r.densidad_eq),
    "v": lambda r: r.volumen,
    "d130": lambda r: r.diametro,
    "ht": lambda r: r.altura,
}

# Evaluate each row's biomass equation and store the result in `biomasa`.
# Unlike a stop-on-first-error run, this pass reports every failing row
# (with its equation) and keeps going.
for row in tqdm(df.itertuples(), total=len(df)):
    index = row.Index
    try:
        f, var = str2function(row.biomasa_eq, "bio")
        # Build the arguments from the variable names the lambda expects, in
        # order, so any combination of variables is called correctly rather
        # than only a few hard-coded combinations.
        df.at[index, 'biomasa'] = f(*(biomass_inputs[name](row) for name in var))
    except Exception as e:
        print(f"Error calculating biomass at index {index}: {e}")
        print(row.biomasa_eq)

In [23]:
# Load a second table from ../csvs/modelo2.csv; the next cell reads an equation
# string from it by position.
m = pd.read_csv("../csvs/modelo2.csv")

In [27]:
# Peek at the value in the first row, seventh column (positional lookup); the
# recorded output is an equation string in the same bracketed format.
m.iloc[0,6]

'(0.1649)*([d130]**2.2755)'