In [2]:
import pandas as pd
from lark import Lark, Transformer, v_args
import dhlab as dh
import tools_imag as ti

In [None]:
# Define the grammar for expressions
grammar = """
    ?start: expr

    ?expr: expr "+" term   -> add
         | expr "-" term   -> sub
         | term

    ?term: term "*" factor -> mul
         | term "/" factor -> div
         | factor

    ?factor: WORD           -> lookup
           | "(" expr ")"

    %import common.WORD
    %import common.WS_INLINE
    %ignore WS_INLINE
"""

# Parser using the defined grammar
parser = Lark(grammar, start='start', parser='lalr')

In [50]:
@v_args(inline=True)
class Calculate(Transformer):
    def __init__(self, current_corpus):
        self.current_corpus = current_corpus  # Store current_corpus in the instance

    def add(self, a, b):
        #print("Adding:", a, "and", b)  # Debug: print the two components being added
        if isinstance(a, pd.DataFrame) and isinstance(b, pd.DataFrame):
            # Add the two DataFrames column-wise, summing the aligned indices
            result = a.add(b, fill_value=0).sum(axis=1).to_frame(name='sum')
            #print("Result of add:", result)  # Debug: print the result of the addition
            return result
        return a + b

    def sub(self, a, b):
        if isinstance(a, pd.DataFrame) and isinstance(b, pd.DataFrame):
            return a.subtract(b, fill_value=0).sum(axis=1).to_frame(name='sum')
        return a - b

    def mul(self, a, b):
        if isinstance(a, pd.DataFrame) and isinstance(b, pd.DataFrame):
            return a.multiply(b, fill_value=1).sum(axis=1).to_frame(name='sum')
        return a * b

    def div(self, a, b):
        if isinstance(a, pd.DataFrame) and isinstance(b, pd.DataFrame):
            return a.divide(b, fill_value=1).sum(axis=1).to_frame(name='sum')
        return a / b

    def lookup(self, word):
        #print(f"Looking up word: {word}")  # Debug: print the word being looked up
        # Return the trendline (DataFrame) for the word or a default DataFrame if not found
        result = ti.corpus_ngram(self.current_corpus, word)
        return result
        # if isinstance(result, pd.DataFrame):
        #     print(f"Found DataFrame for '{word}':\n{result}")  # Debug: print the result of lookup
        #     return result
        # else:
        #     raise ValueError(f"Expected DataFrame for '{word}', got {type(result)}")

In [48]:
def evaluate_expression(expression, current_corpus):
    tree = parser.parse(expression)
    calc = Calculate(current_corpus)
    final_result = calc.transform(tree)  
    return final_result  # Just return the DataFrame


In [57]:
# Example usage
expression = "(hun/han)+oss"
result = evaluate_expression(expression, current_corpus)
result

Unnamed: 0_level_0,sum
year,Unnamed: 1_level_1
1877,1.454519
1878,0.891134
1889,0.956065
1890,2.503494
1892,1.307111
1893,1.347585
1894,1.601305
1895,1.831425
1896,3.736494
1897,1.441174


In [33]:
# Example usage
expression = "(og + i)"
result = evaluate_expression(expression, current_corpus)
#print(result)

In [34]:
result

Unnamed: 0_level_0,i,og
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1877,1.082178,2.806899
1878,1.457768,2.702116
1889,2.381237,2.418351
1890,1.635527,3.071263
1892,1.321326,2.700229
1893,1.492444,2.36587
1894,1.753502,2.199752
1895,0.989405,1.301028
1896,1.037639,1.556458
1897,1.63358,3.104632


In [5]:
corpus = ti.get_imag_corpus()

In [25]:
c = corpus[corpus.authors.str.contains("Hamsun")]
current_corpus = c

In [18]:
ti.corpus_ngram_old(corpus[(corpus.authors.str.contains("Skram"))],words=["og","i"])

Unnamed: 0_level_0,i,og
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1827,,
1830,0.017094,0.0
1882,0.021923,0.025827
1885,0.015685,0.029035
1887,0.022065,0.03514
1888,0.01441,0.031698
1889,0.009882,0.015852
1890,0.017305,0.029724
1891,0.015094,0.03155
1892,0.015053,0.034051


In [19]:
ti.corpus_ngram(corpus[(corpus.authors.str.contains("Skram"))],words="og i")

Unnamed: 0_level_0,i,og
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1827,0.0,0.0
1830,1.709402,0.0
1882,2.1923,2.582671
1885,1.568469,2.903516
1887,2.206483,3.514029
1888,1.440982,3.169814
1889,0.988234,1.585232
1890,1.682118,2.986499
1891,1.509409,3.155039
1892,1.5053,3.405127
