# In [3]:
from dash import Dash, dcc, html, Input, Output, State, callback
import base64
import os
from mistralai import Mistral

def encode_image(image_path):
    """Read a file in binary mode and return its content as base64 text.

    Args:
        image_path: Path of the file to read.

    Returns:
        The base64 representation of the file content as a UTF-8 string, or
        ``None`` when the file is missing or any other error occurs (the
        problem is reported on stdout in both cases).
    """
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        encoded = base64.b64encode(raw_bytes)
        return encoded.decode('utf-8')
    except FileNotFoundError:
        # Missing file gets its own message that names the offending path.
        print(f"Error: The file {image_path} was not found.")
    except Exception as e:
        # Any other failure (permissions, directories, ...) is reported as-is.
        print(f"Error: {e}")
    return None
    
def format_to_triple_quotes(single_line_string):
    """Expand an escaped single-line string into readable multi-line text.

    Every literal two-character sequence backslash + ``n`` is treated as a
    line separator, and inside each resulting line a doubled backslash is
    collapsed into a single one.

    Args:
        single_line_string (str): Text containing escaped line breaks.

    Returns:
        str: The lines joined with real newline characters, followed by one
        trailing newline.
    """
    # Split first, then unescape each piece, so the doubled-backslash
    # replacement is applied per line.
    unescaped_lines = (
        segment.replace('\\\\', '\\')
        for segment in single_line_string.split('\\n')
    )
    return '\n'.join(unescaped_lines) + '\n'

# Dash application instance; its layout is assigned below.
app = Dash()

# Initial content shared by the editor and the preview: a Markdown sample with
# one block-level ($$ ... $$) and one inline ($ ... $) LaTeX formula. It is a
# raw string so the LaTeX backslashes are not interpreted as escape sequences.
default_text = r"""This example uses the block delimiter:
$$
\frac{1}{(\sqrt{\phi \sqrt{5}}-\phi) e^{\frac25 \pi}} =
1+\frac{e^{-2\pi}} {1+\frac{e^{-4\pi}} {1+\frac{e^{-6\pi}}
{1+\frac{e^{-8\pi}} {1+\ldots} } } }
$$

This example uses the inline delimiter:
$E^2=m^2c^4+p^2c^2$"""

# Page layout: a submit button, a heading, and a two-column flex row with the
# editable text on the left and its rendered Markdown on the right.
app.layout = html.Div(children=[
    # Button whose click count is the Input of the `update_output` callback.
    html.Button(id='figure-reader', n_clicks=0, children='Submit'),

    html.H1(children='Mistral OCR'),

    html.Div(style={'display': 'flex'}, children=[
        # Left column: editable Markdown source, pre-filled with default_text.
        html.Div(style={'flex': '50%'}, children=[
            dcc.Textarea(id="input1", placeholder="", value=default_text, 
                        style={'height':'500px', 'width': '95%'}),
        ]),
        # Right column: container whose children are replaced by the
        # `update_markdown_output` callback; initially shows default_text
        # rendered with MathJax enabled.
        html.Div(id='markdown-output', style={'flex': '50%'}, children=[
            dcc.Markdown(default_text, mathjax=True)
        ])
    ])
])

# Live preview: whenever the text area content changes, re-render the
# Markdown shown in the right-hand pane.
@callback(
    Output('markdown-output', 'children'),
    Input('input1', 'value')
)
def update_markdown_output(text):
    """Render the current text-area content as Markdown with MathJax enabled.

    Args:
        text: Current value of the ``input1`` text area.

    Returns:
        A ``dcc.Markdown`` component that becomes the children of the
        ``markdown-output`` container.
    """
    rendered_preview = dcc.Markdown(text, mathjax=True)
    return rendered_preview

# OCR callback: the recognized Markdown is written into the text area (instead
# of replacing the preview pane directly), so the text stays editable.
@callback(
    Output('input1', 'value'),
    Input('figure-reader', 'n_clicks'),
    prevent_initial_call=True
)
def update_output(n_clicks):
    """Run Mistral OCR on ``image.png`` and return the recognized Markdown.

    Args:
        n_clicks: Click count of the submit button; used only as the trigger.

    Returns:
        The Markdown of the first OCR page with surrounding whitespace
        stripped, or ``dash.no_update`` (text area left unchanged) when the
        image cannot be read or the OCR response contains no pages.

    Raises:
        KeyError: If the ``MISTRAL_API_KEY`` environment variable is not set.
    """
    # Imported locally so this block needs no change to the file-level imports.
    from dash import no_update

    # Path to the image that is sent to the OCR service.
    image_path = 'image.png'

    base64_image = encode_image(image_path)
    if base64_image is None:
        # encode_image has already reported the failure. Without this guard
        # the literal text "None" would be sent to the API as image data.
        return no_update

    api_key = os.environ["MISTRAL_API_KEY"]
    client = Mistral(api_key=api_key)

    ocr_response = client.ocr.process(
        model="mistral-ocr-latest",
        document={
            "type": "image_url",
            "image_url": f"data:image/png;base64,{base64_image}"
        }
    )

    pages = ocr_response.pages
    if not pages:
        # Nothing was recognized; keep the current text instead of raising
        # IndexError on pages[0].
        print(f"Error: OCR returned no pages for {image_path}.")
        return no_update

    # The Markdown is used verbatim. Passing it through
    # format_to_triple_quotes would split on the two-character sequence
    # backslash + "n" and collapse doubled backslashes, which corrupts LaTeX
    # such as \nu, \neq, \nabla or the \\ row separator.
    # NOTE(review): assumes the SDK hands back an already-decoded str with
    # real newlines — confirm against the Mistral client documentation.
    markdown = pages[0].markdown

    # Return the OCR result as the new value of the text area.
    return markdown.strip()

# Start the Dash server only when this file is executed as a script.
# NOTE(review): debug=True is presumably meant for local development only —
# confirm it is switched off before deploying.
if __name__ == '__main__':
    app.run(debug=True)

