In [30]:
from jinja2 import Template
from bertopic import BERTopic 
import pandas as pd
import numpy as np 

def export_report(
    topic_model: BERTopic,
    parameters: dict,
    docs: list[str],
    embeddings: np.ndarray,
    output_path: str = "./report.html",
    template_path: str = "./api/activetigger/html/bertopic_report_template.html",
) -> None:
    """Export a fitted topic model as a standalone HTML report.

    The report is rendered from a Jinja2 template and written to
    ``output_path``. The following entries are passed to the template:

    - ``bertopic_name``: placeholder name (currently the literal "name")
    - ``topics``: the topic info table, without the "Representative_Docs" column
    - ``map``: the 2D document map (``visualize_documents``)
    - ``hierarchical``: the hierarchical representation (``visualize_hierarchy``)
    - ``RepresentativeTopics``: placeholder strings (not yet computed from the model)
    - ``parameters``: the parameters as a two-column table

    Args:
        topic_model: Fitted BERTopic model.
        parameters: Mapping of parameter name to value, rendered as a table.
        docs: Documents forwarded to ``visualize_documents``.
        embeddings: Embeddings forwarded to ``visualize_documents``.
        output_path: Where the rendered report is written (UTF-8).
        template_path: Path of the Jinja2 template to render (read as UTF-8).

    Raises:
        OSError: If the template cannot be read or the report cannot be
            written. The template is read and rendered before the output
            file is opened, so a failure does not leave a truncated report.
    """
    # Figures are exported as HTML fragments without plotly.js bundled, so the
    # template is expected to load plotly.js itself.
    saving_kwargs = {
        "full_html": False,
        "auto_play": False,
        "include_plotlyjs": False,
        "include_mathjax": False,
        "config": {"responsive": True},  # Unclear whether this has any effect
    }

    topics_table_html = (
        topic_model
        .get_topic_info()
        .drop(columns=["Representative_Docs"])
        .to_html()
    )
    documents_map_html = (
        topic_model
        .visualize_documents(
            docs=docs,
            embeddings=embeddings,
            hide_annotations=True,  # better readability
        )
        .to_html(**saving_kwargs)
    )
    hierarchy_html = topic_model.visualize_hierarchy().to_html(**saving_kwargs)
    # Explicit columns keep the table headers even when `parameters` is empty.
    parameters_table_html = pd.DataFrame(
        list(parameters.items()),
        columns=["parameter", "value"],
    ).to_html()

    plotly_jinja_data = {
        "bertopic_name": "name",
        "topics": topics_table_html,
        "map": documents_map_html,
        "hierarchical": hierarchy_html,
        "RepresentativeTopics": ["RepresentativeTopics - 1", "RepresentativeTopics - 2"],
        "parameters": parameters_table_html,
    }

    # Read and render first: opening the output in "w" mode truncates it, which
    # must not happen if the template is missing or rendering fails.
    with open(template_path, encoding="utf-8") as template_file:
        j2_template = Template(template_file.read())
    rendered_report = j2_template.render(plotly_jinja_data)

    with open(output_path, "w", encoding="utf-8") as output_file:
        output_file.write(rendered_report)

In [31]:
# Fit a default BERTopic model on precomputed embeddings of the training texts.
topic_model = BERTopic()

# Embeddings: the index is turned back into a column, then "id" is dropped so
# only the remaining columns are converted to the array.
# NOTE(review): presumably the parquet index is named "id" — confirm.
embeddings_path = "api/projects/test/bertopic/embeddings/bertopic_embeddings_train_alibaba-nlp-gte-multilingual-base.parquet"
embeddings_frame = pd.read_parquet(embeddings_path).reset_index()
embeddings = embeddings_frame.drop(columns=["id"]).to_numpy()

# Documents: the "text" column of the training set, as a numpy array.
# NOTE(review): rows are assumed to be in the same order as the embeddings — confirm.
textes_path = "api/projects/test/train.parquet"
train_frame = pd.read_parquet(textes_path).reset_index()
docs = train_frame["text"].to_numpy()

# The returned values are not needed here; only the fitted model is.
_ = topic_model.fit_transform(documents=docs, embeddings=embeddings)

In [33]:
# Dummy parameters (param_0..param_9) to exercise the report's parameters table.
dummy_parameters = {f"param_{i}": f"value{i}" for i in range(10)}

export_report(
    topic_model,
    parameters=dummy_parameters,
    docs=docs,
    embeddings=embeddings,
)

In [24]:
# Interactive 2D document map, the same figure export_report embeds as 'map'.
# NOTE(review): replaces an unfinished `topic_model.visualize_ma` attribute
# access that raised AttributeError; confirm this is the intended figure.
topic_model.visualize_documents(
    docs=docs,
    embeddings=embeddings,
    hide_annotations=True,
)