# Import

In [1]:
import csv
import os
import pickle
import sys

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from tqdm import tqdm

## Add configuration file

In [2]:
sys.path.append("/home/jovyan/core/config/")
sys.path.append("/home/jovyan/core/util/")
sys.path.append("../PlotFunction/lineplot/")
sys.path.append("../PlotFunction/config/")

In [3]:
from ALL import config
from line_plot_1 import line_plot_1
from line_plot_error_1 import line_plot_error_1
from line_plot_1_layout import layout
from util import *

## Set condition

In [4]:
tqdm.pandas()
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 50)

In [5]:
data_types = ["20News", "AgNews"]
vectorize_types = ["doc2vec", "sentenceBERT"]

In [6]:
model_nums = config["clustering"]["gmm"]["max_model_num"]
# covariance_types = config["clustering"]["gmm"]["covariance_types"]
normalization = "normalized"
covariance_types = ["spherical", "diag", "full"]
vector_dims = {
    "doc2vec": config["vectorize"]["doc2vec"]["dims"],
    "sentenceBERT": config["vectorize"]["sentenceBERT"]["dims"] + [384],
}

In [7]:
stats_vals = ["aic", "bic", "mi", "logl"]

# Stats

## Read data

In [8]:
chart_dict = {
    data_type: pd.read_csv(f"../data/{data_type}/CovarianceChart.csv", index_col=0)
    for data_type in data_types
}

## Data shaping

In [9]:
chart_df = pd.concat(chart_dict).T

# Make Chart

In [10]:
chart_df.style.format(
    escape="latex", formatter={"document_count": "{:.0f}"}
)

Unnamed: 0_level_0,20News,20News,20News,AgNews,AgNews,AgNews
Unnamed: 0_level_1,spherical,diag,full,spherical,diag,full
doc2vec,0.535979,0.535291,0.522142,0.454906,0.429935,0.475413
sentenceBERT,0.582425,0.590015,0.593456,0.582227,0.585635,0.607587


In [11]:
print(
    chart_df
    .style.format(precision=3, escape="latex")
    .to_latex(
        column_format="rrrr",
        position="h",
        position_float="centering",
        hrules=True,
        caption="miの比較と埋め込み次元",
        label="table:1",
        multicol_align="r",
    )
)

\begin{table}[h]
\centering
\caption{miの比較と埋め込み次元}
\label{table:1}
\begin{tabular}{rrrr}
\toprule
 & \multicolumn{3}{r}{20News} & \multicolumn{3}{r}{AgNews} \\
 & spherical & diag & full & spherical & diag & full \\
\midrule
doc2vec & 0.536 & 0.535 & 0.522 & 0.455 & 0.430 & 0.475 \\
sentenceBERT & 0.582 & 0.590 & 0.593 & 0.582 & 0.586 & 0.608 \\
\bottomrule
\end{tabular}
\end{table}

