In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import HTML
import pyperclip

from misc_util.logutils import setup_logging
from misc_util.pretty_print import Markdown, display

from derive_conceptualspace.pipeline import SnakeContext, load_envfiles, cluster_loader
from derive_conceptualspace.util.result_analysis_tools import getfiles_allconfigs, display_metrics, show_lambda_elements, highlight_nonzero_max
from derive_conceptualspace.settings import DEFAULT_N_CPUS
from derive_conceptualspace.util.threadworker import WorkerPool
from derive_conceptualspace.cli.args_from_filename import get_filename, print_envvars
from derive_conceptualspace.util.desc_object import DescriptionList

# Default size (width, height in inches) for every matplotlib figure created in this notebook.
plt.rcParams["figure.figsize"] = [16, 10]

In [None]:
# Project helpers: initialise logging, then load the env files selected by "placetypes".
setup_logging()
load_envfiles("placetypes")

# NOTE(review): presumably returns every stored configuration that has a "clusters"
# result plus a helper for printing a configuration - confirm against
# result_analysis_tools.getfiles_allconfigs.
configs, print_cnf = getfiles_allconfigs(
    "clusters",
    parse_all=True,
    verbose=False,
)

In [None]:
# One loader callable per artefact name that is requested from the context below.
loaders = {
    "clusters": cluster_loader,
    # keep only the `embedding_` attribute of the stored embedding object
    "embedding": lambda **kwargs: kwargs["embedding"].embedding_,
    "pp_descriptions": DescriptionList.from_json,
}

# Load the three artefacts for the first configuration found in the setup cell.
clusters, embedding, descriptions = (
    SnakeContext.loader_context(config=configs[0])
    .load("clusters", "embedding", "pp_descriptions", loaders=loaders)
)

# The loaded "clusters" object is a mapping with exactly two values; after this line
# `clusters` refers to the first of them and `planes` to the second.
clusters, planes = clusters.values()

In [None]:
# For every entry of `embedding`, the `dist` value towards each object in `planes`
# (one dict per entry, keyed like `planes`).
axis_dists = [{k: v.dist(embedding[i]) for k, v in planes.items()} for i in range(len(embedding))]

# Per key of `planes`: the title of the description whose `dist` value is largest.
# idxmax() yields row labels of the default RangeIndex, i.e. positions in `embedding`.
# NOTE(review): this reads the private `_descriptions` attribute and assumes it is
# ordered like `embedding` - confirm.
best_per_dim = {k: descriptions._descriptions[v].title for k, v in pd.DataFrame(axis_dists).idxmax().to_dict().items()}

# Only the first `max_shown` dimensions are printed. The padding width is the longest
# name among those and is computed once instead of once per printed row.
max_shown = 20
shown_dims = list(best_per_dim.items())[:max_shown]
name_width = max((len(k) for k, _ in shown_dims), default=0)
print("Highest-ranking descriptions per dimension:\n    "+"\n    ".join(f"{k.ljust(name_width)}: {v}" for k, v in shown_dims))

In [None]:
#TODO: movietuner interface and display of the top 3 dimensions (did the latter already for desc15)

In [None]:

def plot_3d_tsne(df, cat_name):
    """Show an interactive 3D scatter plot with one trace per category.

    Args:
        df: pandas DataFrame providing the columns "Category", "tsne_1",
            "tsne_2" and "tsne_3".
        cat_name: Currently unused; rows are always grouped by the "Category"
            column. NOTE(review): possibly meant to select the grouping
            column - confirm with callers before changing.

    Returns:
        None. The figure is rendered through ``fig.show()``.
    """
    # Imported locally: no visible cell brings `go` into scope, so the function
    # previously failed with a NameError.
    import plotly.graph_objects as go

    fig = go.Figure()
    # dict.fromkeys de-duplicates like set() but keeps first-appearance order, so
    # trace order and colour index are the same on every run.
    categories = list(dict.fromkeys(df["Category"]))
    for color_idx, category in enumerate(categories):
        rows = df[df["Category"] == category]  # select the rows once, not once per axis
        fig.add_trace(
            go.Scatter3d(
                name=category,
                mode='markers',
                x=rows["tsne_1"],
                y=rows["tsne_2"],
                z=rows["tsne_3"],
                marker=dict(
                    # NOTE(review): a bare number is resolved through the marker
                    # colorscale - confirm categories end up visually distinct.
                    color=color_idx,
                    size=1.5,
                    line=dict(width=0),
                ),
            )
        )
    # Legend symbols keep a constant size independent of the small marker size;
    # margins are tightened so the plot fills the output area.
    fig.update_layout(
        legend={'itemsizing': 'constant'},
        margin=dict(l=2, r=2, t=20, b=2),
    )
    fig.show()