In [None]:
from io import StringIO

import incense
from incense import ExperimentLoader
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import sklearn.svm
import sklearn.metrics
import math

from src.static.settings import MONGO_URI, ENV_FILE_PATH, SPACES_DATA_BASE
from src.main.load_data.load_semanticspaces import load_mds_representation, get_names, get_classes
from scripts.create_siddata_dataset import display_mds, load_translate_mds #TODO display in scripts?!
from src.main.util.mds_object import TRANSL

from src.main.util.threedfigure import ThreeDFigure, make_meshgrid
from src.main.util.base_changer import Plane, make_base_changer

In [None]:
# Name of the dataset; passed to get_names() and get_classes() further down.
data_set = "courses"
# Category passed as `what=` to get_classes(); "Fachbereich" is German for department/faculty.
cat_name = "Fachbereich"

In [None]:
# The embedding is read from a local data clone instead of going through
# load_mds_representation(SPACES_DATA_BASE, data_set, ...).
# NOTE(review): the directory below is an absolute, machine-specific path —
# consider deriving it from the project settings so the notebook runs elsewhere.
mds = load_translate_mds(
    "/home/chris/Documents/UNI_neu/Masterarbeit/DATA_CLONE/",
    "siddata_names_descriptions_mds_3.json",
    translate_policy=TRANSL,
).mds
# Only the first element of the returned pair is used.
names, _ = get_names(SPACES_DATA_BASE, data_set)

In [None]:
# Quick look at the loaded embedding together with its names.
# max_elems=2 presumably limits the preview to two entries — TODO confirm against display_mds.
display_mds(mds, names, max_elems=2)

In [None]:
# Mapping from entity name to its label for the chosen category.
classes = get_classes(SPACES_DATA_BASE, data_set, what=cat_name)
# Show only the first five entries instead of dumping the whole mapping.
{key: classes[key] for key in list(classes)[:5]}

In [None]:
# Group the entity names by class label, then translate the names into row
# indices of the embedding.
unique_classes = list(set(classes.values()))
fb_courses = {cls: [i for i in names if classes[i] == cls] for cls in unique_classes}

# First-occurrence position of every name (same result as names.index), built
# once so the lookup below is O(1) instead of a linear scan done twice per name.
name_to_index = {}
for position, name in enumerate(names):
    name_to_index.setdefault(name, position)

# Names whose position lies beyond the embedding's row count are dropped.
n_points = mds.embedding_.shape[0]
fb_indices = {
    cls: [name_to_index[elem] for elem in elems if name_to_index[elem] < n_points]
    for cls, elems in fb_courses.items()
}
# Preview: first three indices of the first three classes.
{k: v[:3] for k, v in list(fb_indices.items())[:3]}

In [None]:
# One marker trace per class; the running index is passed on as the colour.
with ThreeDFigure() as fig:
    for ind, clsname in enumerate(fb_indices):
        indices = fb_indices[clsname]
        fig.add_markers(mds.embedding_[indices], color=ind, size=2, name=clsname)
fig.show()

In [None]:
# Class that is contrasted with all remaining classes; it is used as a key into fb_indices below.
plot_allagainst = "Wiwi"

In [None]:
def flatten(nested):
    """Concatenate the sub-lists of ``nested`` into a single flat list."""
    return [item for sublist in nested for item in sublist]


# Two groups: the selected class versus the union of every other class.
grouped = {
    plot_allagainst: fb_indices[plot_allagainst],
    "others": flatten([val for key, val in fb_indices.items() if key != plot_allagainst]),
}
# Preview: first three indices of each group.
{k: v[:3] for k, v in list(grouped.items())[:3]}

In [None]:
# Same plot as for the individual classes, now with only the two groups.
with ThreeDFigure() as fig:
    for ind, clsname in enumerate(grouped):
        indices = grouped[clsname]
        fig.add_markers(mds.embedding_[indices], color=ind, size=2, name=clsname)
fig.show()

In [None]:
# Binary target per embedding row: 1 if the row index is listed in
# grouped["others"], otherwise 0.
# NOTE: this rebinds `classes`, which previously held the name -> label mapping.
# NOTE(review): rows that belong to neither group would also get label 0 — confirm
# that every embedding row has a class.
# np.isin replaces a per-row `i in list` scan, which was quadratic in the row count.
classes = np.isin(np.arange(mds.embedding_.shape[0]), grouped["others"]).astype(int)

# Linear one-vs-rest separation; class_weight="balanced" compensates for the
# unequal group sizes.
svm = sklearn.svm.LinearSVC(dual=False, class_weight="balanced")
svm.fit(mds.embedding_, classes)
# Signed score per sample; values > 0 are treated as label 1 further down.
svm_results = svm.decision_function(mds.embedding_)

In [None]:
# Decision hyperplane of the fitted SVM, built from its coefficients and intercept.
decision_plane = Plane(*svm.coef_[0], svm.intercept_[0])

# Mean of the embedded samples; the plane's normal is drawn through this point.
# (Previously this used an undefined name `X`; the SVM was fitted on
# mds.embedding_, so that is the data meant here.)
embedding_center = mds.embedding_.mean(axis=0)

with ThreeDFigure() as fig:
    # samples, one trace per group
    for ind, (clsname, indices) in enumerate(grouped.items()):
        fig.add_markers(mds.embedding_[indices], color=ind, size=1, name=clsname)
    # decision hyperplane
    fig.add_surface(decision_plane, mds.embedding_, classes, 0.1, color="lightblue", showlegend=True)

    # line along the plane's normal, centred on the sample mean
    fig.add_line(
        embedding_center - decision_plane.normal * 20,
        embedding_center + decision_plane.normal * 20,
        width=5,
        name="orthogonal",
    )
    fig.add_markers([0, 0, 0], size=3, name="coordinate center")

    # Optional overlays, kept for reference:
    # fig.add_markers(mds.embedding_, color=classes, size=1)  # all samples coloured by label
    # fig.add_line(-decision_plane.normal * 5, decision_plane.normal * 5)  # normal through [0,0,0]
    # fig.add_sample_projections(mds.embedding_, decision_plane.normal)  # samples projected onto the normal

fig.show()

In [None]:
# A sample counts as correct when its label (0/1) matches the sign of the
# SVM decision value (score > 0 means label 1).
correct_preds = [label == (score > 0) for label, score in zip(classes, svm_results)]
# Scale to percent first and round afterwards: rounding before multiplying by
# 100 reintroduces float artefacts (e.g. 85.32000000000001).
correct_percentage = round(sum(correct_preds) / len(correct_preds) * 100, 2)
correct_percentage

In [None]:
# Confusion matrix on the data the SVM was fitted on; rows are true labels,
# columns are predicted labels (score > 0 means label 1).
conf_mat = sklearn.metrics.confusion_matrix(
    y_true=classes,
    y_pred=(svm_results > 0).astype(int),
)
# Plot for the same estimator and data. The previous call passed the undefined
# names `X` and `y`, which fails on a fresh kernel.
sklearn.metrics.ConfusionMatrixDisplay.from_estimator(svm, mds.embedding_, classes)

In [None]:
# Per-class accuracy in percent: each diagonal entry divided by its row total,
# printed for label 0 and then label 1.
for cls_idx in (0, 1):
    row_total = conf_mat[cls_idx].sum()
    print(round(conf_mat[cls_idx, cls_idx] / row_total * 100, 2))