In [None]:
# Run configuration read by the cells below.
# Embedding name handed to the data loader; also the sub-folder of `root`
# where the weights pickle and the predictions spreadsheet are stored.
embedding = "tuned_embedding"
# Base directory for everything this notebook writes or reads back.
root = "outputs"
# False: train the five models and save their weights to
# <root>/<embedding>/weights.pickle. True: skip training and load that file.
load_weights = False # True if you have weights of models trained on all known terms

In [None]:
import os

# Switch the working directory to the parent of the directory the kernel
# started in (presumably so the project-local `fnn` package and the relative
# `outputs` folder resolve from there - confirm against the repo layout).
#
# The starting directory is captured only the first time this cell runs.
# Re-running the cell therefore lands in the same place instead of climbing
# one more directory level on every execution.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.dirname(NOTEBOOK_START_DIR))
os.getcwd()

In [None]:
import pandas as pd
import os
import pickle
import numpy as np
from fnn.modules import fnn_model, data_loader

#### PREDICTIONS

In [None]:
# Main data loader for the configured embedding. Later cells index
# `dl.data[i]` for i in 0..4 and read `train_inputs`, `train_outputs`,
# `inputs_neig` and `words_not_ocean` from it.
# NOTE(review): standardize=True presumably standardizes the targets; the
# predictions are mapped back later with a StandardScaler fit on a second,
# non-standardized loader - confirm the two use the same statistics.
dl = data_loader.Data_Loader(
    embedding_name=embedding,
    standardize=True,
    create_data=True,
    create_tree=True,
    distance=None,
    k_folds=None,
)

In [None]:
# Five independent models; models[i] is trained on and predicts from dl.data[i].
models = [fnn_model.simpleModel() for _ in range(5)]

##### fit

In [None]:
if not load_weights:
    # Create the output folder BEFORE training. Without this, a missing
    # <root>/<embedding> directory only surfaces as FileNotFoundError when the
    # pickle is opened, i.e. after all five 300-epoch trainings have finished
    # and their weights would be lost.
    weights_dir = os.path.join(root, embedding)
    os.makedirs(weights_dir, exist_ok=True)

    for i, model in enumerate(models):
        print(i)  # progress indicator: index of the model being trained
        # models[i] is fit on dl.data[i]; train_outputs[i] selects the i-th
        # target (presumably the i-th trait's scores - confirm in Data_Loader).
        model.fit_predict(dl.data[i].train_inputs, dl.data[i].train_outputs[i], epochs=300, verbose=0)

    # Persist the trained weights so a later run can set load_weights = True.
    weights = [m.model.get_weights() for m in models]
    with open(os.path.join(weights_dir, "weights.pickle"), "wb") as f:
        pickle.dump(weights, f)

##### load weights

In [None]:
if load_weights:
    # Restore the weights written by the training cell of an earlier run.
    # NOTE: unpickling can execute arbitrary code - only load a weights file
    # that you produced yourself.
    weights_path = os.path.join(root, embedding, "weights.pickle")
    with open(weights_path, "rb") as f:
        weights = pickle.load(f)
    # weights[k] belongs to models[k]; the file must hold at least five entries.
    for slot in range(5):
        models[slot].model.set_weights(weights[slot])

##### predict

In [None]:
# Run each model on the `inputs_neig` of its own dataset and flatten the
# result to a 1-D array with one value per input row.
predictions = []
for slot in range(5):
    raw_output = models[slot].model.predict(dl.data[slot].inputs_neig)
    predictions.append(np.reshape(raw_output, len(raw_output)))

In [None]:
# Second loader with standardize=False; only its first dataset is kept. Its
# `train_outputs` are used below to fit the scaler that maps the predictions
# back to the original score scale.
# NOTE(review): the embedding name is hard-coded here and differs from the
# `embedding` setting used for `dl` - confirm this is intentional and that the
# training targets are the same for both embeddings.
dl2 = data_loader.Data_Loader(
    oceans=[0],
    embedding_name="tuned_embedding_1.5M",
    standardize=False,
    create_data=True,
    create_tree=True,
    distance=None,
    k_folds=None,
).data[0]

In [None]:
from sklearn.preprocessing import StandardScaler
# Scaler that both centers and scales each column. It is fit on the
# non-standardized training targets and then used in the inverse direction
# to bring the model predictions back to the original score scale.
scaler = StandardScaler(with_mean = True, with_std = True)

In [None]:
# Learn per-column mean and standard deviation from the non-standardized
# training targets. The transpose puts one target per column.
# Assumes dl2.train_outputs is laid out as (5 traits, n_terms) - TODO confirm.
scaler.fit(np.transpose(dl2.train_outputs))

In [None]:
# Stack the five per-model prediction vectors as columns (one row per word)
# and undo the standardization using the fitted scaler.
# NOTE: this rebinds `predictions`, so the cell is not re-runnable - running
# it a second time would transpose and inverse-transform the already
# rescaled array. Re-run the predict cell first if you need to repeat it.
predictions = scaler.inverse_transform(np.transpose(predictions))

In [None]:
# One row per predicted word, one column per trait score (O, C, E, A, N),
# plus the word itself taken from the first dataset of the main loader.
df_predictions = pd.DataFrame(
    data=predictions,
    columns=["O","C","E","A","N"],
)
df_predictions["word"] = dl.data[0].words_not_ocean

In [None]:
# Display the rescaled predictions (before clipping to [-1, 1]).
df_predictions

In [None]:
from scipy.spatial.distance import euclidean as dist

In [None]:
# Clamp every trait score to the closed interval [-1, 1].
# Series.clip is vectorized and, unlike the built-in min()/max() applied per
# element, leaves NaN untouched: min(1, nan) evaluates to 1, which would turn
# a failed (NaN) prediction into the maximum possible score without warning.
for c in ["O","C","E","A","N"]:
    df_predictions[c] = df_predictions[c].clip(lower=-1, upper=1)

In [None]:
# Positional row counter (0, 1, 2, ...). The significance filter further down
# reads it back and passes it as the integer term index to is_significant_term.
df_predictions["cont"] = list(range(len(df_predictions)))

In [None]:
# Marker scores: for each trait and each pole (+1 / -1) the score of a word is
# 1 minus the Euclidean distance between its five trait scores and the unit
# vector pointing at that pole (e.g. [1,0,0,0,0] for "MK_O+").
# Columns are created in the order MK_O+, MK_O-, MK_C+, MK_C-, ..., MK_N-.
for axis_pos, trait in enumerate(["O","C","E","A","N"]):
    for sign, suffix in ((1, "+"), (-1, "-")):
        pole = np.zeros(5, dtype=int)
        pole[axis_pos] = sign
        df_predictions["MK_" + trait + suffix] = df_predictions[["O","C","E","A","N"]].apply(
            # bind `pole` as a default so each column uses its own target vector
            lambda x, pole=pole: 1 - dist(x, pole),
            axis=1,
            raw=True,
        )

In [None]:
# Keep a marker score only where the loader reports the term as significant
# for the corresponding trait; every other entry is replaced by the sentinel
# value -100 so it sorts to the bottom of any descending ranking.
marker_index = [
    "MK_O+", "MK_O-",
    "MK_C+", "MK_C-",
    "MK_E+", "MK_E-",
    "MK_A+", "MK_A-",
    "MK_N+", "MK_N-",
]
ocean = {"O":0,"C":1,"E":2,"A":3,"N":4}
for mk in marker_index:
    oc = mk[3:4]            # trait letter embedded in the marker name
    cont_oc = ocean[oc]     # index of that trait's dataset in dl.data

    def keep_if_significant(row, trait_index=cont_oc):
        # row is a raw array holding (cont, trait score, marker score)
        position, trait_score, marker_score = row[0], row[1], row[2]
        if dl.data[trait_index].is_significant_term(int(position), trait_score, trait_index):
            return marker_score
        return -100

    df_predictions[mk] = df_predictions[["cont",oc,mk]].apply(keep_if_significant, raw=True, axis=1)

In [None]:
# Display the table with clipped trait scores and filtered marker scores
# (-100 marks entries that did not pass the significance check).
df_predictions

In [None]:
# Write the full table (trait scores, word, cont, marker scores) to
# <root>/<embedding>/predictions.xlsx without the row index.
predictions_xlsx = os.path.join(root, embedding, "predictions.xlsx")
df_predictions.to_excel(predictions_xlsx, index=False)

In [None]:
# Top 50 words for one marker pole; change `char` to e.g. "O-" or "E+".
char = "N+"
marker_column = "MK_" + char
df_predictions.sort_values(marker_column, ascending=False)[["word", marker_column]].head(50)

In [None]:
# Top 50 words by raw (clipped) score on a single trait; change `char` to
# any of "O", "C", "E", "A", "N".
char = "O"
df_predictions[["word", char]].sort_values(char, ascending=False).head(50)