In [325]:
from langchain_community.llms import Ollama
from langchain.embeddings import OllamaEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain.schema import SystemMessage, HumanMessage
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import networkx as nx
import itertools
import json
import os

llm = Ollama(model="mistral")

# Setup

In [326]:
big5_characteristics = {
    "Openness": "creative, perceptual, curious and philosophical",
    "Conscientiousness": "organized, responsible, disciplined and prudent",
    "Neuroticism": "anxious, depressed, insecure and emotional",
    "Agreeableness": "cooperative, compasionnate, trustworthy and humble",
    "Extraversion": "friendly, positive, assertive and energetic"
}

In [327]:
role = "a helpful, expert personality methodologist who generates items for each of the Big Five Personality traits. You know how to carefully craft succinct items that meaningfully target the many aspects of a given trait."

# Génération des items

In [328]:
if os.path.isfile("items_dict.json"):
    with open("items_dict.json", "r") as f:
        responses = json.load(f)
else:
    chat = ChatOllama(model="mistral")
    responses = {}
    for trait, characteristics in big5_characteristics.items():
        instructions = f"""
    Generate a total of eight NEW items that assess one’s level of {trait}. Here are the
    characteristics of {trait}: {characteristics}. Generate exactly TWO items per
    characteristic. It is VERY important that you generate EXACTLY eight TOTAL items in
    the CORRECT formatting. FOLLOW this format EXACTLY for each item: :: Do NOT
    add any characteristics. Do NOT leave any out. The stem ‘I am someone who’ MUST
    be explicitly written out at the beginning of each item. Separate items using \n. Do NOT
    number the items. Lastly, be creative and explore novel ideas!

    example1:
    I am someone who is compassionate, has a soft heart.

    example2:
    I am someone who starts arguments with others.
    """
        # Set role and instructions
        messages = [
            SystemMessage(content=role),
            HumanMessage(content=instructions)
        ]

        response = chat.invoke(messages)
        responses[trait] = response.content.split("\n  ")
    with open("items_dict.json", "w") as f:
        json.dump(responses, f)

In [329]:
responses

{'Openness': ['1. I am someone who often finds myself lost in thought about abstract concepts and theories.\n',
  '2. I am someone who enjoys pondering the meaning of life and the universe.\n',
  '3. I am someone who frequently engages in activities that require creativity, such as painting or writing.\n',
  '4. I am someone who is always seeking out new experiences, whether they be cultural, intellectual, or sensory.\n',
  '5. I am someone who values and appreciates art, music, and literature from various cultures and periods.\n',
  '6. I am someone who often daydreams about alternative realities or ideas.\n',
  '7. I am someone who is always eager to learn new things, especially when it comes to unfamiliar topics or ideas.\n',
  '8. I am someone who values intellectual stimulation and is not content with routine or mundane activities.'],
 'Conscientiousness': ['1. I am someone who values organization and efficiency in my daily tasks.',
  '2. I am someone who always completes assignme

# Encodage des items

In [330]:
# setting up the embedding model
embedding_model = OllamaEmbeddings(model="mistral")

trait_embeddings = dict()

for trait, items in responses.items():
    # Get the embedding
    items_embeddings = [embedding_model.embed_query([item.replace("I am someone who ", "")]) for item in items]
    trait_embeddings[trait] = items_embeddings

embeddings_list = [embedding for embedding_list in trait_embeddings.values() for embedding in embedding_list]

similarity_matrix = cosine_similarity(embeddings_list)

In [331]:
similarity_matrix

array([[1.        , 0.96158119, 0.95709082, ..., 0.94490946, 0.86045818,
        0.83542246],
       [0.96158119, 1.        , 0.96874459, ..., 0.95129384, 0.87480012,
        0.85443881],
       [0.95709082, 0.96874459, 1.        , ..., 0.96894289, 0.90509717,
        0.88260029],
       ...,
       [0.94490946, 0.95129384, 0.96894289, ..., 1.        , 0.90880183,
        0.89517571],
       [0.86045818, 0.87480012, 0.90509717, ..., 0.90880183, 1.        ,
        0.97171329],
       [0.83542246, 0.85443881, 0.88260029, ..., 0.89517571, 0.97171329,
        1.        ]], shape=(40, 40))

# TMFG

## Finding the base tetrahedron (4 closest items)

In [339]:
dim = len(embeddings_list)

labels = [i for i in range(40)]

quaduplets = list(itertools.combinations(labels, 4))

best_quaduplet = None
best_similarity = -1

for quaduplet in quaduplets:
    i, j, k, l= [labels.index(t) for t in quaduplet]
    similarity = [
        similarity_matrix[i,j],
        similarity_matrix[i,k],
        similarity_matrix[i,l],
        similarity_matrix[j,k],
        similarity_matrix[j,l],
        similarity_matrix[k,l]
    ]

    min_similarity = min(similarity)

    if min_similarity > best_similarity:
        best_similarity = min_similarity
        best_quaduplet = (i, j, k, l)

In [393]:
Clique1 = best_quaduplet
Triangles = list(itertools.combinations(best_quaduplet, 3))
Vertices = set(range(dim)).difference(set(best_quaduplet))
Separators = list()
cliques = [Clique1]
P = np.zeros_like(similarity_matrix)
P[np.ix_(best_quaduplet, best_quaduplet)] = similarity_matrix[np.ix_(best_quaduplet, best_quaduplet)]

In [394]:
Maxgain = list()
Bestvertices = list()
for triangle in Triangles:
    best_vertex = np.argmax(np.sum(similarity_matrix[np.ix_(triangle, list(Vertices))], axis=0))
    Bestvertices.append(list(Vertices)[best_vertex])
    Maxgain.append(np.sum(similarity_matrix[np.ix_(triangle, list(Vertices))], axis=0)[best_vertex])

In [395]:

best_vertices_1 = Bestvertices.copy()
Triangles_1 = Triangles.copy()
Maxgain_1 = Maxgain.copy()


In [396]:
Triangles

[(9, 11, 13), (9, 11, 25), (9, 13, 25), (11, 13, 25)]

In [397]:
Bestvertices

[37, 2, 2, 29]

In [398]:
Maxgain

[np.float64(2.9113153725590566),
 np.float64(2.9144896682546744),
 np.float64(2.9167301675236392),
 np.float64(2.9126138776240387)]

In [399]:
maxgain_index = np.argmax(Maxgain)
best_vertex = Bestvertices.pop(maxgain_index)
print(best_vertex)

2


In [400]:
to_update = [idx for idx, vertex in enumerate(Bestvertices) if vertex == best_vertex] # np.where(Bestvertices==best_vertex)
print(to_update)

[1]


In [401]:
Bestvertices = best_vertices_1.copy()
Triangles = Triangles_1.copy()
Maxgain = Maxgain_1.copy()


best_list = []
i=0
while Vertices:
    maxgain_index = np.argmax(Maxgain)
    best_vertex = Bestvertices.pop(maxgain_index)
    idx_to_update = [idx for idx, vertex in enumerate(Bestvertices) if vertex == best_vertex] # np.where(Bestvertices==best_vertex)
    best_list.append(best_vertex)
    Vertices.remove(best_vertex)
    triangle = Triangles.pop(maxgain_index)
    Maxgain.pop(maxgain_index)
    best_vertices = [vertex for vertex in triangle] + [int(best_vertex)]
    best_triangles = list(itertools.combinations(best_vertices,3))
    best_triangles.remove(triangle)
    best_triangles
    Triangles.extend(best_triangles)
    Separators.append(triangle)
    Cliquei = best_triangles + [triangle]
    P[np.ix_(triangle, triangle)] -= similarity_matrix[np.ix_(triangle, triangle)]
    Cliquei_idx = list(set([i for t in Cliquei for i in t]))
    P[np.ix_(Cliquei_idx, Cliquei_idx)] += similarity_matrix[np.ix_(Cliquei_idx, Cliquei_idx)]
    if not Vertices:
        break
    for triangle in Triangles[-3:]:
        best_vertex = np.argmax(np.sum(similarity_matrix[np.ix_(triangle, list(Vertices))], axis=0))
        Bestvertices.append(list(Vertices)[best_vertex])
        Maxgain.append(np.sum(similarity_matrix[np.ix_(triangle, list(Vertices))], axis=0)[best_vertex])
    for idx in idx_to_update:
        best_vertex = np.argmax(np.sum(similarity_matrix[np.ix_(Triangles[idx], list(Vertices))], axis=0))
        Bestvertices[idx] = list(Vertices)[best_vertex]
        Maxgain[idx] = np.sum(similarity_matrix[np.ix_(Triangles[idx], list(Vertices))], axis=0)[best_vertex]
        