graph definition

In [27]:
# import section

from scipy.spatial import distance_matrix
import networkx as nx

In [28]:
def compute_adjacency_matrix(route_distances, num_neighbors):
    route_distances = route_distances.to_numpy() 
    num_links = route_distances.shape[0]
    route_distances = route_distances / np.max(route_distances)
    closest_nodes = np.argsort(route_distances, axis=1)[:, :num_neighbors]
    adjacency_matrix = np.zeros((num_links, num_links))

    for i in range(num_links):
        adjacency_matrix[i, closest_nodes[i]] = 1

    w_mask = np.ones([num_links, num_links]) - np.identity(num_links)
    adjacency_matrix = adjacency_matrix.astype(int) * w_mask
    edge_attributes = route_distances[adjacency_matrix == 1]

    return adjacency_matrix, edge_attributes

In [29]:
#graph class : edges, nodes, adjacency matrix (from the distance matrix), here i use a custom function

class GraphInfo:
    def __init__(self, edges: typing.Tuple[list, list], num_nodes: int):
        self.edges = edges
        self.num_nodes = num_nodes

adjacency_matrix, edge_attributes = compute_adjacency_matrix(distance_matrix, num_neighbors=4)

node_indices, neighbor_indices = np.where(adjacency_matrix == 1)

# graph definition
graph = GraphInfo(
    edges=(node_indices.tolist(), neighbor_indices.tolist()),
    num_nodes=adjacency_matrix.shape[0],
)

print(f"number of nodes: {graph.num_nodes}, number of edges: {len(graph.edges[0])}")

NameError: name 'typing' is not defined

In [26]:
#visualize with NetworkX

G = nx.Graph()
G.add_nodes_from(range(graph.num_nodes))
edges = zip(graph.edges[0], graph.edges[1])
G.add_edges_from(edges)

fig, ax = plt.subplots(figsize=(50, 25))

nx.draw(G, with_labels=True, node_color='red', node_size=100, edge_color='black', width=0.9, ax=ax)
plt.show()

search for embedding similarities: example from notebook

In [None]:
#OpenAI part

openai_client = AzureOpenAI(
    api_version=os.environ["OPENAI_API_VERSION"],
    azure_endpoint=os.environ["OPENAI_API_BASE"],
    api_key=os.environ["OPENAI_API_KEY"]
)

embedding = openai_client.embeddings.create(input=["sample text", "other sample text"], 
                                    model=os.environ["ADA002_DEPLOYMENT"])

print(len(embedding.data[0].embedding))

In [None]:
embedded_question = openai_client.embeddings.create(input=["I'm looking for examples of End of Well Reports from Vermillion Energy?"], 
                                    model=os.environ["ADA002_DEPLOYMENT"])
embedded_question = np.array(embedded_question.data[0].embedding).reshape(1, -1)

sentence_embeddings_ada_np = np.array(sentence_embeddings_ada)

print(sentence_embeddings_ada_np.shape)

cosines = cosine_similarity(embedded_question, sentence_embeddings_ada_np)
sorted_sentences = [sentences[idx] for idx in np.argsort(cosines)[0][::-1]]
sorted_sentences_df = pd.DataFrame({"most similar sentences in corpus": sorted_sentences[:5]})

sorted_sentences_df = set_styling(sorted_sentences_df)
display(sorted_sentences_df)

different LLM agents 

In [None]:
from collections import Counter
from transformers import pipeline

models = ["model1", "model2"]

pipelines = [pipeline("text", model = model) for model in models]

def get_predictions(txt):
    # faire un echang sans intersection
    predictions = [pipeline(txt) for pipeline in pipelines]
    return predictions

def majority_vote(predictions):
    all_pred = [prediction["txt"] for model_predictions in predictions for prediction in model_predictions]
    vote_counter = Counter(all_predictions)
    majority_predict = vote_counter.most_common(1)[0][0]
    return majority_prediction
    

In [None]:
def edge_creation_fun(data_embedding):
    print(data_embending_np.shape)
    my_edge=[]
    my_nodes=[]
    for i in range(len(data_embedding)):
        for j in range(i+1,len(data_embedding)):
            my_cosine = cosine_similarity(data_embedding[i], data_embedding[j])
            if(my_cosine>seuil):
                # Stockage des résultats et des indices
                my_edge.append(my_cosine)
                my_nodes.append((i, j))
    return my_edge, my_nodes

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
def edge_creation_fun2(data_embedding, seuil):
    # Calculer la similarité cosinus entre toutes les paires de vecteurs
    similarities = cosine_similarity(data_embedding, data_embedding)
    # Trouver les indices des paires similaires où row_indices est différent de col_indices
    row_indices, col_indices = np.where((similarities > seuil) & (np.arange(len(data_embedding))[:, None] != np.arange(len(data_embedding))))
    # Créer une liste d'arêtes et de nœuds
    my_edge = [similarities[i, j] for i, j in zip(row_indices, col_indices)]
    my_nodes = list(zip(row_indices, col_indices))
    return my_edge, my_nodes

In [None]:
from dotenv import load_dotenv
import os
from openai import AzureOpenAI
from langchain.vectorstores import FAISS
import numpy as np

load_dotenv(".env.shared")
load_dotenv(".env.secret")
openai_client = AzureOpenAI(
    api_version=os.environ["OPENAI_API_VERSION"],
    azure_endpoint=os.environ["OPENAI_API_BASE"],
    api_key=os.environ["OPENAI_API_KEY"]
)
def compute_embeddings(input) :
    embedded_question = openai_client.embeddings.create(input=["I'm looking for examples of End of Well Reports from Vermillion Energy?"],
                                    model=os.environ["ADA002_DEPLOYMENT"])
    embedded_question = np.array(embedded_question.data[0].embedding).reshape(1, -1)
    return embedded_question
    
def main() :
    query = "I'm looking for examples of End of Well Reports from Vermillion Energy?"
    db = FAISS.load_local("data/embeddings",openai_client.embeddings.create(model=os.environ["ADA002_DEPLOYMENT"],input=""))
    docs = db.similarity_search(query)
    print(docs[0].page_content)
if __name__ == "__main__" :
    main()