<a href="https://colab.research.google.com/github/dhrsajk9/Brain/blob/main/MemorySystem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sentence_transformers import SentenceTransformer, util

# Load the pretrained sentence encoder used for this similarity demo
model = SentenceTransformer('all-MiniLM-L6-v2')

# Two sentences that differ only in their final word
sent1 = "The athlete ate pasta before the marathon"
sent2 = "The athlete ate pasta before the race"

# Encode each sentence separately into a tensor embedding
embedding1, embedding2 = (
    model.encode(sentence, convert_to_tensor=True)
    for sentence in (sent1, sent2)
)

# Cosine similarity of the two embeddings; .item() below reads out the single score
similarity = util.pytorch_cos_sim(embedding1, embedding2)
print(f"Similarity score: {similarity.item():.4f}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Similarity score: 0.9106


In [None]:
import pandas as pd
import networkx as nx
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import traceback

class KnowledgeGraph:
    """Directed relation graph paired with an embedding index of its nodes.

    Only the constructor is defined here; the graph-building and query
    methods are attached to the class in a later cell.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Create an empty graph and load the sentence encoder.

        Args:
            model_name: Model name or path handed to ``SentenceTransformer``.
                Defaults to the model that was previously hard-coded, so
                ``KnowledgeGraph()`` behaves as before.
        """
        # Directed graph holding the entity nodes and their relation edges.
        self.graph = nx.DiGraph()
        # Encoder used for node names, queries and stored memories.
        self.encoder = SentenceTransformer(model_name)
        # Maps node name -> embedding produced by self.encoder.
        self.node_embeddings = {}

In [None]:
def build_from_csv(self, file_path):
    """Populate the graph and the node-embedding index from a CSV of triples.

    The CSV must provide ``source``, ``target`` and ``relation`` columns.
    ``sentence``, ``tokens`` and ``tags`` are optional edge attributes that
    default to an empty string when the column is absent or the cell is empty.
    Rows with an empty ``source``, ``target`` or ``relation`` are skipped
    rather than stored under the literal name ``"nan"``.

    Args:
        file_path: Location of the CSV, passed straight to ``pandas.read_csv``.

    Raises:
        ValueError: If a required column is missing from the CSV.
        Exception: Any other failure while reading the file or encoding the
            nodes is printed together with its traceback and re-raised.
    """
    required_columns = ('source', 'target', 'relation')
    optional_columns = ('sentence', 'tokens', 'tags')

    def _text(value):
        # Empty CSV cells are read as NaN; store '' instead of the string 'nan'.
        return '' if pd.isna(value) else str(value)

    try:
        # Load the CSV file
        df = pd.read_csv(file_path)
        print(f"Successfully loaded CSV with {len(df)} rows")
        print(f"CSV columns: {df.columns.tolist()}")

        # Display first few rows for debugging
        print("First 2 rows of the CSV:")
        print(df.head(2))

        # The column set is identical for every row, so validate it once up
        # front instead of reporting the same problem for each row.
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            raise ValueError(f"CSV is missing required columns: {missing_columns}")

        pending_nodes = {}  # insertion-ordered set of nodes still lacking an embedding
        skipped_rows = 0

        for index, row in df.iterrows():
            try:
                if row[list(required_columns)].isna().any():
                    skipped_rows += 1
                    continue

                # Ensure we have string values for node names
                source = str(row['source'])
                target = str(row['target'])

                self.graph.add_node(source, type='entity')
                self.graph.add_node(target, type='entity')

                edge_attrs = {'relation': str(row['relation'])}
                for column in optional_columns:
                    edge_attrs[column] = _text(row.get(column, ''))
                self.graph.add_edge(source, target, **edge_attrs)

                for node in (source, target):
                    if node not in self.node_embeddings:
                        pending_nodes[node] = None

            except Exception as row_error:
                # Keep going: one malformed row should not abort the whole load.
                print(f"Error processing row {index}: {row_error}")
                print(f"Row data: {row}")
                traceback.print_exc()

        if skipped_rows:
            print(f"Skipped {skipped_rows} rows with an empty source, target or relation.")

        # Encode all new nodes in a single batch call; encoding them one at a
        # time dominates the run time on large CSVs.
        if pending_nodes:
            node_names = list(pending_nodes)
            for name, vector in zip(node_names, self.encoder.encode(node_names)):
                self.node_embeddings[name] = vector

        print(f"Knowledge graph built successfully with {len(self.graph.nodes)} nodes and {len(self.graph.edges)} edges.")
        print(f"Nodes: {list(self.graph.nodes)[:5]} ... (and more)")

    except Exception as e:
        print(f"Error building knowledge graph: {e}")
        traceback.print_exc()
        raise

In [None]:
def get_related_entities(self, entity, relation_type=None):
    """Return ``(target, edge_data)`` pairs for the outgoing edges of ``entity``.

    An entity that is not a node of the graph yields an empty list. When
    ``relation_type`` is truthy, only edges whose ``'relation'`` attribute
    equals it are kept; otherwise every outgoing edge is returned.
    """
    if entity not in self.graph.nodes:
        return []

    neighbours = []
    for _, target, data in self.graph.out_edges(entity, data=True):
        # A falsy relation_type (None, '') disables the filter.
        if not relation_type or data.get('relation') == relation_type:
            neighbours.append((target, data))
    return neighbours

def find_similar_nodes(self, query, top_k=5):
    """Rank the indexed nodes by cosine similarity to ``query``.

    All similarities are computed in one vectorized pass over the stored
    embeddings instead of one similarity call per node.

    Args:
        query: Text that is encoded with ``self.encoder``.
        top_k: Maximum number of results to return.

    Returns:
        Up to ``top_k`` ``(node, similarity)`` tuples in descending order of
        similarity, with similarities as plain Python floats. An empty list
        when no node embeddings are stored.
    """
    import numpy as np  # local import: this cell's file-level imports do not include numpy

    if not self.node_embeddings:
        return []

    nodes = list(self.node_embeddings)
    matrix = np.asarray([self.node_embeddings[node] for node in nodes], dtype=float)
    query_vector = np.asarray(self.encoder.encode(query), dtype=float).ravel()

    dots = matrix @ query_vector
    norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(query_vector)
    # A zero-length vector has no direction; score it 0 instead of dividing by zero.
    norms[norms == 0] = 1.0
    similarities = (dots / norms).tolist()  # .tolist() yields Python floats

    # sorted() is stable, so equally similar nodes keep their insertion order.
    ranked = sorted(zip(nodes, similarities), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_k]

In [None]:
class MemorySystem:
    """Store of past scenarios that can be recalled by embedding similarity.

    Scenario and query text are encoded with the knowledge graph's encoder.
    """

    def __init__(self, knowledge_graph):
        self.kg = knowledge_graph
        # One dict per stored scenario, in the order they were stored.
        self.memories = []

    def store_memory(self, scenario, entities, relations, context):
        """Append a memory record for ``scenario`` and print a confirmation."""
        record = dict(
            scenario=scenario,
            entities=entities,
            relations=relations,
            context=context,
        )
        # The embedding is what retrieve_relevant_memories compares against.
        record['embedding'] = self.kg.encoder.encode(scenario)
        self.memories.append(record)
        print(f"Memory stored: {scenario}")

    def retrieve_relevant_memories(self, query, top_k=3):
        """Return up to ``top_k`` ``(memory, similarity)`` pairs, most similar first."""
        query_vector = self.kg.encoder.encode(query)
        scored = [
            (memory, float(cosine_similarity([query_vector], [memory['embedding']])[0][0]))
            for memory in self.memories
        ]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:top_k]

In [None]:
def answer_question(self, question):
    """Collect graph knowledge and stored memories relevant to ``question``.

    Returns a dict with three keys:
      * ``'relevant_entities'``: the ``(node, similarity)`` list returned by
        the knowledge graph's ``find_similar_nodes``;
      * ``'related_knowledge'``: one dict per outgoing edge of those nodes;
      * ``'similar_memories'``: one dict per memory returned by
        ``retrieve_relevant_memories``.
    """
    # Nodes of the knowledge graph closest to the question
    relevant_entities = self.kg.find_similar_nodes(question)

    # Every outgoing edge of those nodes, tagged with the node it came from
    related_knowledge = [
        {
            'source': node,
            'source_similarity': float(score),
            'target': neighbour,
            'relation': edge_data.get('relation', ''),
            'sentence': edge_data.get('sentence', ''),
        }
        for node, score in relevant_entities
        for neighbour, edge_data in self.kg.get_related_entities(node)
    ]

    # Past scenarios similar to the question (scores coerced to Python floats)
    similar_memories = [
        {
            'scenario': memory['scenario'],
            'similarity': float(score),
            'entities': memory['entities'],
            'context': memory['context'],
        }
        for memory, score in self.retrieve_relevant_memories(question)
    ]

    return dict(
        relevant_entities=relevant_entities,
        related_knowledge=related_knowledge,
        similar_memories=similar_memories,
    )

In [None]:
def create_test_csv(filename):
    """Write a four-row sample triples CSV to ``filename`` unless it already exists.

    An existing path is left untouched; either way a status line is printed.
    """
    import os

    if os.path.exists(filename):
        print(f"Test CSV file already exists: {filename}")
        return

    sample_rows = [
        ('woman', 'race', 'participates_in', 'Women participate in races'),
        ('woman', 'bicycle', 'uses', 'Women use bicycles'),
        ('athlete', 'race', 'competes_in', 'Athletes compete in races'),
        ('bicycle', 'race', 'used_in', 'Bicycles are used in races'),
    ]
    test_data = pd.DataFrame(sample_rows, columns=['source', 'target', 'relation', 'sentence'])
    test_data.to_csv(filename, index=False)
    print(f"Created test CSV file: {filename}")

In [None]:
# Attach the free functions defined in earlier cells as KnowledgeGraph methods
for method in (build_from_csv, get_related_entities, find_similar_nodes):
    setattr(KnowledgeGraph, method.__name__, method)

# Same for the MemorySystem class
MemorySystem.answer_question = answer_question

# Mount Google Drive so the original CSV is reachable
from google.colab import drive
drive.mount('/content/drive')

# Make sure a small backup CSV exists
test_csv_path = "/content/test_knowledge_graph.csv"
create_test_csv(test_csv_path)

# Initialize the system
kg = KnowledgeGraph()

# Load order: original CSV on Drive -> backup test CSV -> hand-built graph
original_csv_path = "/content/drive/MyDrive/Brain/Memory_System/knowledge_graph_data.csv"
backup_csv_path = test_csv_path

try:
    print(f"Attempting to load: {original_csv_path}")
    kg.build_from_csv(original_csv_path)
except Exception as e:
    print(f"Failed to load original CSV: {e}")
    print(f"Trying backup CSV: {backup_csv_path}")
    try:
        kg.build_from_csv(backup_csv_path)
    except Exception as backup_e:
        print(f"Failed to load backup CSV: {backup_e}")
        print("Creating simple example graph instead")

        # Last resort: a five-node example graph described as data
        fallback_nodes = ["woman", "bicycle", "race", "athlete", "food"]
        fallback_edges = [
            ("woman", "race", "participates_in", "Women participate in races"),
            ("woman", "bicycle", "uses", "Women use bicycles in races"),
            ("athlete", "race", "competes_in", "Athletes compete in races"),
            ("athlete", "food", "consumes", "Athletes consume food for energy"),
        ]

        for node_name in fallback_nodes:
            kg.graph.add_node(node_name, type="entity")

        for edge_source, edge_target, edge_relation, edge_sentence in fallback_edges:
            kg.graph.add_edge(edge_source, edge_target, relation=edge_relation,
                              sentence=edge_sentence, tokens="", tags="")

        # Add embeddings
        for node_name in fallback_nodes:
            kg.node_embeddings[node_name] = kg.encoder.encode(node_name)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Test CSV file already exists: /content/test_knowledge_graph.csv
Attempting to load: /content/drive/MyDrive/Brain/Memory_System/knowledge_graph_data.csv
Successfully loaded CSV with 30000 rows
CSV columns: ['sentence', 'source', 'target', 'relation', 'tokens', 'tags']
First 2 rows of the CSV:
                                            sentence    source target  \
0  A woman is walking along side her bicycle duri...     woman   race   
1  Her comeback was not very successful being ham...  comeback   very   

               relation                                             tokens  \
0  ['walking', 'along']  ['A', 'woman', 'is', 'walking', 'along', 'side...   
1               ['was']  ['Her', 'comeback', 'was', 'not', 'very', 'suc...   

                                                tags  
0  ['O', 'SRC', 'O', 'REL', 'REL', 'O', 'O', 'O',...  
1  ['O', 'SRC

In [None]:
memory_system = MemorySystem(kg)

# Example memories, described as data and stored in order
example_memories = [
    {
        "scenario": "A woman walking with her bicycle during a race",
        "entities": ["woman", "bicycle", "race"],
        "relations": ["walking", "along"],
        "context": "Sports event with participants",
    },
    {
        "scenario": "An athlete preparing for a triathlon by testing her bicycle",
        "entities": ["athlete", "triathlon", "bicycle"],
        "relations": ["preparing", "testing"],
        "context": "Training session before competition",
    },
    {
        "scenario": "An athlete eating pasta the night before a marathon",
        "entities": ["athlete", "food", "marathon"],
        "relations": ["eating", "before"],
        "context": "Pre-race nutrition and preparation",
    },
]
for example in example_memories:
    memory_system.store_memory(**example)

# Query the system
question = "Niels was also the brother of Harald Bohr  He was a mathematician and he played in the National Team of Football."
result = memory_system.answer_question(question)

# Report the closest graph nodes
print(f"\nQuestion: {question}")
print("Relevant entities:")
for entity, similarity in result['relevant_entities']:
    print(f"- {entity} (Similarity: {similarity:.4f})")

# Report the edges leaving those nodes; the source sentence is shown when present
print("\nRelated knowledge:")
for knowledge in result['related_knowledge']:
    print(f"- {knowledge['source']} {knowledge['relation']} {knowledge['target']}")
    context_sentence = knowledge['sentence']
    if context_sentence:
        print(f"  Context: {context_sentence}")


Memory stored: A woman walking with her bicycle during a race
Memory stored: An athlete preparing for a triathlon by testing her bicycle
Memory stored: An athlete eating pasta the night before a marathon

Question: Niels was also the brother of Harald Bohr  He was a mathematician and he played in the National Team of Football.
Relevant entities:
- Bohr (Similarity: 0.3949)
- Bourdaloue (Similarity: 0.3765)
- Neville (Similarity: 0.3620)
- Hermann (Similarity: 0.3591)
- MorrisButler (Similarity: 0.3577)

Related knowledge:
- Bohr ['did'] JJ
  Context: Bohr did experiments in the Trinity College in Cambridge under the direction of JJ Thomson.
- Hermann ['was'] Minkowski
  Context: Hermann Minkowski LRB June 22  1864 â  January 12  1909 in GÃ ttingen RRB was a German mathematician.
- Hermann ['find'] HesseÂ
  Context: On Hermann HesseÂ s Der Steppenwolf we can also find a very clear picture of anomie.


In [None]:
import torch
from safetensors.torch import save_file

model_path = "memory_system_model"

# Write the weights of `model` (loaded in the first cell) to a .safetensors file
save_file(model.state_dict(), model_path + ".safetensors")

# If the model exposes a tokenizer with a vocabulary, dump it as
# "token<TAB>index" lines next to the weights.
tokenizer = getattr(model, 'tokenizer', None)
if hasattr(tokenizer, 'vocab'):
    with open(model_path + "_vocab.txt", "w", encoding='utf-8') as f:
        f.writelines(
            f"{token}\t{index}\n" for token, index in tokenizer.vocab.items()
        )

print(f"Model saved to {model_path}.safetensors")
print("Please remember to save any other necessary files, like a tokenizer's vocabulary, configuration, or special tokens.")


Model saved to memory_system_model.safetensors
Please remember to save any other necessary files, like a tokenizer's vocabulary, configuration, or special tokens.


In [None]:
import networkx as nx
import matplotlib.pyplot as plt

# Build (source, relation, target) triples from the `result` of the query cell
knowledge_triples = [(k['source'], k['relation'], k['target']) for k in result['related_knowledge']]

# Assemble a small directed graph of just those triples
G = nx.DiGraph()
for source, relation, target in knowledge_triples:
    # add_edge creates both endpoints when they are not in the graph yet
    G.add_edge(source, target, label=relation)

# Draw nodes and edges, then overlay the relation text on each edge
plt.figure(figsize=(10, 7))
pos = nx.spring_layout(G, seed=42)  # fixed seed keeps the layout reproducible
nx.draw(G, pos, with_labels=True, node_color="skyblue", node_size=2000, edge_color="gray", font_size=10, font_weight='bold', arrows=True)
edge_labels = nx.get_edge_attributes(G, 'label')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_color='red')
plt.title(f"Knowledge Graph for: '{question}'", fontsize=14)
plt.tight_layout()
# The stray ''' that used to follow this call opened an unterminated string
# literal, so the cell could not run and the figure was never drawn.
plt.show()


'import networkx as nx\nimport matplotlib.pyplot as plt\n\n# Build knowledge triples from your existing result\nknowledge_triples = [(k[\'source\'], k[\'relation\'], k[\'target\']) for k in result[\'related_knowledge\']]\n\n# Visualize\nG = nx.DiGraph()\nfor source, relation, target in knowledge_triples:\n    G.add_node(source)\n    G.add_node(target)\n    G.add_edge(source, target, label=relation)\n\nplt.figure(figsize=(10, 7))\npos = nx.spring_layout(G, seed=42)\nnx.draw(G, pos, with_labels=True, node_color="skyblue", node_size=2000, edge_color="gray", font_size=10, font_weight=\'bold\', arrows=True)\nedge_labels = nx.get_edge_attributes(G, \'label\')\nnx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_color=\'red\')\nplt.title(f"Knowledge Graph for: \'{question}\'", fontsize=14)\nplt.tight_layout()\nplt.show()'