In [1]:
import os

# Construct the path to the Cypher script, relative to the notebook's working directory
cypher_file_path = os.path.join('..', '..', '..', 'cypher', 'filetypes', 'filetypes.cypher')

# Fail early with an explicit message instead of a bare open() error
if not os.path.exists(cypher_file_path):
    raise FileNotFoundError(f"Cypher file not found at: {cypher_file_path}")

# Read the whole script. The encoding is pinned so the result does not depend on
# the platform default (e.g. cp1252 on Windows).
# NOTE(review): assumes the .cypher file is saved as UTF-8 - confirm.
with open(cypher_file_path, 'r', encoding='utf-8') as file:
    cypher_content = file.read()

# Now you can use cypher_content

In [2]:
# Split the cypher_content into individual instructions
def _split_cypher_statements(script):
    """Split a Cypher script into statements on top-level semicolons.

    A semicolon only ends a statement when it is outside of string literals
    ('...' or "..."), backtick-quoted identifiers, and // or /* */ comments.
    A plain ``script.split(';')`` would cut a statement in half whenever a
    property value such as a description contains a semicolon.

    Args:
        script: Full text of the Cypher script.

    Returns:
        List of statements with surrounding whitespace stripped; empty
        fragments are dropped. Comments are kept inside the statement text.
    """
    statements = []
    current = []
    quote = None  # the quote character we are currently inside, or None
    pos, length = 0, len(script)

    while pos < length:
        ch = script[pos]

        if quote is not None:
            current.append(ch)
            # Backslash escapes apply to string literals, not backtick identifiers.
            if ch == '\\' and quote != '`' and pos + 1 < length:
                current.append(script[pos + 1])
                pos += 2
                continue
            if ch == quote:
                quote = None
        elif ch in ('"', "'", '`'):
            quote = ch
            current.append(ch)
        elif script.startswith('//', pos):
            # Line comment: copy through to (not including) the newline.
            end = script.find('\n', pos)
            end = length if end == -1 else end
            current.append(script[pos:end])
            pos = end
            continue
        elif script.startswith('/*', pos):
            # Block comment: copy through the closing */ (or to end of script).
            end = script.find('*/', pos + 2)
            end = length if end == -1 else end + 2
            current.append(script[pos:end])
            pos = end
            continue
        elif ch == ';':
            statements.append(''.join(current))
            current = []
        else:
            current.append(ch)

        pos += 1

    # Whatever follows the last semicolon is a statement too (if non-empty).
    statements.append(''.join(current))
    return [stmt.strip() for stmt in statements if stmt.strip()]


instructions = _split_cypher_statements(cypher_content)

In [3]:
import os
from dotenv import load_dotenv
from pathlib import Path

# Load the .env file that sits three directories above the notebook.
# Note: despite its name, `project_root` holds the path of the .env file itself.
project_root = os.path.join('..', '..', '..', '.env')  # Adjust based on your structure
load_dotenv(project_root)

# Neo4j credentials (the user name is read from NEO4J_USERNAME)
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USER = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

# Fail fast with a readable message: os.getenv returns None for unset variables,
# which would otherwise only surface later when the driver is created.
_missing_neo4j_vars = [
    var_name
    for var_name, var_value in (
        ("NEO4J_URI", NEO4J_URI),
        ("NEO4J_USERNAME", NEO4J_USER),
        ("NEO4J_PASSWORD", NEO4J_PASSWORD),
    )
    if var_value is None
]
if _missing_neo4j_vars:
    raise EnvironmentError(
        "Missing Neo4j settings: " + ", ".join(_missing_neo4j_vars)
        + f" (looked in the environment and in {project_root})"
    )

In [4]:
from neo4j import GraphDatabase

class Neo4jConnector:
    """Thin wrapper around a Neo4j driver that runs one query per session.

    Can be used as a context manager so the driver is closed even when the
    body raises::

        with Neo4jConnector(uri, user, password) as conn:
            conn.run_query("RETURN 1")
    """

    def __init__(self, uri, user, password):
        """Create the underlying driver.

        Args:
            uri: Connection URI passed to ``GraphDatabase.driver``.
            user: User name for the auth tuple.
            password: Password for the auth tuple.
        """
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        """Return the connector itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; exceptions propagate."""
        self.close()

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def run_query(self, query, **kwargs):
        """Run a single query in its own session and return all records.

        Args:
            query: Cypher text to execute.
            **kwargs: Forwarded unchanged to ``session.run``.

        Returns:
            A list with every item yielded by the result.
        """
        with self.driver.session() as session:
            result = session.run(query, **kwargs)
            # Materialize inside the `with` so records are read before the session closes.
            return list(result)

# Initialize connection
neo4j_conn = Neo4jConnector(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

# Execute each Cypher instruction separately.
# The try/finally guarantees the driver is closed even if the loop is interrupted
# by something the inner handler does not catch (e.g. KeyboardInterrupt).
try:
    total_instructions = len(instructions)
    for i, instruction in enumerate(instructions, 1):
        try:
            print(f"\nExecuting instruction {i}/{total_instructions}: {instruction[:50]}...")  # Show first 50 chars
            results = neo4j_conn.run_query(instruction)

            # Print results if the query returns data
            if results:
                print(f"Results from instruction {i}:")
                for record in results:
                    print(record)
            else:
                print(f"Instruction {i} executed successfully (no results returned)")

        except Exception as e:
            # Deliberate best-effort: report the failure and move on to the next
            # instruction. Replace `continue` with `break` to stop on first error.
            print(f"Error executing instruction {i}: {e}")
            continue
finally:
    # Close connection
    neo4j_conn.close()


Executing instruction 1/19: // Create nodes for each file type with their prop...
Instruction 1 executed successfully (no results returned)

Executing instruction 2/19: MERGE (:FileType {
  name: "Canadian Well Log ASCI...
Instruction 2 executed successfully (no results returned)

Executing instruction 3/19: MERGE (:FileType {
  name: "Text (ASCII)",
  exten...
Instruction 3 executed successfully (no results returned)

Executing instruction 4/19: MERGE (:FileType {
  name: "Comma Separated Values...
Instruction 4 executed successfully (no results returned)

Executing instruction 5/19: MERGE (:FileType {
  name: "Tab Delimited (ASCII)"...
Instruction 5 executed successfully (no results returned)

Executing instruction 6/19: MERGE (:FileType {
  name: "Metafile",
  extension...
Instruction 6 executed successfully (no results returned)

Executing instruction 7/19: MERGE (:FileType {
  name: "Windows Metafile",
  e...
Instruction 7 executed successfully (no results returned)

Executing in

In [6]:
import os
import json
import faiss
import numpy as np
import ollama
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# 1. Load JSON file
json_file_path = os.path.join('..', '..', '..', 'Training_Info', 'filetypes.json')
# JSON text is UTF-8 by specification; pin the encoding so decoding does not
# depend on the platform's default locale encoding.
with open(json_file_path, 'r', encoding='utf-8') as f:
    filetypes = json.load(f)

In [8]:
# 2. Chunking: render every filetype record as one labelled text block
chunks, chunk_id_to_info = [], {}

for position, entry in enumerate(filetypes):
    # The empty first and last items produce the leading and trailing newline.
    rendered = "\n".join([
        "",
        f"Name: {entry['name']}",
        f"Extension(s): {entry['extension']}",
        f"Description: {entry['description']}",
        f"Loadable in GEO: {entry['load']}",
        f"Exportable from GEO: {entry['export']}",
        "",
    ])
    chunks.append(rendered)
    # Key is the record's position, i.e. the same order as `chunks`.
    chunk_id_to_info[position] = rendered

In [9]:
# 3. Embedding
# Load the 'all-MiniLM-L6-v2' SentenceTransformer model and encode every chunk.
# convert_to_numpy=True requests a NumPy result.
# NOTE(review): presumably a 2-D array with one row per chunk (the index setup
# reads embeddings.shape[1]) - confirm.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = embedder.encode(chunks, convert_to_numpy=True)

In [10]:
# 4. Vector store using FAISS
# The index dimensionality is taken from the second axis of the embedding array.
dimension = embeddings.shape[1]
# Flat index with L2 distance (per the FAISS class name); all chunk embeddings
# are added in a single call.
# NOTE(review): positions in the index presumably match the keys of
# chunk_id_to_info, since both follow the order of `chunks` - confirm.
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

In [11]:
# 5. RAG Function to handle a query
def answer_question(question, k=3, model='tinyllama:latest'):
    """Answer a question using the indexed file-type chunks as context.

    The question is embedded with the module-level ``embedder``, the nearest
    chunks are looked up in the module-level FAISS ``index``, and those chunks
    are placed in the system prompt of an ``ollama.chat`` call.

    Args:
        question: The user's question as a string.
        k: Maximum number of chunks to retrieve. Clamped to the number of
            vectors in the index, because FAISS pads missing neighbours with
            the label -1, which is not a key of ``chunk_id_to_info``.
        model: Name of the Ollama model to query.

    Returns:
        The text content of the model's reply.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Embed the question
    query_vec = embedder.encode([question], convert_to_numpy=True)

    # Retrieve top-k chunks, never asking for more neighbours than exist
    top_k = min(k, index.ntotal)
    top_chunks = []
    if top_k > 0:
        _, neighbor_ids = index.search(query_vec, top_k)
        # Drop any -1 padding defensively; int() normalizes NumPy integer labels.
        top_chunks = [chunk_id_to_info[int(i)] for i in neighbor_ids[0] if i >= 0]

    # Construct prompt using relevant chunks
    context = "\n---\n".join(top_chunks)
    system_prompt = f"""You are a geoscience file format expert. Based on the following file type information, answer the user's question.\n\n{context}"""

    # Ask the language model
    response = ollama.chat(
        model=model,
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': question}
        ]
    )
    return response['message']['content']

In [12]:
# 🔍 Example: send one question through answer_question (with its default k)
# and print the reply text it returns.
print(answer_question("What is a VIEW file and can it be exported?"))

A VIEW (View File) is an internal file format developed by the GeoDraft application for managing log data in geoscience projects. VIEW files are created by the GEO Software Suite and can be used to import and export log data, as well as to store and organize layout-related information.

As for being exportable, yes, VIEW files can be exported from GEO in many different formats such as ASCII, Excel, CSV or XML. However, exporting a VIEW file from GEO does not guarantee that the imported data will be accurately preserved and cannot guarantee the accuracy of the imported data or the preservation of metadata or other information associated with log data. This is because GEO Software Suite files are proprietary in nature and may not be compatible with different formats or applications outside of the software suite.
