See DemoSetupREADME.md for configuration instructions



In [None]:
%pip install pydgraph python-graphql-client ipycytoscape qdrant_client
import pydgraph
import json
import base64
import getpass
from python_graphql_client import GraphqlClient
from qdrant_client import QdrantClient
# Load the connection settings and secrets from the user-specific configuration file.
config_file = "config-local.json"  # Name of the user-specific configuration file
# Decode explicitly as UTF-8 so the result does not depend on the platform's default encoding.
with open(config_file, "r", encoding="utf-8") as f:
    config = json.load(f)

# Extract values from the configuration. A missing key raises KeyError right here,
# before any client is created.
dgraph_cerebro = config["dgraph_cerebro"]  # Dgraph Cloud endpoint used for login and lambda deployment
dgraph_graphql_endpoint = config["dgraph_graphql_endpoint"]  # GraphQL endpoint of the Dgraph backend
dgraph_grpc = config["dgraph_grpc"]  # gRPC endpoint used by the pydgraph (DQL) client
qdrnt_endpoint = config["qdrnt_endpoint"]  # Qdrant base URL, embedded into the lambda script
qdrnt_collection = config["qdrnt_collection"]  # Qdrant collection name, embedded into the lambda script
qdrnt_api_key = config["qdrnt_api_key"]  # Qdrant API key, embedded into the lambda script
dgraph_cloud_user = config["dgraph_cloud_user"]  # Dgraph Cloud login (email)
dgraph_cloud_passw = config["dgraph_cloud_passw"]  # Dgraph Cloud password
APIAdminKey = config["APIAdminKey"]  # Admin API key sent as the Dg-Auth header

OpenAIKey = config["OpenAIKey"]  # OpenAI API key, embedded into the lambda script



# The GraphQL admin endpoint is the GraphQL endpoint with /graphql replaced by /admin.
dgraph_graphql_admin = dgraph_graphql_endpoint.replace("/graphql", "/admin")
print(dgraph_graphql_admin)




# Enter credentials and set up db client objects

In [None]:

# Cloud credentials
# The cloud login (dgraph_cloud_user / dgraph_cloud_passw) is needed to upload the Lambda code.
# An Admin API key generated at https://cloud.dgraph.io/_/settings?tab=api-keys is needed for DQL alter and query.



# DQL client (gRPC), authenticated with the Admin API key
client_stub = pydgraph.DgraphClientStub.from_cloud(dgraph_grpc,APIAdminKey )
client = pydgraph.DgraphClient(client_stub)


# GraphQL clients:
#   gql_client       - data endpoint (/graphql), no extra headers
#   gql_admin_client - admin endpoint (/admin), sends the Admin API key as the Dg-Auth header
#   gql_cloud_client - Dgraph Cloud (Cerebro) endpoint, used later for login and lambda deployment
gql_client = GraphqlClient(endpoint=dgraph_graphql_endpoint)
headers = { "Dg-Auth": APIAdminKey }
gql_admin_client = GraphqlClient(endpoint=dgraph_graphql_admin, headers=headers)
gql_cloud_client = GraphqlClient(endpoint=dgraph_cerebro)

print("graphql client connections/objects established")

In [None]:

# Ask the admin endpoint for the health of every instance in the cluster.
# Any GraphQL error aborts the cell with the first error message.
health_query = "{health {instance version status}}"
data = gql_admin_client.execute(query=health_query)
if 'errors' in data:
    first_error = data['errors'][0]
    raise Exception(first_error['message'])

print("generic graphql client, check cluster health:", json.dumps(data, indent=2))


# Clear old data and load schema and new data

In [None]:
# TODO: add drop all permissions
# for now: drop data using cloud GUI Schema > drop all button

# Drop all data, including the schema, from the Dgraph instance. This is useful
# for small examples such as this one since it puts Dgraph into a clean state.

# TODO: Do this from the Dgraph Cloud Schema tab. Hit Drop data, then drop all data to clean the dgraph db.
# Destructive: the drop only runs when the user types exactly "y" at the prompt.
confirm = input("drop schema and all data (y/n)?")
if confirm == "y":
  op = pydgraph.Operation(drop_all=True)
  client.alter(op)
  print("schema and data deleted")


In [None]:
# Deploy the GraphQL Schema

graphql_schema = """
type Equipment {
  id: ID!
  name: String! @search(by: [term])
  model: EquipmentModel
  sensorData: [SensorData] @hasInverse(field: equipment)
  oilRig: OilRig @hasInverse(field: equipment)
}

type EquipmentModel {
  id: ID!
  model: String!
  installed: [Equipment] @hasInverse(field: model)
}

type SensorData @lambdaOnMutate(add: true, update: false, delete: false) {
  id: ID!
  timestamp: DateTime!
  value: Float!
  equipment: Equipment! @hasInverse(field: sensorData)
  embeddings: [Float!] @lambda
}

type OilRig @lambdaOnMutate(add: true, update: false, delete: false) {
  id: ID!
  name: String! @search(by: [fulltext, exact])
  equipment: [Equipment!]! @hasInverse(field: oilRig)
  issues: [Issue] @hasInverse(field: oilRig)
}

type Issue @lambdaOnMutate(add: true, update: false, delete: false) {
  id: ID!
  name: String!
  description: String!
  embeddings: [Float!] @lambda
  solution: String!
  equipment: [Equipment]
  oilRig: OilRig! @hasInverse(field: issues)
  similarIssues: [Issue] @lambda
  score: Float
}

type Expert {
  id: ID!
  name: String!
  expertise: [String!] @search(by: [term])
}

type Query {
  getSimilarIssues(description: String!, first: Int!): [Issue] @lambda
  getSimilarIssuesById(uid: String!, first: Int!): [Issue] @lambda
  getRelevantExperts(issueName: String!): [Expert] @lambda
}

input OilRigInput {
  name: String!
  equipment: [EquipmentInput!]
}

input EquipmentInput {
  name: String!
  sensorData: [SensorDataInput!]
}

input SensorDataInput {
  timestamp: DateTime!
  value: Float!
}
"""

mutation = """
mutation($sch: String!) {
  updateGQLSchema(input: { set: { schema: $sch}})
  {
    gqlSchema {
      schema
      generatedSchema
    }
  }
}
"""

# Push the schema through the admin endpoint's updateGQLSchema mutation.
variables = {"sch": graphql_schema}
schemadata = gql_admin_client.execute(query=mutation, variables=variables)
# Fail fast when the schema is rejected: every later cell depends on the generated
# GraphQL API, so continuing would only produce confusing follow-up errors.
if 'errors' in schemadata:
    raise Exception(schemadata['errors'][0]['message'])
print("GraphQL Schema after Update")
print(schemadata)


In [None]:
# shared equipment

addEquipmentModel = """
mutation MyMutation($input: [AddEquipmentModelInput!]!) {
  response: addEquipmentModel(input: $input) {
     equipmentModel {id}
  }
  }
"""
equip_model_data = [
    {
        "input":
          {"model": "Acme Pumps WHC-92"}
    }
]

# Create each equipment model and keep the id of the created model in compressorId
# (with several entries, the last one wins).
for model_entry in equip_model_data:
    print(model_entry)
    result = gql_client.execute(
        addEquipmentModel,
        variables={"input": model_entry["input"]},
    )
    print("result=", result)
    created_models = result['data']['response']['equipmentModel']
    compressorId = created_models[0]['id']
    print("wellhead id="+compressorId)



addWellheadCompressor = """
mutation MyMutation($input: [AddEquipmentInput!]!) {
  response: addEquipment(input: $input) {
      equipment { name id }
    }
  }
"""
equip_data = [
    {
    "input": [
      {"name": "Wellhead Compressor",
        "model": {
          "id": compressorId
        }},
      {"name": "Wellhead Booster",
        "model": {
          "id": compressorId
        }},
    ]
    },
]

# Create the shared equipment; the response lists the created nodes in input order,
# so index 0 is the compressor and index 1 is the booster.
for batch in equip_data:
    print(batch)
    result = gql_client.execute(
        addWellheadCompressor,
        variables={"input": batch["input"]},
    )
    print(result)
    created_equipment = result['data']['response']['equipment']
    wellheadCompressorId = created_equipment[0]['id']
    wellheadBoosterId = created_equipment[1]['id']
    print("wellhead id="+wellheadCompressorId)
    print("pump id="+wellheadBoosterId)

In [None]:
# Oil rigs
addOilRigRaph = """
mutation MyMutation($input: [AddOilRigInput!]!) {
  response: addOilRig(input: $input) {
    oilRig {
      id
      equipment { name id }
      issues {
        id
        equipment { name id }
      }
    }
  }
}
"""



oil_rig_data =   [
    {
        "input": {
            "name": "Ocean Driller 1",
            "equipment": [
                {
                    "name": "Downhole drilling motor A"
                },
                {
                    "name": "Drill bit A"
                },
                {
                    "name": "Drill stabilizer A"
                },
                {
                    "name": "Downhole Pump A"
                }
            ],
            "issues": [
                {
                    "name": "Mud Gate Valve leak",
                    "description": "Primary mud gate valve leaking slightly. 3 inch valve leaking below bonnet. Stopped drilling and replaced bonnet seal.",
                    "solution": "Tightened valve connection"
                }
            ]
        }
    },
    {
        "input": {
            "name": "Ocean Driller 2",
            "equipment": [
                {
                    "name": "Downhole drilling motor A"
                },
                {
                    "name": "Drill bit A"
                },
                {
                    "name": "Drill stabilizer A"
                },
                {
                    "name": "Downhole Pump A"
                }
            ],
            "issues": [
              {
                "name": "Shale vibration issue",
                "description": "Vibration detected coupled with slowly rising temperature readings while drilling through light shale layer at depth of 1700 meters. Downhole bore assembly recently inspected and unlikely to be at issue.",
                "solution": ""
              }
            ]
        }
    },
    {
        "input": {
            "name": "West Texas ND-3",
            "equipment": [
                {
                    "name": "Downhole drilling motor B"
                },
                {
                    "name": "Drill bit A"
                },
                {
                    "name": "Drill stabilizer A"
                },
                {
                    "id": wellheadCompressorId,
                    "name": "Wellhead Compressor",
                    "model": {
                        "model": "Acme Booster Compressor 3000"
                    }
                }
            ],
            "issues": [
              {
                  "name": "Wellhead compressor vibration",
                  "description": "Wellhead compressor rattling. Probably earlier liquid slug hit due to report of overnight banging Temperature remaining low - no changes since banging. Inspection showed some bearing wear previously.",
                  "solution": "Ordered new compressor bearings. Reduced pressure and switched to oil-based mud for the short term to reduce load on compressor.",
                  "equipment": [{"id": wellheadCompressorId}]
              }
            ]
        }
    },
    {
        "input": {
            "name": "West Texas ND-4",
            "equipment": [
                {
                    "name": "Booster compressor",
                    "id": wellheadBoosterId
                },
                {
                    "name": "Drill bit B"
                },
                {
                    "name": "Drill stabilizer A"
                },
                {
                    "name": "Downhole Pump A"
                }
            ],
            "issues": [
              {
                "name": "Reduced pressure",
                "description": "Wellhead compressor may be failing - lower pressure and higher temperature observed (230psi vs expected 300psi, and temperature rising to 230+ degrees on hot days.",
                "solution": "Pumped Oil-based mud (OBM) into the borehole. Moderate mud weight of 14 pounds per gallong (ppg) used to avoid cavitation. Scheduled service of the pump to determine root cause.",
                "equipment": [{"id": wellheadBoosterId}]
              }
            ]
        }
    },
    {
        "input": {
            "name": "Ocean Driller 5",
            "equipment": [
                {
                    "name": "Downhole drilling motor A"
                },
                {
                    "name": "Drill bit B"
                },
                {
                    "name": "Drill stabilizer A"
                },
                {
                    "name": "Booster Compressor (TODO: equip)"
                }
            ],
            "issues": [
              {
                "name": "Drill hot",
                "description": "Drill collar overheating, requiring drill slowdown. Suspect an issue with mud volume, or low pressure due to failing booster pump. There was a slug hit on the compressor last week, making the pump",
                "solution": "Pumped Oil-based mud (OBM) into the borehole. Moderate mud weight of 14 pounds per gallong (ppg) used to avoid cavitation."
              }
            ]
        }
    }
]



# Send one addOilRig mutation per rig (each carries its nested equipment and issues)
# and echo both the request payload and the response.
for rig in oil_rig_data:
    print(rig)
    mutation_variables = {"input": rig["input"]}
    result = gql_client.execute(addOilRigRaph, variables=mutation_variables)
    print(result)


In [None]:
addExpert = """
mutation NewExpertMutation($name: String!, $expertise: [String!]!) {
    addExpert(input: {name: $name, expertise: $expertise}) {
        expert {
            id
        }
    }
}"""
# Experts
expert_data = [
    {
        "name": "Dr. James Bond",
        "expertise": ["Downhole pump systems, including collar, bits, and mud pumps", "Pump repair and diagnosis", "Temperature, pressure and vibration monitoring"]
    },
    {
        "name": "Prof. Maria Hill",
        "expertise": ["Valve sizing and monitoring", "Fluid dynamics", "Flow rate disruptions or slowdown", "Cavitation detection and remediation"]
    },
    {
        "name": "Mr. Tony Stark",
        "expertise": ["Motor mechanisms", "Motor overheating, overload, lubrication and cooling issues", "Geochemistry and rock formations", "Stick-slip detection and rebalancing"]
    }
]

# Create the experts one at a time and collect the ids Dgraph assigned to them.
expert_ids = []
for expert in expert_data:
    variables = {field: expert[field] for field in ("name", "expertise")}
    result = gql_client.execute(query=addExpert, variables=variables)
    created_experts = result['data']['addExpert']['expert']
    expert_ids.append(created_experts[0]['id'])

In [None]:
#
# Assuming we don't need this in the approach where the equipment is nested items under the Oil Rig in above mutation
#
#


addEquipment = """
mutation NewEquipmentMutation($name: String!) {
    addEquipment(input: {name: $name}) {
        equipment {
            id
        }
    }
}"""
equipment_data = [
    {
        "name": "Pump A"
    },
    {
        "name": "Motor B"
    },
    {
        "name": "Valve C"
    }
]

# Equipment: create each standalone item and remember its id, in input order.
equipment_ids = []
for equipment in equipment_data:
    variables = dict(name=equipment["name"])
    result = gql_client.execute(query=addEquipment, variables=variables)
    created_equipment = result['data']['addEquipment']['equipment']
    equipment_ids.append(created_equipment[0]['id'])

In [None]:
#
# Assuming we don't need this in the approach where the sensor data is nested items under the Oil Rig in above mutation
#
#


addSensorData = """
mutation NewSensorDataMutation($timestamp: DateTime!, $value: Float!, $equipmentID: ID!) {
    addSensorData(input: {timestamp: $timestamp, value: $value, equipment: {id: $equipmentID}}) {
        sensorData {
            id
        }
    }
}"""
sensor_data = [
    {
        "timestamp": "2023-06-24T18:25:43.511Z",
        "value": 10.5
    },
    {
        "timestamp": "2023-06-20T13:15:30.789Z",
        "value": 30.2
    },
    {
        "timestamp": "2023-06-19T09:05:21.654Z",
        "value": 50.1
    }
]
# Sensor data: the i-th reading is attached to the i-th equipment id created above.
for i, data in enumerate(sensor_data):
    variables = {
        "timestamp": data["timestamp"],
        "value": data["value"],
        "equipmentID": equipment_ids[i],
    }
    gql_client.execute(query=addSensorData, variables=variables)

In [None]:
# Issues are now part of the overall Oil Rig data above
# Skip this.
#


#Create Issues
addIssue = """
mutation NewIssueMutation($name: String!, $description: String!, $solution: String!) {
    addIssue(input: {name: $name, description: $description, solution: $solution}) {
        numUids
    }
}"""
exampleIssue = """
mutation MyMutation {
  addIssue(input: {
      name: "Shale vibration issue",
      description: "Vibration detected coupled with slowly rising temperature readings while drilling through light shale layer at depth of 230 meters. Downhole bore assembly recently inspected and unlikely to be at issue.",
      solution: "",
      oilRig: {id: "0xfffd8d7286de7b9e"}}) {
    issue {
      id
      name
      oilRig {
        name
      }
    }
  }
}
"""
issues_data = [
    {"name": "Mud pump hammering", "equipment":"Pump 1",
       "description": "Hammering noise from piples and pump area when using heavy mud (18 ppg). Vibration sensors downwell suggest some cavitation. Pressure fluctuating +/- 20 psi during hammer noise incidents, but pump pressure remaiing over 200psi.",
       "solution": "Replaced pump filter"},
    {"name": "Drill ",
       "description": "Drill collar overheating",
       "solution": "Pumped Oil-based mud (OBM) into the borehole. Moderate mud weight of 14 pounds per gallong (ppg) used to avoid cavitation."},
    {"name": "Wellhead compressor noise",
       "description": "Wellhead compressor noise on hot days (starting around temperature over 35 degrees C)",
       "solution": "Ordered new compressor bearings. Reduced pressure and switched to oil-based mud for the short term to reduce load on compressor."},
    {"name": "Mud Gate Valve leak",
       "description": "Primary mud gate valve leaking slightly. 3 inch valve leaking below bonnet. Stopped drilling and replaced bonnet seal.",
       "solution": "Tightened valve connection"},
]

# The addIssue mutation declares only $name, $description and $solution, so send exactly
# those keys; issues_data entries may carry extra keys (e.g. "equipment") that are not
# GraphQL variables of this mutation.
# NOTE(review): the Issue type declares `oilRig: OilRig!`, so this standalone mutation
# presumably fails validation; issues are normally created nested under their oil rig.
declared_variables = ("name", "description", "solution")
failed_issues = []
for issue in issues_data:
    variables = {key: issue[key] for key in declared_variables}
    result = gql_client.execute(query=addIssue, variables=variables)
    # Report failures instead of claiming success unconditionally.
    if 'errors' in result:
        failed_issues.append(issue["name"])
        print("failed to create issue", repr(issue["name"]), "-", result['errors'][0]['message'])

if failed_issues:
    print(len(failed_issues), "of", len(issues_data), "issues could not be created")
else:
    print("Issues created")


In [None]:
#
# Are these used?
#
#

# Create Status Updates
addStatusUpdate = """
mutation NewStatusUpdateMutation($time: DateTime!, $description: String!) {
    addStatusUpdate(input: {time: $time, description: $description}) {
        numUids
    }
}"""

from datetime import datetime, timedelta

# One status message per day, newest first: the k-th description is timestamped
# k days before "now" (local, naive time).
# NOTE(review): no StatusUpdate type is visible in the GraphQL schema deployed by this
# notebook - confirm whether this cell is still needed.
time_now = datetime.now()
status_descriptions = [
    "Pump A making strange noise",
    "Drill B overheating",
    "Compressor C not starting",
    "Valve D leaking",
]
status_updates_data = [
    {"time": (time_now - timedelta(days=days_ago)).isoformat(), "description": description}
    for days_ago, description in enumerate(status_descriptions)
]

for update in status_updates_data:
    gql_client.execute(query=addStatusUpdate, variables=update)

print("Status updates created")


# Add JS hooks (lambda functions) to call LLM + Vector DB

In [None]:
# Get a token to be able to change the database
# Lambda is deployed through Cloud Cerebro endpoint, and needs a token

login = """
query  login($email: String!, $passw: String!){
  login(email: $email, password: $passw) {
    token
  }
}
"""
# Log in to Dgraph Cloud (Cerebro) with the configured account to obtain a bearer token.
login_var = { "email": dgraph_cloud_user, "passw": dgraph_cloud_passw}
login_info = gql_cloud_client.execute(query=login, variables=login_var)
# Surface a rejected login with the server's message instead of failing on the
# dictionary lookups below.
if 'errors' in login_info:
    raise Exception(login_info['errors'][0]['message'])

token = login_info['data']['login']['token']
# Headers for all subsequent authenticated Cerebro requests.
cerebro_headers = { "Content-Type": "application/json", "Authorization": "Bearer "+token }

print("Cerebro token retrieved.")


In [None]:
# We need the deployment ID of the backend that serves the GraphQL endpoint.
# Note the doubled curly brackets: str.format() treats single braces as placeholders.
query = """
query {{
    searchDeployments(inputType: endpoint, searchText: "{0}") {{
        subdomain
        name
        uid
    }}
}}
""".format(dgraph_graphql_endpoint)

deployment_info = gql_cloud_client.execute(query=query, headers=cerebro_headers)
print(json.dumps(deployment_info, indent=2))
if 'errors' in deployment_info:
    raise Exception(deployment_info['errors'][0]['message'])

# An empty result would otherwise surface as a bare IndexError; say what is wrong instead.
deployments = deployment_info['data']['searchDeployments']
if not deployments:
    raise Exception("no Dgraph Cloud deployment found for endpoint " + dgraph_graphql_endpoint)
deploymentID = deployments[0]['uid']

print('DeploymentID: '+deploymentID)


In [None]:
script = """
function dotProduct(v,w) {
   // Sum of the element-wise products of v and w.
   // As OpenAI embedding vectors are normalized,
   // dot product = cosine similarity.
   let sum = 0
   v.forEach((component, index) => {
     sum = sum + component * w[index]
   })
   return sum
}

// Upserts one embedding vector into the Qdrant collection.
//   nodetype - stored in the point payload as "type" (used as a filter when searching)
//   uid      - Dgraph uid of the node; converted with BigInt to a decimal point id
//   vec      - embedding vector (array of numbers)
// Best effort: failures are logged and never thrown to the caller.
async function saveVector(nodetype,uid,vec) {
  try {
    let url = `"""+qdrnt_endpoint+"""/collections/"""+qdrnt_collection+"""/points?wait=true`;
    // The body is assembled as text so the id is written as a plain JSON integer.
    let body = `{ "points": [ { "id":${BigInt(uid).toString()}, "vector":[${vec}], "payload": { "type": "${nodetype}"} } ] }`
    let response = await fetch(url,{
      method: "PUT",
      headers: {
        "Content-Type": "application/json",
        "api-key": `"""+qdrnt_api_key+"""`
      },
      body: body
    })

    // Read the body in every case; report it when Qdrant rejected the request so a
    // failed upsert does not go unnoticed.
    let text = await response.text();
    if (!response.ok) {
      console.log("Error " + response.status + " from qdrant during saveVector: " + text)
    }
  } catch (error) {
    console.log("Error during saveVector: "+ error)
  }

}

// Searches the Qdrant collection for the points closest to `vec`.
//   ID       - decimal point id to exclude from the results (rendered inside "has_id")
//   vec      - query vector (array of numbers)
//   nodetype - only points whose payload "type" equals this value are returned
//   limit    - maximum number of hits (default 10)
// Returns an array of { id, score }: id is the decimal point id as a string, score is a
// number. Returns an empty array when the request fails.
async function searchSimilarity(ID, vec,nodetype,limit=10) {
  try {
    let url = `"""+qdrnt_endpoint+"""/collections/"""+qdrnt_collection+"""/points/search?wait=true`;
    let body = `{
    "vector" : [${vec}],
    "filter" : {
            "must" : [ {
                "key":"type",
                "match": {
                    "value":"${nodetype}"
                }
            }],
            "must_not": [{"has_id":[${ID}]}]
        },
    "limit":${limit}
    }`
    let response = await fetch(url,{
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "api-key": `"""+qdrnt_api_key+"""`
      },
      body: body
    })
    console.log("qdrant response status: "+ response.status);
    let resp = await response.text()
    if (response.status == 400) {
      console.log("Error 400 from qdrant: "+resp)
    }
    // Ids and scores are extracted from the raw text rather than via JSON.parse,
    // presumably because point ids are 64-bit integers that a JS number cannot hold exactly.
    const r1 = /"id":\s*(?<id>\d+)/g
    // Accepts an optional sign, fraction and exponent (e.g. -0.12 or 1.5e-05).
    const r2 = /"score":\s*(?<score>-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)/g
    const data = []
    let m
    while ((m = r1.exec(resp)) != null) {
      data.push( {
        "id":m.groups['id']
      })
    }
    // Scores appear in the same order as ids; never write past the collected ids.
    let i = 0
    while (i < data.length && (m = r2.exec(resp)) != null) {
      data[i]["score"] = parseFloat(m.groups['score'])
      i++
    }
    return data // array with id and score
  } catch (error) {
    console.log("Error getting qdrant response: "+ error)
    return [] // callers iterate over the result
  }
}
// Finds the points most similar to an already stored point.
//   ID       - decimal Qdrant point id of the reference point
//   nodetype - payload "type" the hits must have
//   limit    - maximum number of hits (default 10)
// Fetches the stored vector of the point, then delegates to searchSimilarity, which
// excludes the reference point itself. Returns an empty array on any failure.
async function searchSimilarityByID(ID,nodetype,limit=10) {
  try {
    let url = `"""+qdrnt_endpoint+"""/collections/"""+qdrnt_collection+"""/points/`+ID;
    console.log(url)
    let response = await fetch(url,{
      method: "GET",
      headers: {
        "api-key": `"""+qdrnt_api_key+"""`
      },
    })
    console.log("qdrant response status: "+ response.status);
    const data = await response.json();
    if (!data.result || !data.result.vector) {
      console.log("No stored vector found in qdrant for point "+ID)
      return []
    }

    const vector = data.result.vector;
    // Pass the requested node type through instead of a hard-coded "Issue".
    const hits = await searchSimilarity(ID,vector, nodetype, limit);
    return hits || []
  } catch (error) {
    console.log("Error getting qdrant response: "+ error)
    return []
  }

}

// Writes the given RDF triples (N-Quads text) to Dgraph in a single DQL `set` mutation.
//   dql  - DQL client handed to the lambda resolver/webhook
//   rdfs - triples to set; an empty string means there is nothing to write
// Returns the result of dql.mutate, or undefined when rdfs is empty.
// (This function was previously defined twice with identical bodies; one copy is enough.)
async function mutateRDF(dql,rdfs) {
  console.log("Mutation: " + rdfs)
  if (rdfs !== "") {
        return dql.mutate(`{
                set {
                    ${rdfs}
                }
            }`)
    }
}

// Requests an embedding vector for `text` from the OpenAI embeddings API
// (model text-embedding-ada-002) and returns it as an array of numbers.
// Throws when the API does not return an embedding.
async function embedding(text) {
  console.log("getting embedding vector for: "+text)
  let url = `https://api.openai.com/v1/embeddings`;
  let response = await fetch(url,{
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer """+OpenAIKey+"""`
    },
    // JSON.stringify escapes quotes, backslashes and line breaks in the text; splicing
    // the raw text into a JSON template produced invalid requests for such input.
    body: JSON.stringify({ input: text, model: "text-embedding-ada-002" })
  })
  let data = await response.json();
  if (!response.ok || !data.data || data.data.length === 0) {
    throw new Error("OpenAI embedding request failed with status " + response.status + ": " + JSON.stringify(data.error || data))
  }
  return data.data[0].embedding;
}

// Resolver for the Issue.similarIssues field: the 5 issues closest to the parent issue.
const similarIssuesField = ({ parent: { id },dql }) => findSimilarIssuesById(id, 5,dql);

// Resolver for Query.getSimilarIssues(description, first):
// issues whose stored embedding is closest to the embedding of a free-text description.
async function getSimilarIssues({ args, dql }) {
  const description = args.description;
  const limit = args.first;
  const similar = await findSimilarIssues(description, limit, dql);
  return similar;
}

// Resolver for Query.getSimilarIssuesById(uid, first):
// issues closest to an existing issue identified by its Dgraph uid.
async function getSimilarIssuesById({ args, dql }) {
  const limit = args.first;
  const similar = await findSimilarIssuesById(args.uid, limit, dql);
  return similar;
}

// Embeds the description and searches Qdrant with that vector.
// (getSimilarIssues called this function, but it was never defined.)
async function findSimilarIssues(description, limit, dql) {
  const v1 = await embedding(description);
  // There is no stored point to exclude: an empty ID renders as "has_id":[] in the filter.
  const hits = await searchSimilarity("", v1, "Issue", limit);
  return loadIssuesWithScores(hits, dql);
}

// Looks up the stored vector of the issue in Qdrant and searches with it.
async function findSimilarIssuesById(uid, limit, dql) {
  console.log("findSimilarIssuesById uid: " + uid);
  let qdID = BigInt(uid).toString(); // convert hex to decimal
  const hits = await searchSimilarityByID(qdID, "Issue", limit);
  return loadIssuesWithScores(hits, dql);
}

// Turns Qdrant hits ({ id: decimal string, score }) into Issue objects read from Dgraph,
// each annotated with its similarity score and ordered from most to least similar.
async function loadIssuesWithScores(hits, dql) {
  // Nothing found, or the search failed: do not send an empty uid() query to Dgraph.
  if (!hits || hits.length === 0) {
    return [];
  }
  const scores = {};
  hits.forEach((hit) => {
    // Qdrant ids are decimal; Dgraph uids are written as 0x-prefixed hex.
    scores['0x' + BigInt(hit.id).toString(16)] = parseFloat(hit.score);
  });
  const ids = Object.keys(scores);

  const query = `{
    issues(func: uid(${ids})) {
      id: uid
      name: Issue.name
      description: Issue.description
      solution: Issue.solution
      oilRig: Issue.oilRig {
        name: OilRig.name
      }
    }
  }`;
  console.log("SimilarIssues query: " + query)
  const results = await dql.query(query);
  console.log("SimilarIssues result: " + JSON.stringify(results))
  const issues = (results.data && results.data.issues) || [];
  issues.forEach((issue) => {
    issue.score = scores[issue.id];
  });
  // The DQL result is not ordered by similarity; put the best match first.
  issues.sort((a, b) => b.score - a.score);

  return issues;
}

// Webhook for added Issues (registered as "Issue.add").
// For every added issue: compute the embedding of its description, store it in Qdrant,
// look up the most similar issues already stored, and record one
// <issue> <similarity> <other> edge per hit with the score as the `cosine` facet.
// The previous version referenced undefined variables (similarIssues, serialized),
// passed its arguments to searchSimilarity in the wrong order and wrote an <Issue> triple
// with a possibly empty target, so it could not run to completion.
async function addIssueWebhook({ event, dql, graphql, authHeader }) {
  var rdfs = "";
  for (let i = 0; i < event.add.rootUIDs.length; ++i) {
    const uid = event.add.rootUIDs[i];
    console.log(`adding embedding for Issue ${uid} ${event.add.input[i]['name']}`);
    const v1 = await embedding(event.add.input[i].description);
    await saveVector("Issue", uid, v1);
    // The issue was just stored, so exclude its own point id from the search.
    const ownPointID = BigInt(uid).toString();
    const similarIssues = (await searchSimilarity(ownPointID, v1, "Issue", 10)) || [];
    for (let c of similarIssues) {
      const score = parseFloat(c.score);
      if (isNaN(score)) {
        continue; // hit without a usable score
      }
      const cuid = "0x"+BigInt(c.id).toString(16);
      rdfs += `<${uid}>  <similarity> <${cuid}> (cosine=${score}) .
         `;
    }
  }
  await mutateRDF(dql,rdfs);
}

// Webhook for added OilRigs (registered as "OilRig.add").
// Issues created nested inside an addOilRig mutation are handled here: for every new rig
// the webhook reads the rig's issues back from Dgraph, stores an embedding of each issue
// description in Qdrant and writes an <issue> <issueOf> <rig> triple.
// `issueOf` is a plain DQL predicate; on a cluster in strict mode it has to be declared
// in the DQL schema before this mutation can succeed.
async function addOilRigWebhook({ event, dql, graphql, authHeader }) {
  var rdfs = "";
  console.log("Whole Event");
  console.log(JSON.stringify(event, null, 2));

  for (let i = 0; i < event.add.rootUIDs.length; ++i) {
    const oilRigUID = event.add.rootUIDs[i];
    console.log("Rig UID")
    console.log(oilRigUID)
    // Read the issues together with their descriptions. Pairing the queried issues with
    // event.add.input[i].issues by position would attach the wrong text to an issue
    // whenever the two lists are ordered differently.
    const query = `query MyQuery {
  getOilRig(id: "${oilRigUID}") {
    issues {
      id
      description
    }
  }
}`;
    console.log(query)
    const results = await graphql(query);
    console.log("queryResults")
    console.log(JSON.stringify(results));
    const rig = results.data && results.data.getOilRig;
    const issues = rig ? rig.issues : null;

    if (issues && issues.length > 0) {
      console.log("ISSUES");
      console.log(issues);
      for (const issue of issues) {
        const issueUID = issue.id;
        console.log(`Adding embedding for Issue ${issueUID}`);
        const v1 = await embedding(issue.description);
        await saveVector("Issue", issueUID, v1);

        rdfs += `<${issueUID}> <issueOf> <${oilRigUID}> .
                  `;
      }
    }
  }

  await mutateRDF(dql, rdfs);
}





// Webhooks invoked after add mutations on the listed types.
// NOTE(review): the GraphQL schema also puts @lambdaOnMutate(add: true) on SensorData,
// but no "SensorData.add" webhook is registered here - confirm whether that is intended.
self.addWebHookResolvers({
  "Issue.add": addIssueWebhook,
  "OilRig.add": addOilRigWebhook
});

// Resolvers for @lambda queries and fields of the GraphQL schema.
// NOTE(review): the schema also declares Query.getRelevantExperts, Issue.embeddings and
// SensorData.embeddings as @lambda, and none of them has a resolver registered here.
self.addGraphQLResolvers({
  "Query.getSimilarIssues": getSimilarIssues,
  "Query.getSimilarIssuesById": getSimilarIssuesById,
  "Issue.similarIssues": similarIssuesField,
});


"""

# The lambda source is sent to the Cerebro endpoint as base64 text.
encoded = base64.b64encode(script.encode('utf-8'))


mutation = """
mutation ($deploymentID: ID!, $tenantID: Int!,$lambdaScript: String! ){
  updateLambda(input: { deploymentID: $deploymentID, tenantID: $tenantID, lambdaScript: $lambdaScript})
}
"""
# base64 output is bytes; the GraphQL variable has to be a string.
variables = dict(
    deploymentID=deploymentID,
    tenantID=0,
    lambdaScript=encoded.decode("utf-8"),
)
deployment_status = gql_cloud_client.execute(
    query=mutation,
    variables=variables,
    headers=cerebro_headers,
)


print(deployment_status)

In [None]:
# Add predicates to the Dgraph (DQL) type schema.
# The lambda script writes these predicates directly through DQL mutations.
# If your cluster is in strict mode, predicates must be declared before they are used.
# `issueOf` is written by the OilRig webhook (<issue> <issueOf> <rig>) and was missing here.

dqlschema = """
  embedding: string .
  similarity: [uid] .
  issueOf: [uid] .
"""
op = pydgraph.Operation(schema=dqlschema)
client.alter(op)


# Check data was classified via LLM

In [None]:
# Verify that issues were stored and linked to similar issues by the lambda resolvers.
# The deployed schema has no Project type, so the check queries Issue instead;
# `similarIssues` is resolved by the lambda (Qdrant similarity search).
queryIssues = """
query queryIssues {
    queryIssue(first:100) {
        id name
        oilRig {
            name
        }
        similarIssues {
            id name score
        }
    }
}
"""
data = gql_client.execute(query=queryIssues)
# GraphQL may return partial data together with errors (e.g. when a lambda field fails),
# so report the errors but still print whatever data came back.
if 'errors' in data:
    print("errors:", json.dumps(data['errors'], indent=2))

print(json.dumps((data.get('data') or {}).get('queryIssue'), indent=2))


# Visualize some graph data

In [None]:
import ipycytoscape
from google.colab import output
output.enable_custom_widget_manager()

# Fetch the data to draw in this cell so it does not depend on what an earlier cell left
# in a variable. (The previous version iterated over the keys of a GraphQL response dict
# and read fields - uid, title, dgraph.type - that were never queried.)
graph_query = """
query {
    queryIssue(first: 100) {
        id
        name
        oilRig { id name }
    }
}
"""
graph_result = gql_client.execute(query=graph_query)
if 'errors' in graph_result:
    raise Exception(graph_result['errors'][0]['message'])

# One node per issue, one node per distinct oil rig, one edge from each issue to its rig.
graph_data = {"nodes": [], "edges": []}
seen_rig_ids = set()
for issue in graph_result['data']['queryIssue']:
    graph_data['nodes'].append({"data": {"id": issue['id'], "label": issue['name'], "type": "Issue"}, "classes": "Issue"})
    rig = issue.get('oilRig')
    if rig is None:
        continue
    if rig['id'] not in seen_rig_ids:
        seen_rig_ids.add(rig['id'])
        graph_data['nodes'].append({"data": {"id": rig['id'], "label": rig['name'], "type": "OilRig"}, "classes": "OilRig"})
    graph_data['edges'].append({"data": {"source": issue['id'], "target": rig['id']}})


cyto_styles = [
    {'selector': 'node[type = "Issue"]', 'style': {
        'font-family': 'helvetica',
        'font-size': '6px',
        'label': 'data(label)',
        'background-color': 'blue'}},
    {'selector': 'node[type = "OilRig"]', 'style': {
        'font-family': 'helvetica',
        'font-size': '6px',
        'label': 'data(label)',
        'background-color': 'orange'}}
]

cytoscapeobj = ipycytoscape.CytoscapeWidget()
cytoscapeobj.graph.add_graph_from_json(graph_data)
cytoscapeobj.set_style(cyto_styles)
cytoscapeobj.set_layout(name='cola', animate = True, nodeSpacing=10, edgeLengthVal=45, maxSimulationTime= 1500)

#display
cytoscapeobj

# Notes

## overview
This Python notebook sets up Dgraph to integrate graph data, LLM-based text analysis, and Vector-DB based fuzzy links into one overall Graph, and expose it via GraphQL.

Once this is run, GraphQL inserts and queries will transparently work to store all data, query all relationships, and knit fuzzy, AI-based links among data with actual data linkages from source systems.

The presented Python notebook code exhibits an advanced usage of Dgraph as a graph database, in combination with Qdrant as a vector database and OpenAI as a tool for generating embeddings. In this case, the graph database Dgraph is used for managing structured data, while the vector database Qdrant is used for managing and searching through high-dimensional data points, namely the embeddings generated by OpenAI.

The architecture suggested by this code leverages Dgraph's lambda features, which provide the capability to execute JavaScript functions in response to certain events, such as additions, updates, and deletions of data. In the current context, these JavaScript functions are used to manage the interactions between Dgraph, Qdrant, and OpenAI.

When new Oil Rigs or Issues are added in Dgraph, the corresponding lambda functions are triggered. These functions call out to OpenAI's API to generate an embedding (vector) for each newly added entity, based on the text in the entity. These embedding vectors are then stored in Qdrant, a vector database.

To understand the benefits of using Dgraph and Lambda functions for working with Large Language Models (LLMs) and vector databases, let's delve deeper into the structure and functionality of the presented code.

--



## Here are the key components of the code:

Dgraph: Dgraph is a distributed, transactional, open-source graph database built to provide Google-grade scale and speed, enabling the user to ask complex queries and receive responses in real time. Its horizontal scalability and distributed architecture make it an excellent choice for applications with heavy data and query loads. Dgraph uses GraphQL to both navigate graphs (networks) of data, and also to integrate multiple data sources into a single API.

Lambda Functions in Dgraph: Lambda functions are JavaScript functions that execute custom logic on the database side in response to GraphQL mutations and queries. In this case, the lambda functions search for similar vector representations of objects in the database. The Lambda feature allows more complex functionality to appear as part of a GraphQL API.

OpenAI API: OpenAI's API is used to generate embeddings for text. The 'text-embedding-ada-002' model transforms a text into a vector representation that captures its semantic content. The embedding is then saved in the Qdrant database and associated with the Dgraph node with the original text.

Qdrant Vector Database: Qdrant is a vector similarity search engine with extended functionality for manipulating vector collections. It is used to store and search for vector representations of objects (in this notebook, issues) to determine similarity between different entities.

## Now let's delve into how the system works as a whole:



Finding similar items: When a GraphQL request includes links to similar objects, a Lambda function is invoked to determine what is similar or related. The function generates an embedding for the input object text using the OpenAI API and then calls out to Qdrant to find similar items.


## Pulling it all together
In conclusion, this integration leverages the power of Large Language Models and vector databases with Dgraph. By associating semantic embeddings with graph nodes, we can perform similarity searches and automatically categorize entities. Because Dgraph exposes data via GraphQL, the process is transparent to users - they simply query the graph, and some of the "edges" are actually AI-generated similarity links. Dgraph itself internally stores all non-fuzzy relations and data values, allowing massive scale and fast queries, together with AI, LLMs and similarity metrics.
