## Profile Summary Creation

This notebook extracts the information stored about each person in the knowledge graph, uses an LLM to create a summary of that person, and saves the summary as a property of the person node.

You need a `.env` file that contains your Neo4j credentials (`NEO4J_URI`, `NEO4J_USERNAME`, `NEO4J_PASSWORD`; see knowledge_graph_construction.ipynb) and your OpenAI API key (`OPENAI_API_KEY`).

In [1]:
import os
from dotenv import load_dotenv
import json
from neo4j import GraphDatabase
from langchain.prompts.prompt import PromptTemplate
from langchain_openai import ChatOpenAI
import openai


In [2]:
# Load the variables defined in the .env file so the cells below can read
# them with os.getenv (Neo4j credentials and the OpenAI API key).
load_dotenv()

True

#### 1. Configure OpenAI API and neo4j

In [3]:
# Hand the API key from the environment to the openai module.
# The value is None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [4]:
# Neo4j connection settings are read from the environment; a single driver
# instance is created here and reused by the cells below.
neo4j_uri = os.environ.get("NEO4J_URI")
neo4j_user = os.environ.get("NEO4J_USERNAME")
neo4j_password = os.environ.get("NEO4J_PASSWORD")
driver = GraphDatabase.driver(
    neo4j_uri,
    auth=(neo4j_user, neo4j_password),
)

#### 2. Function to fetch all employee IDs

In [5]:
def fetch_all_employees(session):
    """Return the ``id`` property of every ``Person`` node.

    Args:
        session: Open Neo4j session; only its ``run(query)`` method is used.

    Returns:
        list: The ``employee_id`` value of each record, in result order.
    """
    query = """
    MATCH (p:Person)
    RETURN p.id AS employee_id
    """
    employee_ids = []
    for row in session.run(query):
        employee_ids.append(row['employee_id'])
    return employee_ids

#### 3. Function to fetch employee data

In [6]:
def fetch_employee_data(employee_id, session):
    """Fetch one person and the nodes directly related to them as JSON.

    Each relationship type is matched in its own OPTIONAL MATCH clause and
    aggregated immediately. A single comma-separated OPTIONAL MATCH is treated
    by Cypher as one pattern: if any part is missing (for example the person
    created no document), every optional variable becomes null and all
    collections come back empty. Aggregating after each clause also avoids
    building the cartesian product of all related nodes.

    Args:
        employee_id: Value of the ``id`` property of the ``Person`` node.
        session: Open Neo4j session; only ``run(query, **params)`` is used.

    Returns:
        str: JSON array with one object per matched person, holding the keys
        ``p``, ``teams``, ``managers``, ``sentchatMessages``,
        ``receivedchatMessages``, ``sentemailMessages``,
        ``receivedemailMessages`` and ``documents``. The array is empty when
        no person has the given id.
    """
    query = """
    MATCH (p:Person {id: $employee_id})
    OPTIONAL MATCH (p)-[:BELONGS_TO]->(t:Team)
    WITH p, collect(DISTINCT t) AS teams
    OPTIONAL MATCH (p)-[:REPORTS_TO]->(manager:Person)
    WITH p, teams, collect(DISTINCT manager) AS managers
    OPTIONAL MATCH (p)-[:WROTE]->(chat:chat_message)
    WITH p, teams, managers,
         collect(DISTINCT chat) AS sentchatMessages
    OPTIONAL MATCH (p)-[:RECEIVED]->(recchat:chat_message)
    WITH p, teams, managers, sentchatMessages,
         collect(DISTINCT recchat) AS receivedchatMessages
    OPTIONAL MATCH (p)-[:WROTE]->(email:email_message)
    WITH p, teams, managers, sentchatMessages, receivedchatMessages,
         collect(DISTINCT email) AS sentemailMessages
    OPTIONAL MATCH (p)-[:RECEIVED]->(recemail:email_message)
    WITH p, teams, managers, sentchatMessages, receivedchatMessages,
         sentemailMessages,
         collect(DISTINCT recemail) AS receivedemailMessages
    OPTIONAL MATCH (p)-[:CREATED]->(doc:Document)
    WITH p, teams, managers, sentchatMessages, receivedchatMessages,
         sentemailMessages, receivedemailMessages,
         collect(DISTINCT doc) AS documents
    RETURN p, teams, managers, sentchatMessages, receivedchatMessages, sentemailMessages, receivedemailMessages, documents
    """
    result = session.run(query, employee_id=employee_id)
    data = [record.data() for record in result]
    return json.dumps(data)

In [38]:
# Only relevant for a single test run:
# fetch the data for one specific employee (example ID 22).
# with driver.session() as session:
#     employee_data_json = fetch_employee_data(22, session)
# print(employee_data_json)

#### 4. Create a profile summary from the extracted information

In [7]:

# Chat-model client used by generate_profile_summary to call the OpenAI API.
# temperature=0 requests the least random sampling; max_tokens caps the
# length of each generated summary.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, max_tokens=1500)

def _join_node_property(nodes, key):
    """Join the non-null ``key`` values of the given node dicts with ", "."""
    values = (node.get(key) for node in (nodes or []) if node)
    return ", ".join(str(value) for value in values if value is not None)


def _first_node_name(nodes):
    """Return the ``name`` of the first node dict, or "N/A" if unavailable."""
    if nodes and nodes[0] and nodes[0].get('name') is not None:
        return nodes[0]['name']
    return "N/A"


def generate_profile_summary(employee_data_json):
    """Create a factual profile summary for one employee with the LLM.

    Args:
        employee_data_json: JSON string in the format returned by
            ``fetch_employee_data``: an array whose first element holds the
            keys ``p``, ``teams``, ``managers``, ``sentchatMessages``,
            ``receivedchatMessages``, ``sentemailMessages``,
            ``receivedemailMessages`` and ``documents``.

    Returns:
        The ``content`` of the model response, or ``None`` when the JSON
        array is empty (no person matched), so callers can skip the employee.
    """
    data = json.loads(employee_data_json)
    if not data:
        # No person record was returned; there is nothing to summarise.
        return None
    employee_data = data[0]  # Only the first record is summarised
    prompt_template = """
    Create a concise profile summary for the following employee data:
    Name: {name}
    Role: {role}
    Team: {team}
    Manager: {manager}
    Sent Chat Messages: {sent_chat}
    Received Chat Messages: {received_chat}
    Sent Email Messages: {sent_email}
    Received Email Messages: {received_email}
    Documents Created: {documents}

    Use very precise and straight forward language.
    Don't dress up the text, remain factual instead.
    Provide a summary highlighting key skills, contributions, and role in the team.
    """

    # Prepare prompt data. Properties that are absent on a node are skipped
    # (or replaced by "N/A") instead of raising KeyError/TypeError.
    person = employee_data.get('p') or {}
    name = person.get('name') if person.get('name') is not None else "N/A"
    role = person.get('role') if person.get('role') is not None else "N/A"
    team = _first_node_name(employee_data.get('teams'))
    manager = _first_node_name(employee_data.get('managers'))
    sent_chat = _join_node_property(employee_data.get('sentchatMessages'), 'content')
    received_chat = _join_node_property(employee_data.get('receivedchatMessages'), 'content')
    sent_email = _join_node_property(employee_data.get('sentemailMessages'), 'subject_line')
    received_email = _join_node_property(employee_data.get('receivedemailMessages'), 'subject_line')
    documents = _join_node_property(employee_data.get('documents'), 'document_name')

    # Create the prompt
    prompt = prompt_template.format(
        name=name, role=role, team=team, manager=manager,
        sent_chat=sent_chat, received_chat=received_chat,
        sent_email=sent_email, received_email=received_email,
        documents=documents
    )

    # Generate the summary. `invoke` is the documented entry point for a
    # plain string prompt; calling the chat model object directly is deprecated.
    summary = llm.invoke(prompt)
    return summary.content

# only relevant for single test run
# Generate the profile summary
# profile_summary = generate_profile_summary(employee_data_json)
# print(profile_summary)


In [11]:
# Function to save the profile summary as a property of the Person node
def save_profile_summary(employee_id, summary, session):
    """Write ``summary`` to the ``profile_summary`` property of a person.

    Args:
        employee_id: Value of the ``id`` property of the ``Person`` node.
        summary: Text to store in ``profile_summary``.
        session: Open Neo4j session; only ``run(query, **params)`` is used.

    Returns:
        list: ``record.data()`` of every record returned by the query
        (one entry per updated node; empty when no node matched).
    """
    query = """
    MATCH (p:Person {id: $employee_id})
    SET p.profile_summary = $summary
    RETURN p
    """
    params = {"employee_id": employee_id, "summary": summary}
    return [row.data() for row in session.run(query, **params)]


#### 5. Main execution loop

In [12]:
# Summarise every Person node and write the result back to the graph.
# All reads and writes share a single session.
with driver.session() as session:
    employee_ids = fetch_all_employees(session)
    for employee_id in employee_ids:
        employee_data_json = fetch_employee_data(employee_id, session)
        profile_summary = generate_profile_summary(employee_data_json)
        if not profile_summary:
            # Nothing to store for this employee; continue with the next one.
            continue
        save_profile_summary(employee_id, profile_summary, session)
        print(f"Processed employee ID: {employee_id}")

Processed employee ID: 1
Processed employee ID: 2
Processed employee ID: 3
Processed employee ID: 4
Processed employee ID: 5
Processed employee ID: 6
Processed employee ID: 7
Processed employee ID: 8
Processed employee ID: 9
Processed employee ID: 10
Processed employee ID: 11
Processed employee ID: 12
Processed employee ID: 13
Processed employee ID: 14
Processed employee ID: 15
Processed employee ID: 16
Processed employee ID: 17
Processed employee ID: 18
Processed employee ID: 19
Processed employee ID: 20
Processed employee ID: 21
Processed employee ID: 22
Processed employee ID: 23
Processed employee ID: 24
Processed employee ID: 25
Processed employee ID: 26
Processed employee ID: 27
Processed employee ID: 28
Processed employee ID: 29
Processed employee ID: 30
Processed employee ID: 31
