In [12]:
#!pip install graphviz

import dspy
import os
import openai
import json
from dotenv import load_dotenv

from graphviz import Digraph
from IPython.display import display

In [13]:
def set_dspy():
    # ==============set openAI enviroment=========
    # Path to your API key file
    key_file_path = "openai_api_key.txt"

    # Load the API key from the file
    with open(key_file_path, "r") as file:
        openai_api_key = file.read().strip()

    # Set the API key as an environment variable
    os.environ["OPENAI_API_KEY"] = openai_api_key
    openai.api_key = os.environ["OPENAI_API_KEY"]
    turbo = dspy.OpenAI(model="gpt-3.5-turbo", max_tokens=2000, temperature=0.5)
    dspy.settings.configure(lm=turbo)
    return turbo
    # ==============end of set openAI enviroment=========


def set_dspy_hardcode_openai_key():
    os.environ["OPENAI_API_KEY"] = (
        "sk-proj-yourapikeyhere"
    )
    openai.api_key = os.environ["OPENAI_API_KEY"]
    turbo = dspy.OpenAI(model="gpt-3.5-turbo",  temperature=0, max_tokens=2000)
    dspy.settings.configure(lm=turbo)
    return turbo

turbo=set_dspy()
# comment out set_dspy() and use set_dspy_hardcode_openai_key is your option
# turbo=set_dspy_hardcode_openai_key()

In [14]:
def load_text_file(file_path):
    """
    Load a text file and return its contents as a string.

    Parameters:
    file_path (str): The path to the text file.

    Returns:
    str: The contents of the text file.
    """
    try:
        with open(file_path, "r") as file:
            contents = file.read()
        return contents
    except FileNotFoundError:
        return "File not found."
    except Exception as e:
        return f"An error occurred: {e}"


In [15]:
conversation=load_text_file("conversation.txt")
print(conversation)

Alice: Hey Bob, I just got a strange email from support@banksecure.com. It says I need to verify my account details urgently. The subject line was "Urgent: Verify Your Account Now". The email looks suspicious to me.

Bob: Hi Alice, that does sound fishy. Can you forward me the email? I’ll take a look at the headers to see where it came from.

Alice: Sure, forwarding it now.

Bob: Got it. Let’s see... The email came from IP address 192.168.10.45, but the domain banksecure.com is not their official domain. It's actually registered to someone in Russia.

Alice: That’s definitely not right. Should I be worried?

Bob: We should investigate further. Did you click on any links or download any attachments?

Alice: I did click on a link that took me to a page asking for my login credentials. I didn't enter anything though. The URL was http://banksecure-verification.com/login.

Bob: Good call on not entering your details. Let’s check the URL. This domain was just registered two days ago. It’s hi

In [16]:
class EvidenceIdentifier(dspy.Signature):
    """Idenitfy evidence entities from a conversation between -Alex (IT Security Specialist) and Taylor (Employee)."""

    question = dspy.InputField(
        desc="a conversation between -Alex (IT Security Specialist) and Bob (Employee)."
    )
    answer = dspy.OutputField(
        desc="a list of evidence, inlcuding but not limited to emaile, IP address, URL, File name, timestamps, etc, in the conversation as a Python dictionary. For example, {evidence type: evidence value, ...}"
    )

In [17]:
def generate_answer(signature, conversation, output_file):
    generate_answer = dspy.Predict(signature)
    answer=generate_answer(question=conversation).answer  # here we use the module

    with open(output_file, "w") as json_file:
        result = json.loads(answer)
        print(result)
        json.dump(result, json_file, indent=4)
    print(f"The evidence has been saved to the file {output_file}")

In [18]:
output_file = "01_output_entity.txt"
generate_answer(
    EvidenceIdentifier, conversation, 
    output_file,
)

{'Email From': 'support@banksecure.com', 'Email Subject': 'Urgent: Verify Your Account Now', 'IP Address': '192.168.10.45', 'Domain': 'banksecure.com', 'Actual Domain Registration': 'Russia', 'URL Clicked': 'http://banksecure-verification.com/login', 'URL Visited 1': 'http://banksecure-verification.com/login', 'URL Visited 2': 'http://banksecure-verification.com/account-details', 'File Downloaded': 'AccountDetails.exe', 'File Creation Time': '10:20 AM', 'MD5 Hash': 'e99a18c428cb38d5f260853678922e03', 'Network Logs Timestamp': '10:20 AM'}
The evidence has been saved to the file 01_output_evidence_entity.txt


In [19]:
turbo.inspect_history(n=1)




Idenitfy evidence entities from a conversation between -Alex (IT Security Specialist) and Taylor (Employee).

---

Follow the following format.

Question: a conversation between -Alex (IT Security Specialist) and Bob (Employee).
Answer: a list of evidence, inlcuding but not limited to emaile, IP address, URL, File name, timestamps, etc, in the conversation as a Python dictionary. For example, {evidence type: evidence value, ...}

---

Question: Alice: Hey Bob, I just got a strange email from support@banksecure.com. It says I need to verify my account details urgently. The subject line was "Urgent: Verify Your Account Now". The email looks suspicious to me. Bob: Hi Alice, that does sound fishy. Can you forward me the email? I’ll take a look at the headers to see where it came from. Alice: Sure, forwarding it now. Bob: Got it. Let’s see... The email came from IP address 192.168.10.45, but the domain banksecure.com is not their official domain. It's actually registered to someone in Ru

'\n\n\nIdenitfy evidence entities from a conversation between -Alex (IT Security Specialist) and Taylor (Employee).\n\n---\n\nFollow the following format.\n\nQuestion: a conversation between -Alex (IT Security Specialist) and Bob (Employee).\nAnswer: a list of evidence, inlcuding but not limited to emaile, IP address, URL, File name, timestamps, etc, in the conversation as a Python dictionary. For example, {evidence type: evidence value, ...}\n\n---\n\nQuestion: Alice: Hey Bob, I just got a strange email from support@banksecure.com. It says I need to verify my account details urgently. The subject line was "Urgent: Verify Your Account Now". The email looks suspicious to me. Bob: Hi Alice, that does sound fishy. Can you forward me the email? I’ll take a look at the headers to see where it came from. Alice: Sure, forwarding it now. Bob: Got it. Let’s see... The email came from IP address 192.168.10.45, but the domain banksecure.com is not their official domain. It\'s actually registered 

In [20]:
class EvidenceRelationIdentifier(dspy.Signature):
    """Idenitfy evidence entities and their relationships from a conversation between -Alex (IT Security Specialist) and Taylor (Employee)."""

    question = dspy.InputField(
        desc="a conversation between -Alex (IT Security Specialist) and Bob (Employee)."
    )

    answer_relations: str = dspy.OutputField(
        desc="relatioinships between evidence entities. Output in JSON format: {Relationship name: evidence -> evidence, ...}."
    )
    
    answer_evidence : str = dspy.OutputField(
        desc="a list of evidence type and the value, inlcuding but not limited to emaile, IP address, URL, File name, timestamps, etc, idenified from the conversation. Output in JSON format: {evidence type: evidence value, ...}"
    )

In [21]:
# deal with multiple output fields
def generate_answers(
    signature, conversation, output_file, attributes_to_extract=["answer"]
):
    generate_answer = dspy.Predict(signature)
    result = generate_answer(question=conversation)  # Call the module
    print(result)

    # Write the answers to the JSON file
    with open(output_file, "w") as json_file:
        # Extract specified attributes
        for attr in attributes_to_extract:
            if hasattr(result, attr):
                # print(attr)
                # print(getattr(result, attr))
                # json_file.write(getattr(result, attr))
                results = json.loads(getattr(result, attr))

                json.dump(results, json_file, indent=4)

            else:
                print(f"Warning: Attribute '{attr}' not found in the result.")

    print(f"The evidence has been saved to the file {output_file}")

In [22]:
output_file = "01_output_entity_relation.txt"
generate_answers(
    EvidenceRelationIdentifier,
    conversation,
    output_file,
    ["answer_evidence", "answer_relations"],
)

Prediction(
    answer_relations='{\n  "Email Header Analysis": "IP Address -> Domain",\n  "URL Analysis": "URL -> Domain",\n  "Browser History Analysis": "URL -> Timestamp",\n  "File Analysis": "File Name -> Timestamp, File Name -> MD5 Hash",\n  "Malware Analysis": "MD5 Hash -> Malware Database"\n}',
    answer_evidence='{\n  "Email Sender": "support@banksecure.com",\n  "Email Subject": "Urgent: Verify Your Account Now",\n  "IP Address": "192.168.10.45",\n  "Domain": "banksecure.com",\n  "Domain Registration": "Russia",\n  "URL": "http://banksecure-verification.com/login",\n  "URL Registration Date": "Two days ago",\n  "File Name": "AccountDetails.exe",\n  "File Creation Timestamp": "10:20 AM",\n  "MD5 Hash": "e99a18c428cb38d5f260853678922e03"\n}'
)
The evidence has been saved to the file 01_output_evidence_entity_relation.txt
