## Connecting to Google Drive

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive/.
from google.colab import drive

drive.mount('/content/drive/')

## Installing all the dependencies

In [None]:
%%capture
!pip install llama-index
!pip install openai
!pip install pypdf
!pip install --upgrade llama_index
!pip install datasets
!pip install nlpaug transformers nltk


# to use llama-index embeddings
!pip install llama-index-embeddings-openai

# to use arabert as the embedding model
# !pip install arabert
!pip install llama-index-embeddings-huggingface
# !pip install llama-index-llms-huggingface
!pip install transformers torch

# !pip install llama_index.core.node_parser
# %pip install jiwer gradio typing-extensions

In [None]:
# Root folder for this notebook's files; the prediction JSON files are written
# to a "prediction/" subfolder of it. End the path with a "/" so it is safe to
# combine with relative paths.
data_folder = "your data folder path"
# NOTE(review): not referenced in the visible cells; presumably the location
# of a persisted vector store index — confirm.
PERSIST_DIR = "your vector store index path"

# NOTE(review): not referenced in the visible cells; presumably a CSV file of
# question/answer data — confirm.
QA_CSV = "CSV for QA"


In [None]:
import os
import openai
import json
import ast
import tiktoken
import re
import pandas as pd
import nltk
import numpy as np

nltk.download('wordnet')

import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw


from sklearn.metrics import precision_recall_fscore_support
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import precision_score, recall_score, f1_score


from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

from datasets import DatasetDict

# Set up the API key used by the OpenAI client.
# A key that is already present in the environment is kept as-is (an
# unconditional assignment would overwrite it with a placeholder). Otherwise the
# user is prompted, so the secret is never stored in the notebook source.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")


## Useful functions

In [None]:

def count_tokens(text):
    """
    Counts how many tokens `text` occupies under the gpt-3.5-turbo encoding.

    Args:
        text (str): The text to tokenize.

    Returns:
        int: Number of tokens produced by the encoder.
    """
    # Look up the tokenizer that belongs to the model, then measure the encoding.
    gpt_encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    return len(gpt_encoding.encode(text))

def parse_mitre_techniques(response):
    """
    Extracts MITRE Technique IDs from a response string and returns them as a Python list.

    Args:
        response (str | None): The response string containing MITRE Technique IDs.
            ``None`` or an empty string (e.g. when the model query failed) is
            accepted and yields an empty list.

    Returns:
        list: The extracted MITRE Technique IDs ("T" followed by four digits),
            in order of appearance and including duplicates. Empty when there
            is no response or it contains no ID.
    """
    # query_gpt returns None on API errors; re.findall raises TypeError on None,
    # so treat a missing/empty response as "no techniques predicted".
    if not response:
        return []

    # Regular expression matching MITRE Technique IDs (e.g., "T1234").
    # Sub-technique suffixes such as ".001" are not captured.
    mitre_pattern = r'T\d{4}'

    return re.findall(mitre_pattern, str(response))

def calculate_metrics(true_labels, predicted_labels):
    """
    Scores a single prediction against its ground truth using set overlap.

    Both inputs are converted to sets first, so duplicates and ordering are ignored.

    Args:
        true_labels (list): Ground truth technique IDs.
        predicted_labels (list): Predicted technique IDs.

    Returns:
        dict: Keys "tp", "fp", "fn" (counts) plus "precision", "recall" and "f1".
            Any ratio whose denominator would be zero is reported as 0.
    """
    expected = set(true_labels)
    guessed = set(predicted_labels)

    # Overlap counts
    tp = len(expected.intersection(guessed))
    fp = len(guessed.difference(expected))
    fn = len(expected.difference(guessed))

    # Ratios, guarding every division against a zero denominator
    predicted_total = tp + fp
    actual_total = tp + fn

    if predicted_total > 0:
        precision = tp / predicted_total
    else:
        precision = 0

    if actual_total > 0:
        recall = tp / actual_total
    else:
        recall = 0

    if (precision + recall) > 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0

    return dict(tp=tp, fp=fp, fn=fn, precision=precision, recall=recall, f1=f1)


## Zero Shot Prompting

In [None]:


# Load the dataset (a CSV file for the zero-shot run)
dataset_path = 'your dataset path'  # Update this if the file is in a different location

df = pd.read_csv(dataset_path)

# Alternative loader for JSON / JSON Lines datasets, kept for reference (disabled).
# # Try to load the JSON data with error handling
# try:
#     # First try reading it as a regular JSON file
#     df = pd.read_json(dataset_path)
# except ValueError:
#     # If it fails, try reading it line by line
#     with open(dataset_path, 'r') as f:
#         data = [json.loads(line) for line in f]
#     df = pd.DataFrame(data)

# Initialize variables
# Ground-truth labels, one entry per row; the metric code later pulls the
# technique IDs out of each entry with a regular expression.
ground_truth = df['str_label']

# One input text per row; each is embedded into the prompt template below.
# NOTE(review): the prompt text and the output file name used for this run refer
# to process descriptions ("flow_fn_process_threat"), while this reads the
# 'TEXT-flow_fn_threat' column — confirm this is the intended column.
prompts = df['TEXT-flow_fn_threat'].tolist()


# Parsed model predictions, appended in the same order as `prompts`.
responses = []

# OpenAI client used by query_gpt (defined below in this cell).
client = openai.OpenAI()

# Optional check of the largest prompt size in tokens (disabled).
# token_count = []

# for prompt in prompts:
#     token_count.append(count_tokens(prompt))

# print(max(token_count))

# Function to query OpenAI GPT model
def query_gpt(prompt):
    """
    Sends `prompt` as the user message of a chat completion and returns the reply.

    Uses the module-level `client`, a fixed system message and fixed
    sampling settings (temperature 0.0, top_p 0).

    Args:
        prompt (str): Full user prompt to send.

    Returns:
        str | None: The content of the first choice converted with ``str``, or
            ``None`` if any exception was raised (the error is printed).
    """
    system_message = {
        "role": "system",
        "content": '''You are a helpful assisstant. Your name is Transportation Security AI. Your role is to help with transportation security.
                Their are some instructions in each prompt. Follow those instructions strictly.''',
    }
    user_message = {"role": "user", "content": prompt}
    request_options = dict(
        model="gpt-4o-mini-2024-07-18",
        messages=[system_message, user_message],
        temperature=0.0,
        # change this parameter if the token size is different for your input
        max_tokens=6000,
        top_p=0,
    )

    try:
        completion = client.chat.completions.create(**request_options)
        reply = completion.choices[0].message.content
        return str(reply)
    except Exception as e:
        print(f"Error querying GPT: {e}")
        return None

# Get GPT responses for each prompt.
# Only the first 20 prompts are queried in this run; `i` is the 1-based running
# count that is printed as a progress indicator.
i = 0
for i, prompt in enumerate(prompts[:20], start=1):
    print(i)
    final_prompt =  f'''I am trying to do a multilabel classification of information flow description from Intelligent Transportation System (ITS) to MITRE ATT&CK Techniques.
                Here we have information flow name, its initiator and acceptor (Physcial Object), functional objects and process description associated with it, security characteristics of the information flow and the description of the information flow itself.
                We also have the STRIDE based threat information for the information flow.
                An attacker may attempt to compromise the integrity, confidentiality, or availability of the information flow in many ways.
                Find all the relevant MITRE ATT&CK techniques that the attacker might use to attack the information flow.

                Follow the instructions below carefully.

                1. We have a predefined list of MITRE ATT&CK Techniques that consists of 63 MITRE Techniques. You have to choose only the relevant MITRE ATT&CK Techniques from this list that is relevant to the information flow given.

                2. Understand the entire context, then generate a sublist of MITRE ATT&CK Technique from the given list.

                3. Do not add any other description in your answer.

                4. Only return the Technique IDs in python list format

                Given MITRE Technique List = ['T1495','T1485','T1595','T1134','T1040','T1132','T1098','T1069','T1036','T1562','T1187','T1486','T1119','T1027','T1498','T1654','T1548','T1082','T1552','T1614','T1531','T1204','T1529','T1046','T1489','T1195','T1566','T1659','T1059','T1213','T1133','T1080','T1005','T1078','T1001','T1190','T1203','T1136','T1491','T1033','T1189','T1068','T1652','T1049','T1020','T1041','T1021','T1105','T1518','T1200','T1053','T1557','T1056','T1087','T1565','T1499','T1657','T1559','T1074','T1106','T1560', 'T1556', 'T1589']
                You should return MITRE Techniques from this list.

                Here is the information flow description:
                {prompt}

                Which are the relevant MITRE ATT&CK Techniques from the given list that the attacker might use to attack the information flow? Return the Technique IDs in python list format.
                '''

    response = query_gpt(final_prompt)
    # query_gpt returns None when the API call fails. Record an empty prediction
    # in that case instead of crashing, so `responses` stays index-aligned with
    # `ground_truth` when the two are zipped for scoring.
    list_response = parse_mitre_techniques(response) if response is not None else []
    # print(list_response)
    responses.append(list_response)

print(len(responses))

# Compare responses to ground truth
# NOTE(review): these three counters are never updated in the visible cells.
true_positives, false_positives, false_negatives = 0, 0, 0

# Initialize results: one dict per scored sample (labels + per-sample metrics)
predictions_with_metrics = []

# zip stops at the shorter input, so only rows that received a response are scored.
for truth, pred in zip(ground_truth, responses):
    # Normalise the ground truth to a list of technique IDs. A string entry is
    # scanned for IDs; an already-parsed list is used directly; anything else
    # (e.g. NaN for a missing cell) is stringified and yields an empty list
    # rather than a TypeError from re.findall.
    if isinstance(truth, (list, tuple, set)):
        p_truth = list(truth)
    else:
        p_truth = re.findall(r"T\d+", str(truth))
    # print(p_truth)
    metrics = calculate_metrics(p_truth, pred)

    predictions_with_metrics.append({
        "true_label": p_truth,
        "predicted_label": pred,
        **metrics
    })


# Compute overall (micro-averaged) metrics from the summed per-sample counts
overall_tp = sum(row["tp"] for row in predictions_with_metrics)
overall_fp = sum(row["fp"] for row in predictions_with_metrics)
overall_fn = sum(row["fn"] for row in predictions_with_metrics)

overall_precision = overall_tp / (overall_tp + overall_fp) if (overall_tp + overall_fp) > 0 else 0
overall_recall = overall_tp / (overall_tp + overall_fn) if (overall_tp + overall_fn) > 0 else 0
overall_f1 = 2 * overall_precision * overall_recall / (overall_precision + overall_recall) if (overall_precision + overall_recall) > 0 else 0



Calculate the precision, recall and f1 score

In [None]:
# Compare responses to ground truth
# NOTE(review): these three counters are never updated in the visible cells.
true_positives, false_positives, false_negatives = 0, 0, 0

# Initialize results: one dict per scored sample (labels + per-sample metrics)
predictions_with_metrics = []

# zip stops at the shorter input, so only rows that received a response are scored.
for truth, pred in zip(ground_truth, responses):
    # Normalise the ground truth to a list of technique IDs before scoring.
    # Passing a raw string to calculate_metrics would make set() split it into
    # single characters, so no prediction could ever match. Already-parsed
    # lists are used as they are; other values (e.g. NaN) yield an empty list.
    if isinstance(truth, (list, tuple, set)):
        p_truth = list(truth)
    else:
        p_truth = re.findall(r"T\d+", str(truth))
    # print(p_truth)
    metrics = calculate_metrics(p_truth, pred)

    predictions_with_metrics.append({
        "true_label": p_truth,
        "predicted_label": pred,
        **metrics
    })


# Compute overall (micro-averaged) metrics from the summed per-sample counts
overall_tp = sum(row["tp"] for row in predictions_with_metrics)
overall_fp = sum(row["fp"] for row in predictions_with_metrics)
overall_fn = sum(row["fn"] for row in predictions_with_metrics)

overall_precision = overall_tp / (overall_tp + overall_fp) if (overall_tp + overall_fp) > 0 else 0
overall_recall = overall_tp / (overall_tp + overall_fn) if (overall_tp + overall_fn) > 0 else 0
overall_f1 = 2 * overall_precision * overall_recall / (overall_precision + overall_recall) if (overall_precision + overall_recall) > 0 else 0



Save the predictions

In [None]:
# Output folder for this run's JSON files.
# os.path.join gives the same path whether or not data_folder ends with a
# separator, and the folder is created up front because open(..., "w") does not
# create missing directories.
prediction_dir = os.path.join(data_folder, "prediction")
os.makedirs(prediction_dir, exist_ok=True)

# Save predictions with metrics (one record per scored sample)
predictions_with_metrics_path = os.path.join(prediction_dir, "zero_shot_flow_fn_process_threat_20.json")
with open(predictions_with_metrics_path, "w") as f:
    json.dump(predictions_with_metrics, f, indent=4)

# Save overall metrics
overall_metrics_path = os.path.join(prediction_dir, "zero_shot_flow_fn_process_threat_20_overall.json")
with open(overall_metrics_path, "w") as f:
    json.dump({
        "overall_precision": overall_precision,
        "overall_recall": overall_recall,
        "overall_f1": overall_f1
    }, f, indent=4)

print(f"Predictions with metrics saved to {predictions_with_metrics_path}")
print(f"Overall metrics saved to {overall_metrics_path}")

## One Shot Prompting

In [None]:

# Load the dataset (JSON or JSON Lines for the one-shot run)
dataset_path = 'your dataset path'  # Update this if the file is in a different location

# Try to load the JSON data with error handling
try:
    # First try reading it as a regular JSON file
    df = pd.read_json(dataset_path)
except ValueError:
    # If it fails, fall back to JSON Lines: one JSON object per line.
    # Blank lines (e.g. a trailing empty line) are skipped because json.loads
    # raises on them; the encoding is fixed so the result does not depend on
    # the platform default.
    with open(dataset_path, 'r', encoding='utf-8') as f:
        data = [json.loads(line) for line in f if line.strip()]
    df = pd.DataFrame(data)

# Initialize variables
# Ground-truth labels, one entry per row.
ground_truth = df['str_label']

# One input text per row; each is embedded into the prompt template below.
prompts = df['TEXT-flow_fn_threat'].tolist()

# Parsed model predictions, appended in the same order as `prompts`.
responses = []

# OpenAI client used by query_gpt (defined below in this cell).
client = openai.OpenAI()
# Function to query OpenAI GPT model
def query_gpt(prompt):
    """
    Asks the chat model to answer `prompt` and returns the reply text.

    The request goes through the module-level `client` with a fixed system
    message, temperature 0.0, top_p 0 and a 7000-token completion limit.

    Args:
        prompt (str): Full user prompt to send.

    Returns:
        str | None: ``str`` of the first choice's message content, or ``None``
            when any exception occurs (the error is printed).
    """
    system_prompt = '''You are a helpful assisstant. Your name is Transportation Security AI. Your role is to help with transportation security.
                Their are some instructions in each prompt. Follow those instructions strictly.'''
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    try:
        gpt_response = client.chat.completions.create(
            model="gpt-4o-mini-2024-07-18",
            messages=conversation,
            temperature=0.0,
            max_tokens=7000,
            top_p=0,
        )
        first_choice = gpt_response.choices[0]
        return str(first_choice.message.content)
    except Exception as e:
        print(f"Error querying GPT: {e}")
        return None

# Get GPT responses for each prompt (every row of the dataset is queried)
for prompt in prompts:

    # In the final prompt, we used an example. You can choose a different example or modify it as needed.
    final_prompt =  f'''I am trying to do a multilabel classification of information flow description from Intelligent Transportation System (ITS) to MITRE ATT&CK Techniques.
                Here we have information flow name, its source and destination, some functional object description associated with it and the description of the information flow itself.
                An attacker may attempt to compromise the integrity, confidentiality, or availability of the information flow in many ways.
                Find all the relevant MITRE ATT&CK techniques that the attacker might use to attack the information flow.

                Follow the instructions below carefully.

                1. We have a predefined list of MITRE ATT&CK Techniques that consists of 63 MITRE Techniques. You have to choose only the relevant MITRE ATT&CK Techniques from this list that is relevant to the information flow given.

                2. Understand the entire context, then generate a sublist of MITRE ATT&CK Technique from the given list.

                3. Do not add any other description in your answer.

                4. Only return the Technique IDs in python list format

                Given MITRE Technique List = ['T1495','T1485','T1595','T1134','T1040','T1132','T1098','T1069','T1036','T1562','T1187','T1486','T1119','T1027','T1498','T1654','T1548','T1082','T1552','T1614','T1531','T1204','T1529','T1046','T1489','T1195','T1566','T1659','T1059','T1213','T1133','T1080','T1005','T1078','T1001','T1190','T1203','T1136','T1491','T1033','T1189','T1068','T1652','T1049','T1020','T1041','T1021','T1105','T1518','T1200','T1053','T1557','T1056','T1087','T1565','T1499','T1657','T1559','T1074','T1106','T1560', 'T1556', 'T1589']

                Here is an example:

                Information Flows: signal_system_configuration
                Initiator: Traffic Management Center
                Acceptor: ITS Roadway Equipment
                Requires Authentication?: Yes
                Requires Encryption?: Yes
                Confidentiality requirement of information flow: Low
                Integrity requirement of information flow: High
                Availability requirement of information flow: Moderate

                Description of the initiator:
                The 'Traffic Management Center' monitors and controls traffic and the road network. It represents centers that manage a broad range of transportation facilities including freeway systems, rural and suburban highway systems, and urban and suburban traffic control systems. It communicates with ITS Roadway Equipment and Connected Vehicle Roadside Equipment (RSE) to monitor and manage traffic flow and monitor the condition of the roadway, surrounding environmental conditions, and field equipment status. It manages traffic and transportation resources to support allied agencies in responding to, and recovering from, incidents ranging from minor traffic incidents through major disasters.

                                Function-0 of the : 'TMC Reversible Lane Management' remotely monitors and controls reversible lanes. It provides an interface to reversible lane field equipment (traffic sensors, surveillance equipment, lane control signals, physical lane access controls, etc.) and to traffic operations personnel to support central monitoring and control of these facilities.

                Description of the Acceptor:
                'ITS Roadway Equipment' represents the ITS equipment that is distributed on and along the roadway that monitors and controls traffic and monitors and manages the roadway. This physical object includes traffic detectors, environmental sensors, traffic signals, highway advisory radios, dynamic message signs, CCTV cameras and video image processing systems, grade crossing warning systems, and ramp metering systems. Lane management systems and barrier systems that control access to transportation infrastructure such as roadways, bridges and tunnels are also included. This object also provides environmental monitoring including sensors that measure road conditions, surface weather, and vehicle emissions. Work zone systems including work zone surveillance, traffic control, driver warning, and work crew safety systems are also included.

                                Function-0:'Roadway Reversible Lanes' includes field elements that monitor and control reversible lane facilities. It includes the traffic sensors, surveillance equipment, lane control signals, physical lane access controls, and other field elements that manage traffic on these facilities. It provides current reversible lane facility status information and accepts requests and control commands from the controlling center.

                Definition of signal_system_configuration information flow:
                Data used to configure traffic signal systems including configuring control sections and mode of operation (time based or traffic responsive).

                STRIDE based threat information associated with the information flow:

                Threat Title:Potential Lack of Input Validation for Roadway Reversible Lanes;Threat Category:Tampering;Description:Data flowing across TMC Reversible Lane management- Roadway Reversible Lanes: (2B) reversible lane control + lane management control + signal system configuration+ signal control device configuration+ signal control plans + signal control commands may be tampered with by an attacker. This may lead to a denial of service attack against Roadway Reversible Lanes or an elevation of privilege attack against Roadway Reversible Lanes or an information disclosure by Roadway Reversible Lanes. Failure to verify that input is as expected is a root cause of a very large number of exploitable issues. Consider all paths and the way they handle data. Verify that all input is verified for correctness using an approved list input validation approach.;Threat Title:Potential Data Repudiation by Roadway Reversible Lanes;Threat Category:Repudiation;Description:Roadway Reversible Lanes claims that it did not receive data from a source outside the trust boundary. 
Consider using logging or auditing to record the source, time, and summary of the received data.;Threat Title:Potential Process Crash or Stop for Roadway Reversible Lanes;Threat Category:Denial Of Service;Description:Roadway Reversible Lanes crashes, halts, stops or runs slowly; in all cases violating an availability metric.;Threat Title:Data Flow TMC Reversible Lane management- Roadway Reversible Lanes: (2B) reversible lane control + lane management control + signal system configuration+ signal control device configuration+ signal control plans + signal control commands Is Potentially Interrupted;Threat Category:Denial Of Service;Description:An external agent interrupts data flowing across a trust boundary in either direction.;Threat Title:Elevation Using Impersonation;Threat Category:Elevation Of Privilege;Description:Roadway Reversible Lanes may be able to impersonate the context of TMC Reversible Lane management in order to gain additional privilege.;Threat Title:Roadway Reversible Lanes May be Subject to Elevation of Privilege Using Remote Code Execution;Threat Category:Elevation Of Privilege;Description:TMC Reversible Lane management may be able to remotely execute code for Roadway Reversible Lanes.;Threat Title:Elevation by Changing the Execution Flow in Roadway Reversible Lanes;Threat Category:Elevation Of Privilege;Description:An attacker may pass data into Roadway Reversible Lanes in order to change the flow of program execution within Roadway Reversible Lanes to the attacker's choosing.;Threat Title:Cross Site Request Forgery;Threat Category:Elevation Of Privilege;Description:Cross-site request forgery (CSRF or XSRF) is a type of attack in which an attacker forces a user's browser to make a forged request to a vulnerable site by exploiting an existing trust relationship between the browser and the vulnerable web site.  In a simple scenario, a user is logged in to web site A using a cookie as a credential.  The other browses to web site B.  
Web site B returns a page with a hidden form that posts to web site A.  Since the browser will carry the user's cookie to web site A, web site B now can take any action on web site A, for example, adding an admin to an account.  The attack can be used to exploit any requests that the browser automatically authenticates, e.g. by session cookie, integrated authentication, IP whitelisting etc.  The attack can be carried out in many ways such as by luring the victim to a site under control of the attacker, getting the user to click a link in a phishing email, or hacking a reputable web site that the victim will visit. The issue can only be resolved on the server side by requiring that all authenticated state-changing requests include an additional piece of secret payload (canary or CSRF token) which is known only to the legitimate web site and the browser and which is protected in transit through SSL/TLS. See the Forgery Protection property on the flow stencil for a list of mitigations.;Threat Title:Authenticated Data Flow Compromised;Threat Category:Tampering;Description:An attacker can read or modify data transmitted over an authenticated dataflow.
                Threat Title:Potential Lack of Input Validation for Roadway Reversible Lanes;Threat Category:Tampering;Description:Data flowing across signal_system_configuration commands may be tampered with by an attacker. This may lead to a denial of service attack against Roadway Reversible Lanes or an elevation of privilege attack against Roadway Reversible Lanes or an information disclosure by Roadway Reversible Lanes. Failure to verify that input is as expected is a root cause of a very large number of exploitable issues. Consider all paths and the way they handle data. Verify that all input is verified for correctness using an approved list input validation approach.
                Threat Title:Potential Data Repudiation by Roadway Reversible Lanes;Threat Category:Repudiation;Description:Roadway Reversible Lanes claims that it did not receive data from a source outside the trust boundary. Consider using logging or auditing to record the source, time, and summary of the received data.
                Threat Title:Potential Process Crash or Stop for Roadway Reversible Lanes;Threat Category:Denial Of Service;Description:Roadway Reversible Lanes crashes, halts, stops or runs slowly; in all cases violating an availability metric.
                Threat Title:Data Flow signal_system_configuration commands Is Potentially Interrupted;Threat Category:Denial Of Service;Description:An external agent interrupts data flowing across a trust boundary in either direction.
                Threat Title:Elevation Using Impersonation;Threat Category:Elevation Of Privilege;Description:Roadway Reversible Lanes may be able to impersonate the context of TMC Reversible Lane Mgmt in order to gain additional privilege.
                Threat Title:Roadway Reversible Lanes May be Subject to Elevation of Privilege Using Remote Code Execution;Threat Category:Elevation Of Privilege;Description:TMC Reversible Lane Mgmt may be able to remotely execute code for Roadway Reversible Lanes.
                Threat Title:Elevation by Changing the Execution Flow in Roadway Reversible Lanes;Threat Category:Elevation Of Privilege;Description:An attacker may pass data into Roadway Reversible Lanes in order to change the flow of program execution within Roadway Reversible Lanes to the attacker's choosing.
                Threat Title:Cross Site Request Forgery;Threat Category:Elevation Of Privilege;Description:Cross-site request forgery (CSRF or XSRF) is a type of attack in which an attacker forces a user's browser to make a forged request to a vulnerable site by exploiting an existing trust relationship between the browser and the vulnerable web site.  In a simple scenario, a user is logged in to web site A using a cookie as a credential.  The other browses to web site B.  Web site B returns a page with a hidden form that posts to web site A.  Since the browser will carry the user's cookie to web site A, web site B now can take any action on web site A, for example, adding an admin to an account.  The attack can be used to exploit any requests that the browser automatically authenticates, e.g. by session cookie, integrated authentication, IP whitelisting etc.  The attack can be carried out in many ways such as by luring the victim to a site under control of the attacker, getting the user to click a link in a phishing email, or hacking a reputable web site that the victim will visit. The issue can only be resolved on the server side by requiring that all authenticated state-changing requests include an additional piece of secret payload (canary or CSRF token) which is known only to the legitimate web site and the browser and which is protected in transit through SSL/TLS. See the Forgery Protection property on the flow stencil for a list of mitigations.
                Threat Title:Authenticated Data Flow Compromised;Threat Category:Tampering;Description:An attacker can read or modify data transmitted over an authenticated dataflow.-------------------------------------------------------------------

                --------------------------------------

                Classified Labels in python list format:
                ['T1040','T1059','T1078','T1105','T1190','T1195','T1495','T1552','T1557','T1565']

                Here is your information flow description:
                {prompt}

                Which are the relevant MITRE ATT&CK Techniques from the given list that the attacker might use to attack the information flow? Return the Technique IDs in python list format.
                '''

    response = query_gpt(final_prompt)
    # query_gpt returns None when the API call fails. Record an empty prediction
    # in that case instead of crashing, so `responses` stays index-aligned with
    # `ground_truth` when the two are zipped for scoring.
    list_response = parse_mitre_techniques(response) if response is not None else []
    # print(list_response)
    responses.append(list_response)

print(len(responses))

# Compare responses to ground truth
# NOTE(review): these three counters are never updated in the visible cells.
true_positives, false_positives, false_negatives = 0, 0, 0

# Initialize results: one dict per scored sample (labels + per-sample metrics)
predictions_with_metrics = []

for truth, pred in zip(ground_truth, responses):
    # Normalise the ground truth to a list of technique IDs before scoring.
    # An already-parsed list is used as it is. A string entry is scanned for
    # IDs: handing the raw string to calculate_metrics would make set() split
    # it into single characters. Other values (e.g. NaN) yield an empty list.
    if isinstance(truth, (list, tuple, set)):
        p_truth = list(truth)
    else:
        p_truth = re.findall(r"T\d+", str(truth))
    # print(p_truth)
    metrics = calculate_metrics(p_truth, pred)

    predictions_with_metrics.append({
        "true_label": p_truth,
        "predicted_label": pred,
        **metrics
    })


# Compute overall (micro-averaged) metrics from the summed per-sample counts
overall_tp = sum(row["tp"] for row in predictions_with_metrics)
overall_fp = sum(row["fp"] for row in predictions_with_metrics)
overall_fn = sum(row["fn"] for row in predictions_with_metrics)

overall_precision = overall_tp / (overall_tp + overall_fp) if (overall_tp + overall_fp) > 0 else 0
overall_recall = overall_tp / (overall_tp + overall_fn) if (overall_tp + overall_fn) > 0 else 0
overall_f1 = 2 * overall_precision * overall_recall / (overall_precision + overall_recall) if (overall_precision + overall_recall) > 0 else 0



433


In [None]:
# Output folder for this run's JSON files.
# os.path.join gives the same path whether or not data_folder ends with a
# separator, and the folder is created up front because open(..., "w") does not
# create missing directories.
prediction_dir = os.path.join(data_folder, "prediction")
os.makedirs(prediction_dir, exist_ok=True)

# Save predictions with metrics (one record per scored sample)
predictions_with_metrics_path = os.path.join(prediction_dir, "one_shot_flow_fn_threat.json")
with open(predictions_with_metrics_path, "w") as f:
    json.dump(predictions_with_metrics, f, indent=4)

# Save overall metrics
overall_metrics_path = os.path.join(prediction_dir, "one_shot_flow_fn_threat_overall.json")
with open(overall_metrics_path, "w") as f:
    json.dump({
        "overall_precision": overall_precision,
        "overall_recall": overall_recall,
        "overall_f1": overall_f1
    }, f, indent=4)

print(f"Predictions with metrics saved to {predictions_with_metrics_path}")
print(f"Overall metrics saved to {overall_metrics_path}")

## Few Shot Prompting

In [None]:

# Load the dataset (a CSV file for the few-shot run)
dataset_path = 'your dataset path'  # Update this if the file is in a different location

df = pd.read_csv(dataset_path)
# Alternative loader for JSON / JSON Lines datasets, kept for reference (disabled).
# # Try to load the JSON data with error handling
# try:
#     # First try reading it as a regular JSON file
#     df = pd.read_json(dataset_path)
# except ValueError:
#     # If it fails, try reading it line by line
#     with open(dataset_path, 'r') as f:
#         data = [json.loads(line) for line in f]
#     df = pd.DataFrame(data)

# Initialize variables
# Ground-truth labels, one entry per row.
ground_truth = df['str_label']

# One input text per row, taken from the 'TEXT-flow_fn_process_threat' column;
# each is embedded into the prompt template below.
prompts = df['TEXT-flow_fn_process_threat'].tolist()
# Parsed model predictions, appended in the same order as `prompts`.
responses = []

# OpenAI client used by query_gpt (defined below in this cell).
client = openai.OpenAI()
# Function to query OpenAI GPT model
def query_gpt(prompt):
    """
    Runs one chat completion for `prompt` and hands back the model's text.

    The call is made with the module-level `client`, a fixed system message,
    temperature 0.0, top_p 0 and a 9000-token completion limit.

    Args:
        prompt (str): Full user prompt to send.

    Returns:
        str | None: ``str`` of the first choice's message content, or ``None``
            if anything raises along the way (the error is printed).
    """
    chat_messages = []
    chat_messages.append({
        "role": "system",
        "content": '''You are a helpful assisstant. Your name is Transportation Security AI. Your role is to help with transportation security.
                Their are some instructions in each prompt. Follow those instructions strictly.''',
    })
    chat_messages.append({"role": "user", "content": prompt})

    try:
        result = client.chat.completions.create(
            model="gpt-4o-mini-2024-07-18",
            messages=chat_messages,
            temperature=0.0,
            max_tokens=9000,
            top_p=0,
        )
        answer_text = str(result.choices[0].message.content)
    except Exception as e:
        print(f"Error querying GPT: {e}")
        answer_text = None
    return answer_text
i = 0

for prompt in prompts:

        if i == 20:
            break
        i += 1

        # In few shot, you can add multiple examples to the prompt. This example used 3 examples. The more intelligently you choose the examples, the better the model will perform.
        final_prompt =  f'''I am trying to do a multilabel classification of information flow description from Intelligent Transportation System (ITS) to MITRE ATT&CK Techniques.
                Here we have information flow name, its initiator and acceptor (Physcial Object), functional objects and process description associated with it, security characteristics of the information flow and the description of the information flow itself.
                We also have the STRIDE based threat information for the information flow.
                An attacker may attempt to compromise the integrity, confidentiality, or availability of the information flow in many ways.
                Find all the relevant MITRE ATT&CK techniques that the attacker might use to attack the information flow.

                Follow the instructions below carefully.

                1. We have a predefined list of MITRE ATT&CK Techniques that consists of 63 MITRE Techniques. You have to choose only the relevant MITRE ATT&CK Techniques from this list that is relevant to the information flow given.

                2. Understand the entire context, then generate a sublist of MITRE ATT&CK Technique from the given list.

                3. Do not add any other description in your answer.

                4. Only return the Technique IDs in python list format

                Given MITRE Technique List = ['T1495','T1485','T1595','T1134','T1040','T1132','T1098','T1069','T1036','T1562','T1187','T1486','T1119','T1027','T1498','T1654','T1548','T1082','T1552','T1614','T1531','T1204','T1529','T1046','T1489','T1195','T1566','T1659','T1059','T1213','T1133','T1080','T1005','T1078','T1001','T1190','T1203','T1136','T1491','T1033','T1189','T1068','T1652','T1049','T1020','T1041','T1021','T1105','T1518','T1200','T1053','T1557','T1056','T1087','T1565','T1499','T1657','T1559','T1074','T1106','T1560', 'T1556', 'T1589']
                You should return MITRE Techniques from this list.

                Here are a few examples:
                    -------------------------------------------------------------------
                    Example 1:
                    "Information Flows: signal_system_configuration
                    Initiator: Traffic Management Center
                    Acceptor: ITS Roadway Equipment
                    Requires Authentication?: Yes
                    Requires Encryption?: Yes
                    Confidentiality requirement of information flow: Low
                    Integrity requirement of information flow: High
                    Availability requirement of information flow: Moderate

                    Description of the initiator:
                    The 'Traffic Management Center' monitors and controls traffic and the road network. It represents centers that manage a broad range of transportation facilities including freeway systems, rural and suburban highway systems, and urban and suburban traffic control systems. It communicates with ITS Roadway Equipment and Connected Vehicle Roadside Equipment (RSE) to monitor and manage traffic flow and monitor the condition of the roadway, surrounding environmental conditions, and field equipment status. It manages traffic and transportation resources to support allied agencies in responding to, and recovering from, incidents ranging from minor traffic incidents through major disasters.

                                    Function-0 of the : 'TMC Reversible Lane Management' remotely monitors and controls reversible lanes. It provides an interface to reversible lane field equipment (traffic sensors, surveillance equipment, lane control signals, physical lane access controls, etc.) and to traffic operations personnel to support central monitoring and control of these facilities.
                    Processes:
                    Select Strategy-This process shall select the appropriate traffic control strategy to be implemented over a road and/or freeway section served by the specific instance of the Manage Traffic function. The strategy shall be selected by the process from a number that are available, e.g., adaptive control, fixed time control, local operations. The selected strategy shall be passed by the process to the actual control processes for implementation according to the part of the network to which it is to be applied, i.e., surface roads, freeways (i.e., limited access roads), ramps and/or parking lots. The definition of strategy can be extended to include a strategy for the operations of sensors such as video cameras used to provide traffic surveillance data. Initial strategies, based on the time of day, may be input by the traffic operations personnel. The process shall make it possible for the current strategy selection to be modified to accommodate the effects of such things as archived and predicted traffic usage, incidents, emergency vehicle preemption, the passage of commercial vehicles with unusual loads, equipment faults and overrides from the traffic operations personnel. The strategy for control of freeways and parking lots is through use of DMS signs and lane indicators. The strategy for control of ramps is through the timing plans for ramp meters. The selected strategy shall be sent to the process within the Provide Traffic Surveillance facility responsible for maintaining the store of long term data.

                    Description of the Acceptor:
                    'ITS Roadway Equipment' represents the ITS equipment that is distributed on and along the roadway that monitors and controls traffic and monitors and manages the roadway. This physical object includes traffic detectors, environmental sensors, traffic signals, highway advisory radios, dynamic message signs, CCTV cameras and video image processing systems, grade crossing warning systems, and ramp metering systems. Lane management systems and barrier systems that control access to transportation infrastructure such as roadways, bridges and tunnels are also included. This object also provides environmental monitoring including sensors that measure road conditions, surface weather, and vehicle emissions. Work zone systems including work zone surveillance, traffic control, driver warning, and work crew safety systems are also included.

                                    Function-0:'Roadway Reversible Lanes' includes field elements that monitor and control reversible lane facilities. It includes the traffic sensors, surveillance equipment, lane control signals, physical lane access controls, and other field elements that manage traffic on these facilities. It provides current reversible lane facility status information and accepts requests and control commands from the controlling center.

                    Processes:
                    Process Indicator Output Data for Roads-This process shall implement the indicator output data generated by other processes within the Manage Traffic function for use on the roads (surface streets) served by the function. It shall perform the functions needed to provide traffic control at intersections or pedestrian crossings, or provide the interface for data to be sent to the units (or systems) that manage reversible lanes, multimodal crossings or highway-rail intersections. This process shall monitor the status of the indicator equipment and provide data to the Manage Maintenance and Construction function to help that process determine whether the indicator is operating correctly or a repair is needed.

                    Definition of signal_system_configuration information flow:
                    Data used to configure traffic signal systems including configuring control sections and mode of operation (time based or traffic responsive).

                    STRIDE based threat information associated with the information flow:
                    Threat Title:Potential Lack of Input Validation for Roadway Reversible Lanes;Threat Category:Tampering;Description:Data flowing across signal_system_configuration commands may be tampered with by an attacker. This may lead to a denial of service attack against Roadway Reversible Lanes or an elevation of privilege attack against Roadway Reversible Lanes or an information disclosure by Roadway Reversible Lanes. Failure to verify that input is as expected is a root cause of a very large number of exploitable issues. Consider all paths and the way they handle data. Verify that all input is verified for correctness using an approved list input validation approach.
                    Threat Title:Potential Data Repudiation by Roadway Reversible Lanes;Threat Category:Repudiation;Description:Roadway Reversible Lanes claims that it did not receive data from a source outside the trust boundary. Consider using logging or auditing to record the source, time, and summary of the received data.
                    Threat Title:Potential Process Crash or Stop for Roadway Reversible Lanes;Threat Category:Denial Of Service;Description:Roadway Reversible Lanes crashes, halts, stops or runs slowly; in all cases violating an availability metric.
                    Threat Title:Data Flow signal_system_configuration commands Is Potentially Interrupted;Threat Category:Denial Of Service;Description:An external agent interrupts data flowing across a trust boundary in either direction.
                    Threat Title:Elevation Using Impersonation;Threat Category:Elevation Of Privilege;Description:Roadway Reversible Lanes may be able to impersonate the context of TMC Reversible Lane Mgmt in order to gain additional privilege.
                    Threat Title:Roadway Reversible Lanes May be Subject to Elevation of Privilege Using Remote Code Execution;Threat Category:Elevation Of Privilege;Description:TMC Reversible Lane Mgmt may be able to remotely execute code for Roadway Reversible Lanes.
                    Threat Title:Elevation by Changing the Execution Flow in Roadway Reversible Lanes;Threat Category:Elevation Of Privilege;Description:An attacker may pass data into Roadway Reversible Lanes in order to change the flow of program execution within Roadway Reversible Lanes to the attacker's choosing.
                    Threat Title:Cross Site Request Forgery;Threat Category:Elevation Of Privilege;Description:Cross-site request forgery (CSRF or XSRF) is a type of attack in which an attacker forces a user's browser to make a forged request to a vulnerable site by exploiting an existing trust relationship between the browser and the vulnerable web site.  In a simple scenario, a user is logged in to web site A using a cookie as a credential.  The other browses to web site B.  Web site B returns a page with a hidden form that posts to web site A.  Since the browser will carry the user's cookie to web site A, web site B now can take any action on web site A, for example, adding an admin to an account.  The attack can be used to exploit any requests that the browser automatically authenticates, e.g. by session cookie, integrated authentication, IP whitelisting etc.  The attack can be carried out in many ways such as by luring the victim to a site under control of the attacker, getting the user to click a link in a phishing email, or hacking a reputable web site that the victim will visit. The issue can only be resolved on the server side by requiring that all authenticated state-changing requests include an additional piece of secret payload (canary or CSRF token) which is known only to the legitimate web site and the browser and which is protected in transit through SSL/TLS. See the Forgery Protection property on the flow stencil for a list of mitigations.
                    Threat Title:Authenticated Data Flow Compromised;Threat Category:Tampering;Description:An attacker can read or modify data transmitted over an authenticated dataflow."-------------------------------------------------------------------
                    -------------------------------------------------------------------
                    Classified Labels in python list format:
                    ['T1040','T1059','T1078','T1105','T1190','T1195','T1495','T1552','T1557','T1565']
                    -------------------------------------------------------------------
                    Example 2:
                    Information Flows: reversible_lane_control
                    Initiator: Traffic Management Center
                    Acceptor: ITS Roadway Equipment
                    Requires Authentication?: Yes
                    Requires Encryption?: Yes
                    Confidentiality requirement of information flow: Moderate
                    Integrity requirement of information flow: High
                    Availability requirement of information flow: Moderate

                    Description of the Initiator:
                    The 'Traffic Management Center' monitors and controls traffic and the road network. It represents centers that manage a broad range of transportation facilities including freeway systems, rural and suburban highway systems, and urban and suburban traffic control systems. It communicates with ITS Roadway Equipment and Connected Vehicle Roadside Equipment (RSE) to monitor and manage traffic flow and monitor the condition of the roadway, surrounding environmental conditions, and field equipment status. It manages traffic and transportation resources to support allied agencies in responding to, and recovering from, incidents ranging from minor traffic incidents through major disasters.

                      Function-0:  'TMC Reversible Lane Management' remotely monitors and controls reversible lanes. It provides an interface to reversible lane field equipment (traffic sensors, surveillance equipment, lane control signals, physical lane access controls, etc.) and to traffic operations personnel to support central monitoring and control of these facilities.

                      Processes:
                      - Output Control Data for Roads: This process shall transfer data to processes responsible for controlling equipment located at the roadside within the road (surface street) network served by the Manage Traffic function to support traffic control. This process shall also control the reversible lane facilities equipment required to change the direction of traffic flow along surface streets.  Data for use by in-vehicle signage equipment shall be sent to another process for output to roadside processes.  All data shall be sent to this process by processes within the Manage Traffic function.  This process shall also be responsible for the monitoring of input data showing the way in which the indicators are responding to the data that they are being sent, and the reporting of any errors in their responses as faults.  The reported data shall include the operational status (state of the device and configuration) from the indicator device.  All output and input data shall be sent by the process to another process in the Manage Traffic function to be loaded into the store of long term data.
                      - Output Control Data for Freeways: This process shall transfer data to processes responsible for controlling equipment located at the roadside within the freeway network served by the Manage Traffic function. The traffic metering devices remotely controlled by this process could include ramp, interchange, tunnels, mainline meters, HOV lane usage signals, HOV lane control systems, and reversible lane facilities equipment required to change the direction of traffic flow along a freeway system.  Data for use by in-vehicle signage equipment shall be sent to another process for output to roadside processes.  This process shall also be responsible for the monitoring of input data showing the way in which the indicators are responding to the data that they are being sent, and the reporting of any errors in their responses as faults.  The reported data shall include the operational status (state of the device and configuration) from the indicator device.  All output and input data shall be sent by the process to another process in the Manage Traffic function to be loaded into the store of long term data.

                    Description of the Acceptor:
                    'ITS Roadway Equipment' represents the ITS equipment that is distributed on and along the roadway that monitors and controls traffic and monitors and manages the roadway. This physical object includes traffic detectors, environmental sensors, traffic signals, highway advisory radios, dynamic message signs, CCTV cameras and video image processing systems, grade crossing warning systems, and ramp metering systems. Lane management systems and barrier systems that control access to transportation infrastructure such as roadways, bridges and tunnels are also included. This object also provides environmental monitoring including sensors that measure road conditions, surface weather, and vehicle emissions. Work zone systems including work zone surveillance, traffic control, driver warning, and work crew safety systems are also included.

                      Function-0:  'Roadway Reversible Lanes' includes field elements that monitor and control reversible lane facilities. It includes the traffic sensors, surveillance equipment, lane control signals, physical lane access controls, and other field elements that manage traffic on these facilities. It provides current reversible lane facility status information and accepts requests and control commands from the controlling center.

                      Processes:
                      - Process Indicator Output Data for Roads: This process shall implement the indicator output data generated by other processes within the Manage Traffic function for use on the roads (surface streets) served by the function. It shall perform the functions needed to provide traffic control at intersections or pedestrian crossings, or provide the interface for data to be sent to the units (or systems) that manage reversible lanes, multimodal crossings or highway-rail intersections.  This process shall monitor the status of the indicator equipment and provide data to the Manage Maintenance and Construction function to help that process determine whether the indicator is operating correctly or a repair is needed.
                      - Process Indicator Output Data for Freeways: This process shall implement the indicator output data generated by other processes within the Manage Traffic function for use on freeways served by the function.  It shall perform the functions needed to output control data to traffic meters and lane use indicators including those used for ramp, interchange, and mainline metering, reversible lanes, high-occupancy vehicle (HOV) or high-occupancy toll (HOT) lanes.  This process shall provide the interface for data to be sent to the units (or systems) that manage multimodal crossings.  This process shall monitor the status of the indicator equipment and provide data to the Manage Maintenance and Construction function to help that process determine whether the indicator is operating correctly or a repair is needed.

                    Definition of reversible_lane_control information flow:
                    Control of automated reversible lane configuration and driver information systems.

                    STRIDE based threat information associated with the information flow:
                    Threat Title:Potential Lack of Input Validation for Roadway Reversible Lanes;Threat Category:Tampering;Description:Data flowing across reversible_lane_control may be tampered with by an attacker. This may lead to a denial of service attack against Roadway Reversible Lanes or an elevation of privilege attack against Roadway Reversible Lanes or an information disclosure by Roadway Reversible Lanes. Failure to verify that input is as expected is a root cause of a very large number of exploitable issues. Consider all paths and the way they handle data. Verify that all input is verified for correctness using an approved list input validation approach.

                    Threat Title:Potential Data Repudiation by Roadway Reversible Lanes;Threat Category:Repudiation;Description:Roadway Reversible Lanes claims that it did not receive data from a source outside the trust boundary. Consider using logging or auditing to record the source, time, and summary of the received data.

                    Threat Title:Potential Process Crash or Stop for Roadway Reversible Lanes;Threat Category:Denial Of Service;Description:Roadway Reversible Lanes crashes, halts, stops or runs slowly; in all cases violating an availability metric.

                    Threat Title:Data Flow reversible_lane_control Is Potentially Interrupted;Threat Category:Denial Of Service;Description:An external agent interrupts data flowing across a trust boundary in either direction.

                    Threat Title:Elevation Using Impersonation;Threat Category:Elevation Of Privilege;Description:Roadway Reversible Lanes may be able to impersonate the context of TMC Reversible Lane Mgmt in order to gain additional privilege.

                    Threat Title:Roadway Reversible Lanes May be Subject to Elevation of Privilege Using Remote Code Execution;Threat Category:Elevation Of Privilege;Description:TMC Reversible Lane Mgmt may be able to remotely execute code for Roadway Reversible Lanes.

                    Threat Title:Elevation by Changing the Execution Flow in Roadway Reversible Lanes;Threat Category:Elevation Of Privilege;Description:An attacker may pass data into Roadway Reversible Lanes in order to change the flow of program execution within Roadway Reversible Lanes to the attacker's choosing.

                    Threat Title:Cross Site Request Forgery;Threat Category:Elevation Of Privilege;Description:Cross-site request forgery (CSRF or XSRF) is a type of attack in which an attacker forces a user's browser to make a forged request to a vulnerable site by exploiting an existing trust relationship between the browser and the vulnerable web site.  In a simple scenario, a user is logged in to web site A using a cookie as a credential.  The other browses to web site B.  Web site B returns a page with a hidden form that posts to web site A.  Since the browser will carry the user's cookie to web site A, web site B now can take any action on web site A, for example, adding an admin to an account.  The attack can be used to exploit any requests that the browser automatically authenticates, e.g. by session cookie, integrated authentication, IP whitelisting etc.  The attack can be carried out in many ways such as by luring the victim to a site under control of the attacker, getting the user to click a link in a phishing email, or hacking a reputable web site that the victim will visit. The issue can only be resolved on the server side by requiring that all authenticated state-changing requests include an additional piece of secret payload (canary or CSRF token) which is known only to the legitimate web site and the browser and which is protected in transit through SSL/TLS. See the Forgery Protection property on the flow stencil for a list of mitigations.

                    Threat Title:Authenticated Data Flow Compromised;Threat Category:Tampering;Description:An attacker can read or modify data transmitted over an authenticated dataflow."
                    -------------------------------------------------------------------
                    Classified Labels in python list format:
                    ['T1041', 'T1059', 'T1105', 'T1190', 'T1495', 'T1557', 'T1565']
                    -------------------------------------------------------------------
                    Example 3:
                    Information Flows: traffic_image_meta_data
                    Initiator: ITS Roadway Equipment
                    Acceptor: Traffic Management Center
                    Requires Authentication?: Yes
                    Requires Encryption?: False
                    Confidentiality requirement of information flow: Low
                    Integrity requirement of information flow: Moderate
                    Availability requirement of information flow: Moderate

                    Description of the Initiator:
                    'ITS Roadway Equipment' represents the ITS equipment that is distributed on and along the roadway that monitors and controls traffic and monitors and manages the roadway. This physical object includes traffic detectors, environmental sensors, traffic signals, highway advisory radios, dynamic message signs, CCTV cameras and video image processing systems, grade crossing warning systems, and ramp metering systems. Lane management systems and barrier systems that control access to transportation infrastructure such as roadways, bridges and tunnels are also included. This object also provides environmental monitoring including sensors that measure road conditions, surface weather, and vehicle emissions. Work zone systems including work zone surveillance, traffic control, driver warning, and work crew safety systems are also included.

                      Function-0:  'Roadway Basic Surveillance' monitors traffic conditions using fixed equipment such as loop detectors and CCTV cameras.
                      Processes:
                      - Process Traffic Sensor Data: This process shall be responsible for collecting traffic sensor data.   This data shall include traffic parameters such as speed, volume, and occupancy, as well as video images of the traffic. The process shall collect pedestrian images and pedestrian sensor data. The process shall collect reversible lane, multimodal crossing and high occupancy vehicle (HOV)/high occupancy toll (HOT) lane sensor data.  Where any of the data is provided in analog form, the process shall be responsible for converting it into digital form and calibrating.  The converted data shall be sent to other processes for distribution, further analysis and storage. The process shall accept inputs to control the sensors and return operational status (state of the sensor device, configuration, and fault data) to the controlling process.
                      - Process Traffic Images: This process shall process raw traffic image data received from devices located on the road (surface street) and freeway network served by the Manage Traffic function.  The process shall transform the raw data into images that can be sent to another process for incident or work zone intrusion detection.  It shall also act as the control interface through which the images of traffic conditions can be changed by the traffic operations personnel and maintenance and construction center personnel, who shall also be supplied with images for viewing.  This process shall also provide operational status (state of the device, configuration, and fault data) to other processes in the Manage Traffic and Manage Maintenance and Construction functions that are monitoring the health of field equipment so that repairs can be scheduled by those other processes if deemed necessary.

                    Description of the Acceptor:
                    The 'Traffic Management Center' monitors and controls traffic and the road network. It represents centers that manage a broad range of transportation facilities including freeway systems, rural and suburban highway systems, and urban and suburban traffic control systems. It communicates with ITS Roadway Equipment and Connected Vehicle Roadside Equipment (RSE) to monitor and manage traffic flow and monitor the condition of the roadway, surrounding environmental conditions, and field equipment status. It manages traffic and transportation resources to support allied agencies in responding to, and recovering from, incidents ranging from minor traffic incidents through major disasters.

                      Function-0:  'TMC Basic Surveillance' remotely monitors and controls traffic sensor systems and surveillance (e.g., CCTV) equipment, and collects, processes and stores the collected traffic data. Current traffic information and other real-time transportation information is also collected from other centers. The collected information is provided to traffic operations personnel and made available to other centers.

                      Processes:
                      -Process Traffic Data: This process shall receive and process data from sensors at the roadway. This data includes sensor and video data coming from traffic sensors as well as inputs for pedestrians, multimodal crossings, parking facilities, highway rail intersections, high-occupancy vehicle (HOV) / high-occupancy toll (HOT) and reversible lanes. The process distributes data to Provide Device Control processes that are responsible for freeway, highway rail intersections, tunnels, parking lot, and surface street management. It also sends the data to another Provide Traffic Surveillance process for loading into the stores of current and long term data. Information about the various sensors to aid in this processing and distribution of data is accessed from the data store static_data_for_sensor_processing.

                    Definition of traffic_image_meta_data information flow:
                    Meta data that describes traffic images. Traffic images (video) are in another flow.

                    STRIDE based threat information associated with the information flow:
                    Threat Title:Potential Lack of Input Validation for TMC Basic Surveillance;Threat Category:Tampering;Description:Data flowing across (2B) traffic images + traffic image meta data+ traffic detector data may be tampered with by an attacker. This may lead to a denial of service attack against TMC Basic Surveillance or an elevation of privilege attack against TMC Basic Surveillance or an information disclosure by TMC Basic Surveillance. Failure to verify that input is as expected is a root cause of a very large number of exploitable issues. Consider all paths and the way they handle data. Verify that all input is verified for correctness using an approved list input validation approach.

                    Threat Title:Potential Data Repudiation by TMC Basic Surveillance;Threat Category:Repudiation;Description:TMC Basic Surveillance claims that it did not receive data from a source outside the trust boundary. Consider using logging or auditing to record the source, time, and summary of the received data.

                    Threat Title:Data Flow Sniffing;Threat Category:Information Disclosure;Description:Data flowing across (2B) traffic images + traffic image meta data+ traffic detector data may be sniffed by an attacker. Depending on what type of data an attacker can read, it may be used to attack other parts of the system or simply be a disclosure of information leading to compliance violations. Consider encrypting the data flow.

                    Threat Title:Potential Process Crash or Stop for TMC Basic Surveillance;Threat Category:Denial Of Service;Description:TMC Basic Surveillance crashes, halts, stops or runs slowly; in all cases violating an availability metric.

                    Threat Title:Data Flow (2B) traffic images + traffic image meta data+ traffic detector data Is Potentially Interrupted;Threat Category:Denial Of Service;Description:An external agent interrupts data flowing across a trust boundary in either direction.

                    Threat Title:Elevation Using Impersonation;Threat Category:Elevation Of Privilege;Description:TMC Basic Surveillance may be able to impersonate the context of Roadway Basic Surveillance in order to gain additional privilege.

                    Threat Title:TMC Basic Surveillance May be Subject to Elevation of Privilege Using Remote Code Execution;Threat Category:Elevation Of Privilege;Description:Roadway Basic Surveillance may be able to remotely execute code for TMC Basic Surveillance.

                    Threat Title:Elevation by Changing the Execution Flow in TMC Basic Surveillance;Threat Category:Elevation Of Privilege;Description:An attacker may pass data into TMC Basic Surveillance in order to change the flow of program execution within TMC Basic Surveillance to the attacker's choosing.

                    Threat Title:Cross Site Request Forgery;Threat Category:Elevation Of Privilege;Description:Cross-site request forgery (CSRF or XSRF) is a type of attack in which an attacker forces a user's browser to make a forged request to a vulnerable site by exploiting an existing trust relationship between the browser and the vulnerable web site.  In a simple scenario, a user is logged in to web site A using a cookie as a credential.  The other browses to web site B.  Web site B returns a page with a hidden form that posts to web site A.  Since the browser will carry the user's cookie to web site A, web site B now can take any action on web site A, for example, adding an admin to an account.  The attack can be used to exploit any requests that the browser automatically authenticates, e.g. by session cookie, integrated authentication, IP whitelisting etc.  The attack can be carried out in many ways such as by luring the victim to a site under control of the attacker, getting the user to click a link in a phishing email, or hacking a reputable web site that the victim will visit. The issue can only be resolved on the server side by requiring that all authenticated state-changing requests include an additional piece of secret payload (canary or CSRF token) which is known only to the legitimate web site and the browser and which is protected in transit through SSL/TLS. See the Forgery Protection property on the flow stencil for a list of mitigations.

                    Threat Title:Authenticated Data Flow Compromised;Threat Category:Tampering;Description:An attacker can read or modify data transmitted over an authenticated dataflow."
                    -------------------------------------------------------------------
                    Classified Labels in python list format:
                    ['T1020', 'T1040', 'T1059', 'T1105', 'T1190' ,'T1557', 'T1565']
                    -------------------------------------------------------------------

                    Now, here is your information flow description:
                    {prompt}

                    Which are the relevant MITRE ATT&CK Techniques from the given list that the attacker might use to attack the information flow? Return the Technique IDs in python list format.
                    '''

        response = query_gpt(final_prompt)
        list_response = parse_mitre_techniques(response)
        # print(list_response)
        responses.append(list_response)

# Score every prediction against its ground-truth entry, then pool the counts
# into overall (micro-averaged) precision, recall and F1.
# These counters are zero-initialised here; the loop below does not update them.
true_positives = false_positives = false_negatives = 0

# One record per example: both label lists plus the entries returned by
# calculate_metrics (which must include "tp", "fp" and "fn").
predictions_with_metrics = []

for truth, pred in zip(ground_truth, responses):
    # Pull every "T<digits>" token out of the raw ground-truth value.
    p_truth = re.findall(r"T\d+", truth)
    metrics = calculate_metrics(p_truth, pred)

    record = {"true_label": p_truth, "predicted_label": pred}
    record.update(metrics)  # metric keys follow the two label keys
    predictions_with_metrics.append(record)


# Sum the per-example counts over the whole evaluation set.
overall_tp, overall_fp, overall_fn = [
    sum(entry[count_key] for entry in predictions_with_metrics)
    for count_key in ("tp", "fp", "fn")
]

# Each ratio falls back to 0 when its denominator is empty.
predicted_positive_count = overall_tp + overall_fp
if predicted_positive_count > 0:
    overall_precision = overall_tp / predicted_positive_count
else:
    overall_precision = 0

actual_positive_count = overall_tp + overall_fn
if actual_positive_count > 0:
    overall_recall = overall_tp / actual_positive_count
else:
    overall_recall = 0

precision_recall_sum = overall_precision + overall_recall
if precision_recall_sum > 0:
    overall_f1 = 2 * overall_precision * overall_recall / precision_recall_sum
else:
    overall_f1 = 0



In [None]:

# Persist the evaluation results: one JSON file with the per-example records
# and one with the overall precision / recall / F1.
predictions_with_metrics_path = data_folder + "prediction/3_shot_flow_fn_proc_threat_20.json"
overall_metrics_path = data_folder + "prediction/3_shot_flow_fn_proc_threat_20_overall.json"

# open(..., "w") does not create missing parent directories. Create the output
# folder up front so the computed results are not lost to a FileNotFoundError.
for output_path in (predictions_with_metrics_path, overall_metrics_path):
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Save predictions with metrics
with open(predictions_with_metrics_path, "w") as f:
    json.dump(predictions_with_metrics, f, indent=4)

# Save overall metrics
with open(overall_metrics_path, "w") as f:
    json.dump({
        "overall_precision": overall_precision,
        "overall_recall": overall_recall,
        "overall_f1": overall_f1
    }, f, indent=4)

print(f"Predictions with metrics saved to {predictions_with_metrics_path}")
print(f"Overall metrics saved to {overall_metrics_path}")