In [None]:
import google.generativeai as genai
import csv
import time

# Function to generate an answer using Google Generative AI
def generate_answer(prompt, api_key):
    """Send one prompt to the Gemini model and return its stripped text reply.

    Args:
        prompt: Text passed to ``generate_content``.
        api_key: Key handed to ``genai.configure``; must be truthy.

    Returns:
        The stripped text of the first part of the first candidate, the
        literal string "No content generated." when the response carries no
        candidate or no content part, or ``None`` when the API call raises.

    Raises:
        ValueError: If ``api_key`` is empty or ``None``.
    """
    if not api_key:
        raise ValueError("API key for Generative AI is not set.")

    genai.configure(api_key=api_key)

    # Only a single letter is expected back, so the output budget is kept tiny.
    settings = {"temperature": 0.7, "max_output_tokens": 10}

    try:
        gemini = genai.GenerativeModel(model_name="gemini-1.0-pro", generation_config=settings)
        reply = gemini.generate_content(prompt)

        has_text_part = reply.candidates and len(reply.candidates[0].content.parts) > 0
        if not has_text_part:
            return "No content generated."
        return reply.candidates[0].content.parts[0].text.strip()
    except Exception as e:
        # Best effort: report the failure and let the caller treat it as "no answer".
        print(f"Error during API call: {e}")
        return None

# Main function to process the TSV file
def process_questions_file(filename, api_key):
    """Ask the model every question in a tab-separated file and print each verdict.

    Each row must provide the columns ``Question``, ``Option A``, ``Option B``,
    ``Option C``, ``Option D`` and ``GT`` (the expected answer, upper-cased
    before comparison). One request is made per row via ``generate_answer``,
    followed by a 4 second pause.

    Args:
        filename: Path of the TSV file to read.
        api_key: API key forwarded to ``generate_answer``.
    """
    n_correct = 0
    n_seen = 0
    text_columns = ('Question', 'Option A', 'Option B', 'Option C', 'Option D')

    with open(filename, mode='r') as handle:
        for record in csv.DictReader(handle, delimiter='\t'):
            question, option_a, option_b, option_c, option_d = (
                record[column].strip() for column in text_columns
            )
            expected = record['GT'].strip().upper()

            prompt = f"""You are a cybersecurity expert specializing in cyber threat intelligence. You are given a
multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to
choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.
Question: {question}
Options:
A) {option_a}
B) {option_b}
C) {option_c}
D) {option_d}
Important: The last line of your answer should contain only the single letter corresponding to the best option, with no additional text."""

            answer = generate_answer(prompt, api_key)

            # Exact string match: anything other than the bare letter counts as wrong.
            is_correct = answer == expected
            n_seen += 1
            if is_correct:
                n_correct += 1

            verdict = 'Correct' if is_correct else 'Incorrect'
            print(f"Question: {question}")
            print(f"Generated Answer: {answer}")
            print(f"Correct Answer: {expected}")
            print(f"{verdict}\n")

            # Pause between requests to stay under the API rate limit.
            time.sleep(4)

    # Print the overall accuracy
   # print(f"Total Questions: {total_count}")
   # print(f"Correct Answers: {correct_count}")
   # print(f"Accuracy: {correct_count / total_count * 100:.2f}%")

# Example usage
if __name__ == "__main__":
    import os

    # Never commit credentials: the key is read from the environment instead
    # of being embedded in the source. Export GOOGLE_API_KEY before running.
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    if not api_key:
        raise ValueError("API key for Generative AI is not set. Export GOOGLE_API_KEY first.")
    process_questions_file("/content/cti-mcq.tsv", api_key)


Question: Which of the following mitigations involves preventing applications from running that haven't been downloaded from legitimate repositories?
Generated Answer: B
Correct Answer: B
Correct

Question: Which data source is recommended for monitoring commands that may circumvent mechanisms designed to control elevation of privileges?
Generated Answer: C
Correct Answer: D
Incorrect

Question: What does mitigation ID M1028 suggest to prevent privilege escalation exploits on a system?
Generated Answer: C
Correct Answer: C
Correct

Question: Which process creation is an indicator of potential SYSTEM privilege escalation according to the detection section?
Generated Answer: A
Correct Answer: B
Incorrect

Question: In a Linux environment, what is recommended to monitor for detecting privilege escalation via sudo?
Generated Answer: D
Correct Answer: C
Incorrect

Question: What mitigation ID suggests requiring a password every time sudo is executed to manage privileged accounts?
Generated 

KeyboardInterrupt: 

In [None]:
import google.generativeai as genai
import csv
import time

def generate_answers(prompts, api_key):
    """Send a batch of prompts to the Gemini model in one request.

    The prompts are joined with blank lines into a single request and the
    reply is split into lines, one answer per non-blank line.

    Args:
        prompts: List of prompt strings forming the batch.
        api_key: Key handed to ``genai.configure``; must be truthy.

    Returns:
        A list with exactly ``len(prompts)`` entries. Entry ``i`` is the i-th
        non-blank line of the reply (stripped), or ``None`` when the reply has
        fewer lines than prompts, carries no content, or the API call failed.
        Surplus reply lines are discarded.

    Raises:
        ValueError: If ``api_key`` is empty or ``None``.
    """
    if not api_key:
        raise ValueError("API key for Generative AI is not set.")

    # Nothing to ask: avoid a pointless (and failing) request with an empty prompt.
    if not prompts:
        return []

    # Configure the API
    genai.configure(api_key=api_key)

    # Configuration for the model
    generation_config = {
        "temperature": 0.7,
        "max_output_tokens": 50,  # Adjust this depending on the number of questions in the batch
    }

    # Join all prompts into one, separated by a blank line
    full_prompt = "\n\n".join(prompts)
    no_answers = [None] * len(prompts)

    try:
        model = genai.GenerativeModel(model_name="gemini-1.0-pro", generation_config=generation_config)
        response = model.generate_content(full_prompt)

        # A response may come back without candidates or parts; indexing [0]
        # blindly is what produced "list index (0) out of range".
        if not response.candidates or len(response.candidates[0].content.parts) == 0:
            print("Error during API call: no content generated for this batch.")
            return no_answers

        reply_text = response.candidates[0].content.parts[0].text
    except Exception as e:
        print(f"Error during API call: {e}")
        return no_answers

    # Blank separator lines are not answers; skipping them keeps answers
    # aligned with their prompts.
    answers = [line.strip() for line in reply_text.splitlines() if line.strip()]

    # Always hand back one slot per prompt so callers can pair them safely.
    answers = answers[:len(prompts)]
    answers.extend([None] * (len(prompts) - len(answers)))
    return answers

# Main function to process the TSV file
def process_questions_file(filename, api_key, batch_size=2):
    """Score the model on a TSV file of multiple-choice questions, in batches.

    The file is read as latin-1, tab-separated, with the columns ``Question``,
    ``Option A``, ``Option B``, ``Option C``, ``Option D`` and ``GT`` (the
    expected answer, upper-cased before comparison). Prompts are sent to
    ``generate_answers`` ``batch_size`` at a time; a final, smaller batch is
    sent for any leftover rows. Per-question verdicts and an overall summary
    are printed.

    Args:
        filename: Path of the TSV file to read.
        api_key: API key forwarded to ``generate_answers``.
        batch_size: Number of questions per request; must be at least 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1.")

    correct_count = 0
    total_count = 0

    def score_batch(prompts, expected_answers):
        """Query the model for one batch, print each verdict, return the number correct."""
        generated_texts = list(generate_answers(prompts, api_key) or [])

        # Force one generated slot per question: zip() alone would silently
        # drop questions whenever the model returns fewer lines than asked.
        del generated_texts[len(expected_answers):]
        generated_texts.extend([None] * (len(expected_answers) - len(generated_texts)))

        batch_correct = 0
        for generated_text, correct_answer in zip(generated_texts, expected_answers):
            is_correct = generated_text == correct_answer
            if is_correct:
                batch_correct += 1

            # Print the result
            print(f"Generated Answer: {generated_text}")
            print(f"Correct Answer: {correct_answer}")
            print(f"{'Correct' if is_correct else 'Incorrect'}\n")
        return batch_correct

    # Open the file with the correct encoding
    with open(filename, mode='r', encoding='latin-1') as file:
        tsv_reader = csv.DictReader(file, delimiter='\t')
        batch_prompts = []
        batch_answers = []

        for row in tsv_reader:
            question = row['Question'].strip()
            option_a = row['Option A'].strip()
            option_b = row['Option B'].strip()
            option_c = row['Option C'].strip()
            option_d = row['Option D'].strip()
            correct_answer = row['GT'].strip().upper()

            # Create the prompt for the current question
            prompt = f"""You are a cybersecurity expert specializing in cyber threat intelligence. You are given a
multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to
choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D.
Question: {question}
Options:
A) {option_a}
B) {option_b}
C) {option_c}
D) {option_d}
Important: The last line of your answer should contain only the single letter corresponding to the best option, with no additional text."""

            batch_prompts.append(prompt)
            batch_answers.append(correct_answer)

            # Send the batch as soon as it is full
            if len(batch_prompts) == batch_size:
                correct_count += score_batch(batch_prompts, batch_answers)
                total_count += len(batch_answers)

                # Reset batch
                batch_prompts = []
                batch_answers = []

                # Wait for a short time before making the next request to avoid hitting rate limits
                time.sleep(4)

        # Flush the leftover rows that did not fill a whole batch. Detecting
        # the last row by re-counting file lines was wrong (the count includes
        # the header) and re-read the entire file for every row.
        if batch_prompts:
            correct_count += score_batch(batch_prompts, batch_answers)
            total_count += len(batch_answers)

    # Print the overall accuracy
    print(f"Total Questions: {total_count}")
    print(f"Correct Answers: {correct_count}")
    if total_count:
        print(f"Accuracy: {correct_count / total_count * 100:.2f}%")
    else:
        # An empty file must not crash with a division by zero.
        print("Accuracy: N/A (no questions processed)")

# Example usage
if __name__ == "__main__":
    import os

    # Never commit credentials: the key is read from the environment instead
    # of being embedded in the source. Export GOOGLE_API_KEY before running.
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    if not api_key:
        raise ValueError("API key for Generative AI is not set. Export GOOGLE_API_KEY first.")
    process_questions_file("/content/cti-mcq.tsv", api_key, batch_size=2)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Incorrect

Generated Answer: C
Correct Answer: C
Correct

Generated Answer: D
Correct Answer: D
Correct

Error during API call: list index (0) out of range
Generated Answer: None
Correct Answer: C
Incorrect

Generated Answer: None
Correct Answer: B
Incorrect

Error during API call: list index (0) out of range
Generated Answer: None
Correct Answer: C
Incorrect

Generated Answer: None
Correct Answer: A
Incorrect

Generated Answer: C
Correct Answer: D
Incorrect

Generated Answer: C
Correct Answer: C
Correct

Generated Answer: A
Correct Answer: C
Incorrect

Generated Answer: C
Correct Answer: D
Incorrect

Generated Answer: D
Correct Answer: D
Correct

Generated Answer: C
Correct Answer: C
Correct

Generated Answer: C
Correct Answer: C
Correct

Generated Answer: B
Correct Answer: B
Correct

Generated Answer: B
Correct Answer: A
Incorrect

Generated Answer: B
Correct Answer: B
Correct

Generated Answer: C
Correct Answer: C
Corr