In [None]:
# `setx` is a Windows shell command and is a syntax error inside a Python cell.
# Set the variable for this kernel process from Python instead.
# "gemini_key" is a placeholder value; replace it with a real key at run time
# rather than committing the key to the notebook.
import os

os.environ["GEMINI_API_KEY"] = "gemini_key"


SyntaxError: invalid syntax (<ipython-input-1-b1243f5518b7>, line 1)

In [None]:
import requests
from bs4 import BeautifulSoup
import google.generativeai as genai
from google.colab import userdata
import csv

# Function to scrape content from a URL
def scrape_content(url, timeout=30):
    """Download ``url`` and return the text extracted from its HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled server would block the call
            indefinitely.

    Returns:
        The result of ``BeautifulSoup(...).get_text()`` for the response body,
        or ``""`` when the request fails (the error is printed).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Best effort: report the failure and let the caller continue with other URLs.
        print(f"Error fetching {url}: {e}")
        return ""

    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.get_text()

# Function to generate an answer using the Gemini API
def generate_answer(prompt, scraped_content):
    """Send the prompt plus scraped text to Gemini and return the generated text.

    Args:
        prompt: Instruction text placed first in the request.
        scraped_content: Page text appended after the prompt.

    Returns:
        The concatenated text of the first candidate's parts, or
        ``"No content generated."`` when the response has no candidates or the
        first candidate carries no text.

    Raises:
        ValueError: If ``userdata.get('gemini_key')`` returns a falsy value.
    """
    combined_content = f"{prompt}\n\n{scraped_content}"

    # The key is read from the Colab user-data entry named 'gemini_key'.
    gemini_key = userdata.get('gemini_key')
    if not gemini_key:
        raise ValueError("API key for Gemini is not set. Please add a 'gemini_key' entry to the Colab user data.")

    # Configure the Gemini API
    genai.configure(api_key=gemini_key)

    # Sampling settings for the model
    generation_config = {
        "temperature": 0.9,
        "top_p": 1,
        "top_k": 5,
        "max_output_tokens": 2048,
    }

    model = genai.GenerativeModel(model_name="gemini-1.0-pro", generation_config=generation_config)
    response = model.generate_content(combined_content)

    candidates = response.candidates
    if not candidates:
        return "No content generated."

    # Return the text itself, not the candidate object: printing or saving the
    # object writes its "content { parts { text: ..." representation instead.
    parts = candidates[0].content.parts
    generated_text = "".join(getattr(part, "text", "") or "" for part in parts)
    return generated_text or "No content generated."

# Function to save the generated text to a TSV file
def save_to_tsv(generated_text, filename='questions.tsv'):
    """Write the generated text into a one-column, tab-separated file.

    The file is overwritten and contains a ``GeneratedContent`` header row
    followed by a single row holding ``generated_text``.

    Args:
        generated_text: Value stored in the single data cell.
        filename: Path of the TSV file to create.
    """
    # Write UTF-8 explicitly so the result does not depend on the platform's
    # default encoding and can be read back with encoding='utf-8'.
    with open(filename, 'w', newline='', encoding='utf-8') as tsvfile:
        writer = csv.writer(tsvfile, delimiter='\t')
        writer.writerow(['GeneratedContent'])
        writer.writerow([generated_text])

# Main function to process input and generate the output
def main(prompt, urls):
    """Scrape every URL, ask the model for an answer, print it and save it.

    Args:
        prompt: Instruction text handed to ``generate_answer``.
        urls: Iterable of page addresses handed to ``scrape_content``.

    Returns:
        None. Stops early, after printing a notice, when nothing was scraped.
    """
    # Each page's text is followed by a blank-line separator.
    pages = [scrape_content(url) + "\n\n" for url in urls]
    scraped_content = "".join(pages)

    if scraped_content.strip() == "":
        print("No content scraped from the provided URLs.")
        return

    answer = generate_answer(prompt, scraped_content)
    print("Generated Answer:")
    print(answer)

    save_to_tsv(answer)

# Entry point: runs main() once with the example prompt and URLs below.
if __name__ == "__main__":
    # Example usage
    # Instruction text for the model; it asks for at most five MCQs returned as TSV.
    input_prompt = """Prompt for generating MCQs with ChatGPT-4o from MITRE ATT&CK
(Italized parts are unique to MITRE ATT&CK)
You are a cybersecurity expert specializing in cyber threat intelligence. Given the text below, please
generate a maximum of 5 multiple-choice questions with four possible options each.
Follow these requirements:
1. Question Format: Each question must have four options. The options should be challenging and require
careful consideration. Avoid creating options that could be interpreted as correct under different circumstances.
2. Target Audience: The questions should be suitable for security professionals with three to five years of
experience in cyber threat intelligence. Avoid generic questions such as “What is the objective?”, “Which
operating system can be targeted?”.
3. Content Coverage: Aim to cover various sections of the document to ensure a comprehensive evaluation of
the candidate’s knowledge. Include context-specific questions that require an understanding of the document’s
content.
4. Technical Precision: Use precise terminology and concepts relevant to cyber threat intelligence. Incorporate
situational or scenario-based questions where applicable.
5. Include Technique IDs and Names: Ensure that all questions, where applicable, mention both the ID and the
full name of the MITRE ATT&CK pattern technique.
6. Premise Inclusion: Each question should include a premise indicating it pertains to MITRE ATT&CK,
specifying the relevant platform (Enterprise, ICS, or Mobile) where necessary.
7. Output Format: Return the output in TSV format (must be tab-separated) with the following columns:
Question, Option A, Option B, Option C, Option D, Correct Answer (A, B, C, D), and Explanation.
Important: Only return the TSV content as specified. Do not include any additional text or commentary
outside the TSV format."""
    # Pages passed to main() as the URLs to scrape.
    input_urls = [
        "https://attack.mitre.org/techniques/T1548/",
        "https://cwe.mitre.org/data/definitions/346.html"
    ]

    main(input_prompt, input_urls)


Generated Answer:
content {
  parts {
    text: "**Question 1**\nWithin the context of MITRE ATT&CK, which sub-technique under the Abuse Elevation Control Mechanism technique involves exploiting built-in mechanisms that elevate privileges on a system? | Option A: Bypass User Account Control | Option B: Sudo and Sudo Caching | Option C: Elevated Execution with Prompt | Option D: Temporary Elevated Cloud Access | Correct Answer: B | Explanation: The Sudo and Sudo Caching sub-technique (T1548.003) allows adversaries to abuse the built-in sudo mechanism to gain elevated privileges on a system.\n\n**Question 2**\nWhich of the following is a common consequence of abusing elevation control mechanisms? | Option A: Denial of Service | Option B: Unauthorized access to data | Option C: Data integrity violation | Option D: Information disclosure | Correct Answer: B | Explanation: Abusing elevation control mechanisms can lead to unauthorized access to data, as adversaries can gain higher-level perm

In [None]:
import csv
import re

# Function to read the TSV file and parse its content
def parse_generated_tsv(input_file, output_file):
    """Split the single generated-text cell of a TSV into one row per question.

    The input is a tab-separated file with a header row; the first field of
    the first non-empty data row is taken as the generated text. That text is
    scanned line by line:

    * a line containing ``Question <n>:`` starts a new question;
    * a line containing ``(A)``..``(D)`` labels supplies option texts (any
      text before the first label is appended to the question);
    * a line starting with ``1.``..``5.`` is an answer-key entry whose value
      is the text between the first and second ``". "``.

    Args:
        input_file: Path of the TSV holding the generated text.
        output_file: Path of the TSV to write, with columns Question,
            Option A-D and Correct Answer.
    """
    generated_content = ""
    with open(input_file, 'r', newline='', encoding='utf-8') as tsvfile:
        reader = csv.reader(tsvfile, delimiter='\t')
        next(reader, None)  # skip the header; tolerate a completely empty file

        # The content is expected in the first column of the first data row.
        for row in reader:
            if row:
                generated_content = row[0]
                break

    questions = []
    current_question = None
    answer_key = {}

    question_pattern = re.compile(r"Question \d+:")
    option_pattern = re.compile(r"\((A|B|C|D)\)")
    answer_prefixes = ("1.", "2.", "3.", "4.", "5.")

    for line in generated_content.split('\n'):
        if question_pattern.search(line):
            if current_question:
                questions.append(current_question)
            current_question = {"Question": question_pattern.split(line)[1].strip()}
        elif option_pattern.search(line):
            if current_question is None:
                # Options that appear before any question have nothing to attach to.
                continue
            # split() with one capture group yields [lead, label, text, label, text, ...]
            options = option_pattern.split(line)
            lead = options[0].strip()
            if lead:
                current_question["Question"] += " " + lead
            for i in range(1, len(options) - 1, 2):
                current_question[f"Option {options[i]}"] = options[i + 1].strip()
        elif line.startswith(answer_prefixes):
            parts = line.split(". ")
            # Ignore a bare number such as "2." that carries no answer text.
            if len(parts) > 1:
                answer_key[parts[0].strip()] = parts[1].strip()

    if current_question:
        questions.append(current_question)

    # Answer-key entries are matched to questions by 1-based position.
    for position, question in enumerate(questions, start=1):
        key = str(position)
        if key in answer_key:
            question["Correct Answer"] = answer_key[key]

    # Writing to TSV file with new structure
    with open(output_file, 'w', newline='', encoding='utf-8') as tsvfile:
        writer = csv.DictWriter(tsvfile, fieldnames=["Question", "Option A", "Option B", "Option C", "Option D", "Correct Answer"], delimiter='\t')
        writer.writeheader()
        for question in questions:
            writer.writerow(question)

# Specify the input and output file paths
input_file, output_file = '/content/questions.tsv', '/content/parsed_questions.tsv'

# Turn the single-cell TSV into one row per question.
parse_generated_tsv(input_file=input_file, output_file=output_file)

print(f'Parsed TSV file has been saved to {output_file}')


Parsed TSV file has been saved to /content/parsed_questions.tsv


In [None]:
import requests
from bs4 import BeautifulSoup
import google.generativeai as genai
import time
import csv

# Function to scrape content from a URL
def scrape_content(url, timeout=30):
    """Fetch a page and return its text content.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds given to ``requests.get`` as ``timeout`` so that an
            unresponsive server cannot block the call forever.

    Returns:
        Text produced by ``BeautifulSoup(...).get_text()``, or ``""`` if the
        request raised a ``requests`` exception (the error is printed).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()
    except requests.exceptions.RequestException as e:
        # Report and continue; callers treat an empty string as "nothing scraped".
        print(f"Error fetching {url}: {e}")
        return ""

# Function to generate an answer using the Gemini API
def generate_answer(prompt, scraped_content, gemini_key):
    """Ask Gemini to answer ``prompt`` about ``scraped_content``.

    Args:
        prompt: Instruction text placed first in the request.
        scraped_content: Page text appended after the prompt.
        gemini_key: API key passed to ``genai.configure``.

    Returns:
        The concatenated text of the first candidate's parts, or
        ``"No content generated."`` when there are no candidates or the first
        candidate has no text.

    Raises:
        ValueError: If ``gemini_key`` is empty.
    """
    combined_content = f"{prompt}\n\n{scraped_content}"

    if not gemini_key:
        raise ValueError("API key for Gemini is not set. Please set the GEMINI_API_KEY environment variable.")

    # Configure the Gemini API
    genai.configure(api_key=gemini_key)

    # Sampling settings for the model
    generation_config = {
        "temperature": 0.9,
        "top_p": 1,
        "top_k": 5,
        "max_output_tokens": 2048,
    }

    model = genai.GenerativeModel(model_name="gemini-1.0-pro", generation_config=generation_config)
    response = model.generate_content(combined_content)

    candidates = response.candidates
    if not candidates:
        return "No content generated."

    # A candidate can come back without any parts; indexing parts[0] then
    # fails with "list index (0) out of range", so treat it as no content.
    parts = candidates[0].content.parts
    generated_text = "".join(getattr(part, "text", "") or "" for part in parts)
    return generated_text or "No content generated."

# Function to parse generated text and save to CSV
def save_to_csv(generated_text, filename='questions.csv'):
    """Parse generated questions and append them to a CSV file.

    Lines are matched by prefix after stripping surrounding whitespace:
    ``Question:``, ``A)``..``D)``, ``### Response :`` (the correct answer) and
    ``Explanation:``. Each ``Question:`` line starts a new row, so a text that
    holds several questions produces several rows. If no line matches, one
    empty row is still written.

    Args:
        generated_text: Model output to parse.
        filename: CSV file to append to; the header row is written only when
            the file does not exist yet or is empty.
    """
    import os  # local import keeps this function independent of the cell's import list

    header = ["Question", "Option A", "Option B", "Option C", "Option D", "Correct Answer", "Explanation"]
    # Line prefix that feeds each column, in column order.
    prefixes = ("Question:", "A)", "B)", "C)", "D)", "### Response :", "Explanation:")

    rows = []
    current = [""] * len(header)
    for raw_line in generated_text.split("\n"):
        line = raw_line.strip()
        for column, prefix in enumerate(prefixes):
            if not line.startswith(prefix):
                continue
            # A new question closes the row collected so far.
            if column == 0 and any(current):
                rows.append(current)
                current = [""] * len(header)
            # Drop only the leading prefix, not later occurrences inside the text.
            current[column] = line[len(prefix):].strip()
            break
    if any(current) or not rows:
        rows.append(current)

    # A file opened with mode 'a' is write-only, so it cannot be read to test
    # for emptiness; check the size on disk before opening instead.
    needs_header = not os.path.exists(filename) or os.path.getsize(filename) == 0

    with open(filename, mode='a', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        if needs_header:
            writer.writerow(header)
        writer.writerows(rows)

# Main function to process input and generate the output
def main(prompt, urls, iterations=200, delay_seconds=4):
    """Scrape the URLs once, then repeatedly generate questions and save them.

    Args:
        prompt: Instruction text handed to ``generate_answer``.
        urls: Iterable of page addresses handed to ``scrape_content``.
        iterations: Number of generation requests to make.
        delay_seconds: Pause after every request, successful or not, so that
            failed requests do not turn into a burst against the rate limit
            (4 seconds keeps the loop within 15 requests per minute).

    Returns:
        None. Stops early, after printing a notice, when the API key is not
        configured or nothing could be scraped.
    """
    import os  # local import keeps this function independent of the cell's import list

    # Read the key from the environment instead of embedding it in the notebook.
    gemini_key = os.environ.get("GEMINI_API_KEY")
    if not gemini_key:
        print("GEMINI_API_KEY is not set; export it before calling main().")
        return

    scraped_content = ""
    for url in urls:
        print(f"Scraping content from {url}...")
        scraped_content += scrape_content(url) + "\n\n"

    if not scraped_content.strip():
        print("No content scraped from the provided URLs.")
        return

    for i in range(iterations):
        try:
            # Generate answer
            generated_text = generate_answer(prompt, scraped_content, gemini_key)
            if "No content generated" in generated_text:
                raise ValueError("Content generation failed.")

            print(f"Generated Answer for iteration {i + 1}:")
            print(generated_text)
            save_to_csv(generated_text, filename='questions.csv')

        except requests.exceptions.Timeout:
            # The failed request is not repeated; the next iteration acts as the retry.
            print(f"Request timed out on iteration {i + 1}. Retrying after delay.")
            time.sleep(10)  # extra back-off on timeout
        except Exception as e:
            # Best effort: one failed request should not abort the whole run.
            print(f"Skipping iteration {i + 1} due to error: {e}")

        # Delay to respect rate limits
        print(f"Waiting to respect rate limits. Request #{i+1}")
        time.sleep(delay_seconds)

# Entry point: runs main() with the example prompt and URL below.
if __name__ == "__main__":
    # Example usage
    # Instruction text for the model; it asks for at most five MCQs in text format.
    input_prompt = """Prompt for generating MCQs with gemini
You are a cybersecurity expert specializing in cyber threat intelligence. Given the text below, please
generate a maximum of 5 multiple-choice questions with four possible options each.
Follow these requirements:
1. Question Format: Each question must have four options. The options should be challenging and require
careful consideration. Avoid creating options that could be interpreted as correct under different circumstances.
2. Target Audience: The questions should be suitable for security professionals with three to five years of
experience in cyber threat intelligence. Avoid generic questions such as “What is the objective?”, “Which
operating system can be targeted?”.
3. Content Coverage: Aim to cover various sections of the document to ensure a comprehensive evaluation of
the candidate’s knowledge. Include context-specific questions that require an understanding of the document’s
content.
4. Technical Precision: Use precise terminology and concepts relevant to cyber threat intelligence. Incorporate
situational or scenario-based questions where applicable.
5. Include Technique IDs and Names: Ensure that all questions, where applicable, mention both the ID and the
full name of the MITRE ATT&CK pattern technique.
6. Premise Inclusion: Each question should include a premise indicating it pertains to MITRE ATT&CK,
specifying the relevant platform (Enterprise, ICS, or Mobile) where necessary.
7. Output Format: Return the output in text format with these columns:
Question, Option A, Option B, Option C, Option D, Correct Answer (A, B, C, D), and Explanation.
"""
    # Pages passed to main() as the URLs to scrape.
    input_urls = [
        "https://www.nist.gov/cyberframework",
    ]

    main(input_prompt, input_urls)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m

**{Question 5}**

MITRE ATT&CK (Enterprise)

Which of the following is a common impact of a denial-of-service (DoS) attack?

{Options:}
A) Data exfiltration
B) System instability
C) Unauthorized access
D) File encryption

### {GT} B
{Explanation:} Denial-of-Service (T1499) attacks disrupt the availability of services, resulting in system instability and performance degradation.
Waiting to respect rate limits. Request #17
Generated Answer for iteration 18:
**Question 1:**

**MITRE ATT&CK (Enterprise):** Which technique involves identifying and exploiting vulnerable operating systems by searching for specific versions or configurations?

**Options:**
A) Exploitation for Access (T1001)
B) Valid Accounts (T1078)
C) Command and Scripting Interpreter (T1059)
D) Input Validation (T1190)

**Correct Answer:** A

**Explanation:** T1001: Exploitation for Access involves identifying and exploiting vulnerabilities in operating system

In [None]:
import requests
from bs4 import BeautifulSoup
import google.generativeai as genai
import time
import csv

# Function to scrape content from a URL
def scrape_content(url, timeout=30):
    """Return the text of the page at ``url``, or ``""`` if it cannot be fetched.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds handed to ``requests.get`` as ``timeout``; without it
            a server that never answers would block this call indefinitely.

    Returns:
        The ``get_text()`` result of parsing the response body with
        BeautifulSoup's ``'html.parser'``; ``""`` after printing the error
        when the request fails.
    """
    try:
        page = requests.get(url, timeout=timeout)
        page.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return ""

    return BeautifulSoup(page.text, 'html.parser').get_text()

# Function to generate an answer using the Gemini API
def generate_answer(prompt, scraped_content, gemini_key):
    """Generate text with Gemini from the prompt followed by the scraped text.

    Args:
        prompt: Instruction text placed first in the request.
        scraped_content: Page text appended after the prompt.
        gemini_key: API key passed to ``genai.configure``.

    Returns:
        The joined text of the first candidate's parts, or
        ``"No content generated."`` if the response has no candidates or the
        first candidate has no text.

    Raises:
        ValueError: If ``gemini_key`` is empty.
    """
    if not gemini_key:
        raise ValueError("API key for Gemini is not set. Please set the GEMINI_API_KEY environment variable.")

    combined_content = f"{prompt}\n\n{scraped_content}"

    # Configure the Gemini API
    genai.configure(api_key=gemini_key)

    # Sampling settings for the model
    generation_config = {
        "temperature": 0.9,
        "top_p": 1,
        "top_k": 5,
        "max_output_tokens": 2048,
    }

    model = genai.GenerativeModel(model_name="gemini-1.0-pro", generation_config=generation_config)
    response = model.generate_content(combined_content)

    if not response.candidates:
        return "No content generated."

    # Indexing parts[0] on a candidate without parts raises
    # "list index (0) out of range"; report that case as no content instead.
    texts = [getattr(part, "text", "") or "" for part in response.candidates[0].content.parts]
    generated_text = "".join(texts)
    if not generated_text:
        return "No content generated."
    return generated_text

# Function to parse generated text and save to TSV
def save_to_tsv(generated_text, filename='questions.tsv'):
    """Parse generated questions and append them to a tab-separated file.

    Lines are matched after stripping surrounding whitespace: a line starting
    with "question" (any case) starts a new row and contributes the text after
    its first colon; ``A)``..``D)`` lines contribute the options; a line
    starting with "correct answer" (any case) contributes the text after its
    first colon. If no line matches, one empty row is still written.

    Args:
        generated_text: Model output to parse.
        filename: TSV file to append to; the header row is written only when
            the file does not exist yet or is empty.
    """
    import os  # local import keeps this function independent of the cell's import list

    header = ["Question", "Option A", "Option B", "Option C", "Option D", "Correct Answer"]
    option_columns = {"A)": 1, "B)": 2, "C)": 3, "D)": 4}

    rows = []
    current = [""] * len(header)
    for raw_line in generated_text.split("\n"):
        line = raw_line.strip()
        lowered = line.lower()
        if lowered.startswith("question"):
            # A new question closes the row collected so far.
            if any(current):
                rows.append(current)
                current = [""] * len(header)
            current[0] = line.split(":", 1)[-1].strip()
        elif line[:2] in option_columns:
            # Slice off only the two-character label so "A)text" keeps its first letter.
            current[option_columns[line[:2]]] = line[2:].strip()
        elif lowered.startswith("correct answer"):
            current[5] = line.split(":", 1)[-1].strip()
    if any(current) or not rows:
        rows.append(current)

    # A file opened with mode 'a' is write-only, so it cannot be read to test
    # for emptiness; check the size on disk before opening instead.
    needs_header = not os.path.exists(filename) or os.path.getsize(filename) == 0

    with open(filename, mode='a', newline='', encoding='utf-8') as tsvfile:
        writer = csv.writer(tsvfile, delimiter='\t')
        if needs_header:
            writer.writerow(header)
        writer.writerows(rows)

# Main function to process input and generate the output
def main(prompt, urls, iterations=2, delay_seconds=4):
    """Scrape the URLs once, then repeatedly generate questions and save them.

    The API key is read from the ``GEMINI_API_KEY`` environment variable. A
    key must never be written into the notebook: any key that has been
    committed in source should be treated as exposed and rotated.

    Args:
        prompt: Instruction text handed to ``generate_answer``.
        urls: Iterable of page addresses handed to ``scrape_content``.
        iterations: Number of generation requests to make.
        delay_seconds: Pause after every request, successful or not, so that
            failed requests do not turn into a burst against the rate limit
            (4 seconds keeps the loop within 15 requests per minute).

    Returns:
        None. Stops early, after printing a notice, when the API key is not
        configured or nothing could be scraped.
    """
    import os  # local import keeps this function independent of the cell's import list

    gemini_key = os.environ.get("GEMINI_API_KEY")
    if not gemini_key:
        print("GEMINI_API_KEY is not set; export it before calling main().")
        return

    scraped_content = ""
    for url in urls:
        print(f"Scraping content from {url}...")
        scraped_content += scrape_content(url) + "\n\n"

    if not scraped_content.strip():
        print("No content scraped from the provided URLs.")
        return

    for i in range(iterations):
        try:
            # Generate answer
            generated_text = generate_answer(prompt, scraped_content, gemini_key)
            if "No content generated" in generated_text:
                raise ValueError("Content generation failed.")

            print(f"Generated Answer for iteration {i + 1}:")
            print(generated_text)
            save_to_tsv(generated_text, filename='questions.tsv')

        except requests.exceptions.Timeout:
            # The failed request is not repeated; the next iteration acts as the retry.
            print(f"Request timed out on iteration {i + 1}. Retrying after delay.")
            time.sleep(10)  # extra back-off on timeout
        except Exception as e:
            # Best effort: one failed request should not abort the whole run.
            print(f"Skipping iteration {i + 1} due to error: {e}")

        # Delay to respect rate limits
        print(f"Waiting to respect rate limits. Request #{i+1}")
        time.sleep(delay_seconds)

# Entry point: runs main() with the example prompt and URL below.
if __name__ == "__main__":
    # Example usage
    # Instruction text for the model; it asks for at most five MCQs returned as TSV
    # and ends with a "Text:" label.
    input_prompt = """Prompt for generating MCQs with ChatGPT-4o from MITRE ATT&CK
(Italized parts are unique to MITRE ATT&CK)
You are a cybersecurity expert specializing in cyber threat intelligence. Given the text below, please
generate a maximum of 5 multiple-choice questions with four possible options each.
Follow these requirements:
1. Question Format: Each question must have four options. The options should be challenging and require
careful consideration. Avoid creating options that could be interpreted as correct under different circumstances.
2. Target Audience: The questions should be suitable for security professionals with three to five years of
experience in cyber threat intelligence. Avoid generic questions such as “What is the objective?”, “Which
operating system can be targeted?”.
3. Content Coverage: Aim to cover various sections of the document to ensure a comprehensive evaluation of
the candidate’s knowledge. Include context-specific questions that require an understanding of the document’s
content.
4. Technical Precision: Use precise terminology and concepts relevant to cyber threat intelligence. Incorporate
situational or scenario-based questions where applicable.
5. Include Technique IDs and Names: Ensure that all questions, where applicable, mention both the ID and the
full name of the MITRE ATT&CK pattern technique.
6. Premise Inclusion: Each question should include a premise indicating it pertains to MITRE ATT&CK,
specifying the relevant platform (Enterprise, ICS, or Mobile) where necessary.
7. Output Format: Return the output in TSV format (must be tab-separated) with the following columns:
Question, Option A, Option B, Option C, Option D, Correct Answer (A, B, C, D).
Important: Only return the TSV content as specified. Do not include any additional text or commentary outside the TSV format.
Text:
"""
    # Pages passed to main() as the URLs to scrape.
    input_urls = [
        "https://www.nist.gov/cyberframework",
    ]

    main(input_prompt, input_urls)


Scraping content from https://www.nist.gov/cyberframework...
Skipping iteration 1 due to error: list index (0) out of range
Skipping iteration 2 due to error: list index (0) out of range


In [None]:
import csv

# Define function to parse the text file and save to CSV
def parse_text_to_csv(input_file, output_file):
    """Convert a text file of questions into a CSV with one row per question.

    Every line is stripped and classified: a line ending in ``?`` is the
    question, lines starting with ``A)``..``D)`` are options (the text from
    the fourth character on), and a line starting with ``### {GT}`` carries
    the correct answer and completes the row.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the CSV file to write (overwritten).
    """
    columns = ["Question", "Option A", "Option B", "Option C", "Option D", "Correct Answer"]
    option_columns = {"A)": "Option A", "B)": "Option B", "C)": "Option C", "D)": "Option D"}
    answer_marker = "### {GT}"

    def blank_record():
        return dict.fromkeys(columns, "")

    with open(input_file, 'r', encoding='utf-8') as handle:
        raw_lines = handle.readlines()

    records = []
    record = blank_record()
    for raw in raw_lines:
        text = raw.strip()

        # The question test comes first, so an option-looking line that ends
        # in '?' is still treated as a question.
        if text.endswith("?"):
            record["Question"] = text
            continue

        label = text[:2]
        if label in option_columns:
            record[option_columns[label]] = text[3:].strip()
            continue

        if text.startswith(answer_marker):
            record["Correct Answer"] = text.split(answer_marker)[1].strip()
            # The answer line ends the block: store it and start a fresh one.
            records.append(record)
            record = blank_record()

    # Write the parsed data to CSV
    with open(output_file, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=columns)
        writer.writeheader()
        for entry in records:
            writer.writerow(entry)

    print(f"Data has been written to {output_file}")

# Specify your input and output file paths
# Source text file (replace with the path to your own file) and CSV destination
input_file, output_file = "/content/QCmtxt.txt", "parsed_questions.csv"

# Convert the text file into the CSV
parse_text_to_csv(input_file=input_file, output_file=output_file)


Data has been written to parsed_questions.csv


In [None]:
import csv
import re

# Define function to parse the text file and save to CSV
def parse_text_to_csv(input_file, output_file):
    """Convert a text file of questions into a CSV with one row per question.

    Every line is stripped and classified:

    * a line ending in ``?`` that does not start with "Option" and does not
      contain "Question" starts a new question (the previous one is stored);
    * lines starting with ``A)``..``D)`` are options;
    * a line containing ``### Response :``, ``Correct Answer:``, ``### {GT}``
      or ``Correct Answer (`` carries the correct answer, taken as the first
      stand-alone letter A-D that follows the marker.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the CSV file to write (overwritten).
    """
    fieldnames = ["Question", "Option A", "Option B", "Option C", "Option D", "Correct Answer"]
    answer_marker = re.compile(r"### Response :|Correct Answer:|### \{GT\}|Correct Answer \(")
    standalone_letter = re.compile(r"\b[A-D]\b")

    with open(input_file, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    data = []
    question, option_a, option_b, option_c, option_d, correct_answer = "", "", "", "", "", ""

    def current_row():
        return {
            "Question": question,
            "Option A": option_a,
            "Option B": option_b,
            "Option C": option_c,
            "Option D": option_d,
            "Correct Answer": correct_answer
        }

    for line in lines:
        line = line.strip()

        # Detect a question line based on ending with '?'
        if line.endswith("?") and not line.startswith("Option") and "Question" not in line:
            # Save the previous question data if it exists
            if question:
                data.append(current_row())
                option_a, option_b, option_c, option_d, correct_answer = "", "", "", "", ""
            question = line  # Start a new question

        elif line.startswith("A)"):
            option_a = line[3:].strip()
        elif line.startswith("B)"):
            option_b = line[3:].strip()
        elif line.startswith("C)"):
            option_c = line[3:].strip()
        elif line.startswith("D)"):
            option_d = line[3:].strip()

        else:
            marker = answer_marker.search(line)
            if marker:
                # Search only after the marker: scanning the whole line would
                # pick up the "C" of "Correct" and report every answer as C.
                letter = standalone_letter.search(line, marker.end())
                if letter:
                    correct_answer = letter.group(0)

    # Add the last question if it exists after the loop
    if question:
        data.append(current_row())

    # Write the parsed data to CSV
    with open(output_file, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

    print(f"Data has been written to {output_file}")

# Specify your input and output file paths
# Source text file (replace with the path to your own file) and CSV destination
input_file, output_file = "QCmtxt.txt", "parsed_questions.csv"

# Convert the text file into the CSV
parse_text_to_csv(input_file=input_file, output_file=output_file)


Data has been written to parsed_questions.csv


In [None]:
import csv
import re

def parse_text_to_csv(input_file, output_file):
    """Convert blank-line-separated question blocks in a text file into CSV rows.

    Every line is stripped and classified: a line ending in ``?`` is the
    question, lines starting with ``A)``..``D)`` are options, and a line
    matching one of the answer formats (``Correct Answer: X``,
    ``**### Response:**X``, ``**Correct Answer:** X``, ``### {GT} X``)
    supplies the answer letter. A blank line, or the end of the file, closes
    the block; a block is written only when the question, all four options
    and the answer are present.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the CSV file to write (overwritten).
    """
    # The answer letter is captured directly after the marker. Searching the
    # whole line for the first A-D would return the "C" of "Correct".
    answer_pattern = re.compile(
        r"(?:Correct Answer:\s*"
        r"|\*\*### Response:?\s*\*\*"
        r"|\*\*Correct Answer:\*\*\s*"
        r"|###\s*\{GT\}\s*)([A-D])"
    )
    keys = ("question", "A", "B", "C", "D", "answer")

    with open(input_file, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    data = []
    block = dict.fromkeys(keys, "")

    for line in lines:
        line = line.strip()

        # When we reach an empty line, we assume the end of the question block
        if line == "":
            if all(block.values()):
                data.append([block[key] for key in keys])
            block = dict.fromkeys(keys, "")
            continue

        if line.endswith("?"):
            block["question"] = line
        elif line[:2] in ("A)", "B)", "C)", "D)"):
            # Slice off only the two-character label so "A)text" keeps its first letter.
            block[line[0]] = line[2:].strip()
        else:
            answer = answer_pattern.search(line)
            if answer:
                block["answer"] = answer.group(1)

    # The file may end without a trailing blank line; keep the final block too.
    if all(block.values()):
        data.append([block[key] for key in keys])

    # Write to CSV
    with open(output_file, 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Question", "Option A", "Option B", "Option C", "Option D", "Correct Answer"])
        writer.writerows(data)

# Specify input and output files
# Text file to read and CSV file to produce
input_file, output_file = "/content/QCmtxt.txt", "/content/parsed_questions.csv"

# Parse and save to CSV
parse_text_to_csv(input_file=input_file, output_file=output_file)
