In [1]:
def get_api_key(file_path):
    with open(file_path, 'r') as file:
        line = file.readline().strip()
        # Assuming the file contains only one line with the format KEY=value
        key, value = line.split('=', 1)
        return value

# Use the API key
GROQ_API_KEY = get_api_key('api_key_llama2.txt')

In [2]:
!pip install groq




[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import pandas as pd
import time
from groq import Groq
from tqdm import tqdm

# Function to read the API key
def get_api_key(file_path):
    with open(file_path, 'r') as file:
        line = file.readline().strip()
        key, value = line.split('=', 1)
        return value

# Use the API key
GROQ_API_KEY = get_api_key('api_key_llama2.txt')
client = Groq(api_key=GROQ_API_KEY)

# Load the dataset
df = pd.read_csv('cti-mcq.tsv', sep='\t', encoding='ISO-8859-1')

# Define the start and end lines for processing
start_line = 950
end_line = 1000
df_limited = df.iloc[start_line:end_line]

# Define the new prompt format
My_prompt = """You are given a multiple-choice question (MCQ) from a Cyber Threat Intelligence (CTI) knowledge benchmark dataset. Your task is to choose the best option among the four provided. Return your answer as a single uppercase letter: A, B, C, or D. :

### Question: 
{Question}

### Input:
A) {Option_A}
B) {Option_B}
C) {Option_C}
D) {Option_D}

### Response: """

# Path to the output TSV file
output_file = 'responses_llama3-8B.tsv'

# Write the responses to the TSV file
with open(output_file, 'a') as file:
   
    for index, row in tqdm(df_limited.iterrows(), total=df_limited.shape[0], desc="Processing Prompts"):
        # Prepare the prompt using the new format
        prompt = My_prompt.format(
            Question=row['Question'],
            Option_A=row['Option A'],
            Option_B=row['Option B'],
            Option_C=row['Option C'],
            Option_D=row['Option D']
        )

        # Get the response from the llama3 model using Groq
        completion = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": prompt}],
            temperature=1,
            max_tokens=50,
            top_p=1,
            stream=False
        )

        # Extract and clean the response
        response_text = completion.choices[0].message.content.strip()
        cleaned_response = response_text.replace("\n", " ").replace("\t", " ")

        # Write the response to the TSV file
        file.write(f'{cleaned_response}\n')

        # Sleep for 3 seconds between requests to avoid rate-limiting
        time.sleep(1)

print("TSV file created successfully.")


Processing Prompts:  94%|█████████████████████████████████████████████████████████▎   | 94/100 [03:50<00:16,  2.74s/it]

In [1]:
import re
import pandas as pd

# Define a function to extract the first standalone uppercase letter A, B, C, or D
def extract_first_letter(content):
    if isinstance(content, str):
        # Use regex to match variations like "A)", " A ", "**A**"
        match = re.search(r'\b[A-D]\b|\b[A-D]\)|\*\*[A-D]\*\*', content)
        if match:
            # Clean the match to return just the letter
            letter = match.group(0)
            return re.sub(r'[^A-D]', '', letter)  # Remove any extra characters like **, )
        return ''
    return ''

# Path to the existing TSV file with responses
input_file = 'responses_llama3-8B.tsv'

# Attempt to load the responses from the TSV file with different encodings
encodings = ['utf-8', 'latin1', 'utf-16', 'ISO-8859-1']  # List of encodings to try
df = None

for encoding in encodings:
    try:
        df = pd.read_csv(input_file, sep='\t', header=None, names=['Response'], encoding=encoding, engine='python')
        print(f"Successfully loaded with encoding: {encoding}")
        break
    except UnicodeDecodeError as e:
        print(f"Failed to load with encoding {encoding}: {e}")

if df is None:
    raise ValueError("Failed to load the file with all attempted encodings.")

# Prepare a list to hold the results
letters = []

# Extract the first standalone letter from each response
for content in df['Response']:
    letter = extract_first_letter(content)
    letters.append(letter)

# Convert results to DataFrame
letters_df = pd.DataFrame({'first_letter': letters})

# Save to a new TSV file
output_file = 'LLAMMA3_RESPONSES.tsv'
letters_df.to_csv(output_file, sep='\t', index=False)

print("First letters extracted and saved successfully!")


Successfully loaded with encoding: utf-8
First letters extracted and saved successfully!


In [11]:
import pandas as pd

encodings = ['utf-8', 'latin1', 'utf-16', 'ISO-8859-1']  # List of encodings to try

df = pd.read_csv('cti-mcq.tsv', sep='\t', encoding='ISO-8859-1')
llama_responses = pd.read_csv('LLAMMA3_RESPONSES.tsv', sep='\t',encoding='utf-8')

# Extraire les 900 premières lignes pour les réponses correctes (GT) et les réponses générées
gt_answers = df['GT'].head(900).tolist()  # Liste des réponses correctes
generated_answers = llama_responses['first_letter'].head(900).tolist()  # Liste des réponses générées

# Comparer les réponses et calculer le pourcentage de bonnes réponses
correct_answers = 0
total_questions = len(gt_answers)  # Devrait être 900

# Comparaison des deux colonnes
for gt, generated in zip(gt_answers, generated_answers):
    if gt == generated:
        correct_answers += 1

# Calcul du pourcentage de bonnes réponses
accuracy_percentage = (correct_answers / total_questions) * 100

# Afficher le résultat
print(f"Le pourcentage de bonnes réponses du modèle (Llama3) est : {accuracy_percentage:.2f}%")


Le pourcentage de bonnes réponses du modèle (Llama3) est : 37.33%


#### Comparer les réponses de LLAMA3 sur cti_mcq avec les réponses du dataset du benchmark

In [12]:
import pandas as pd

encodings = ['utf-8', 'latin1', 'utf-16', 'ISO-8859-1']  # List of encodings to try

df = pd.read_csv('cti-mcq-responses.tsv', sep='\t', encoding='ISO-8859-1')
llama_responses = pd.read_csv('LLAMMA3_RESPONSES.tsv', sep='\t',encoding='utf-8')

# Extraire les 900 premières lignes pour les réponses correctes (GT) et les réponses générées
gt_answers = df['LLAMA3-8B'].head(900).tolist()  # Liste des réponses correctes
generated_answers = llama_responses['first_letter'].head(900).tolist()  # Liste des réponses générées

# Comparer les réponses et calculer le pourcentage de bonnes réponses
correct_answers = 0
total_questions = len(gt_answers)  # Devrait être 900

# Comparaison des deux colonnes
for gt, generated in zip(gt_answers, generated_answers):
    if gt == generated:
        correct_answers += 1

# Calcul du pourcentage de bonnes réponses
accuracy_percentage = (correct_answers / total_questions) * 100

# Afficher le résultat
print(f"Le pourcentage de bonnes réponses du modèle (Llama3) est : {accuracy_percentage:.2f}%")


Le pourcentage de bonnes réponses du modèle (Llama3) est : 44.33%
